From 7b7232f3fb5ecd7c30cb52df368070cc5f5ca614 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Wed, 1 Feb 2006 21:06:49 -0600 Subject: [SCSI] iscsi update: cleanup iscsi class interface From: michaelc@cs.wisc.edu fujita.tomonori@lab.ntt.co.jp da-x@monatomic.org and err path fixup from: ogerlitz@voltaire.com This patch cleans up that interface by having the lld and class pass a iscsi_cls_session or iscsi_cls_conn between each other when the function is used by HW and SW iscsi llds. This way the lld does not have to remember if it has to send a handle or pointer and a handle or pointer to connection, session or host. This also has the class verify the session handle that gets passed from userspace instead of using the pointer passed into the kernel directly. Signed-off-by: Mike Christie Signed-off-by: Alex Aizman Signed-off-by: Dmitry Yusupov Signed-off-by: James Bottomley --- include/scsi/iscsi_if.h | 3 --- include/scsi/scsi_transport_iscsi.h | 34 ++++++++++++++++++---------------- 2 files changed, 18 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/scsi/iscsi_if.h b/include/scsi/iscsi_if.h index 3e5cb5ab2d34..e5618b90996e 100644 --- a/include/scsi/iscsi_if.h +++ b/include/scsi/iscsi_if.h @@ -163,9 +163,6 @@ enum iscsi_param { }; #define ISCSI_PARAM_MAX 14 -typedef uint64_t iscsi_sessionh_t; /* iSCSI Data-Path session handle */ -typedef uint64_t iscsi_connh_t; /* iSCSI Data-Path connection handle */ - #define iscsi_ptr(_handle) ((void*)(unsigned long)_handle) #define iscsi_handle(_ptr) ((uint64_t)(unsigned long)_ptr) #define hostdata_session(_hostdata) (iscsi_ptr(*(unsigned long *)_hostdata)) diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 16602a547a63..b41cf077e54b 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -63,25 +63,28 @@ struct iscsi_transport { int max_lun; unsigned int max_conn; unsigned int max_cmd_len; - struct Scsi_Host *(*create_session) (struct scsi_transport_template *t, - uint32_t initial_cmdsn); - void (*destroy_session) (struct Scsi_Host *shost); - struct iscsi_cls_conn *(*create_conn) (struct Scsi_Host *shost, + struct iscsi_cls_session *(*create_session) + (struct scsi_transport_template *t, uint32_t sn, uint32_t *sid); + void (*destroy_session) (struct iscsi_cls_session *session); + struct iscsi_cls_conn *(*create_conn) (struct iscsi_cls_session *sess, uint32_t cid); - int (*bind_conn) (iscsi_sessionh_t session, iscsi_connh_t conn, + int (*bind_conn) (struct iscsi_cls_session *session, + struct iscsi_cls_conn *cls_conn, uint32_t transport_fd, int is_leading); - int (*start_conn) (iscsi_connh_t conn); - void (*stop_conn) (iscsi_connh_t conn, int flag); + int (*start_conn) (struct iscsi_cls_conn *conn); + void (*stop_conn) (struct iscsi_cls_conn *conn, int flag); void (*destroy_conn) (struct iscsi_cls_conn *conn); - int (*set_param) (iscsi_connh_t conn, enum iscsi_param param, + int (*set_param) (struct iscsi_cls_conn *conn, enum iscsi_param param, uint32_t value); - int (*get_conn_param) (void *conndata, enum iscsi_param param, + int (*get_conn_param) (struct iscsi_cls_conn *conn, + enum iscsi_param param, uint32_t *value); - int (*get_session_param) (struct Scsi_Host *shost, + int (*get_session_param) (struct iscsi_cls_session *session, enum iscsi_param param, uint32_t *value); - int (*send_pdu) (iscsi_connh_t conn, struct iscsi_hdr *hdr, + int (*send_pdu) (struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr, char *data, uint32_t data_size); - void (*get_stats) (iscsi_connh_t conn, struct iscsi_stats *stats); + void (*get_stats) (struct iscsi_cls_conn *conn, + struct iscsi_stats *stats); }; /* @@ -93,15 +96,14 @@ extern int iscsi_unregister_transport(struct iscsi_transport *tt); /* * control plane upcalls */ -extern void iscsi_conn_error(iscsi_connh_t conn, enum iscsi_err error); -extern int iscsi_recv_pdu(iscsi_connh_t conn, struct iscsi_hdr *hdr, +extern void iscsi_conn_error(struct iscsi_cls_conn *conn, enum iscsi_err error); +extern int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr, char *data, uint32_t data_size); struct iscsi_cls_conn { struct list_head conn_list; /* item in connlist */ void *dd_data; /* LLD private data */ struct iscsi_transport *transport; - iscsi_connh_t connh; int active; /* must be accessed with the connlock */ struct device dev; /* sysfs transport/container device */ struct mempool_zone *z_error; @@ -113,7 +115,7 @@ struct iscsi_cls_conn { container_of(_dev, struct iscsi_cls_conn, dev) struct iscsi_cls_session { - struct list_head list; /* item in session_list */ + struct list_head sess_list; /* item in session_list */ struct iscsi_transport *transport; struct device dev; /* sysfs transport/container device */ }; -- cgit v1.2.3 From faead26d7a06605add627f29aee73ba654ce11f9 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 14 Feb 2006 10:42:07 -0600 Subject: [PATCH] add scsi_execute_in_process_context() API We have several points in the SCSI stack (primarily for our device functions) where we need to guarantee process context, but (given the place where the last reference was released) we cannot guarantee this. This API gets around the issue by executing the function directly if the caller has process context, but scheduling a workqueue to execute in process context if the caller doesn't have it. Unfortunately, it requires memory allocation in interrupt context, but it's better than what we have previously. The true solution will require a bit of re-engineering, so isn't appropriate for 2.6.16. Signed-off-by: James Bottomley --- drivers/scsi/scsi_lib.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++ include/scsi/scsi.h | 2 ++ 2 files changed, 61 insertions(+) (limited to 'include') diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 4a602853a98e..4362dcde74af 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -2248,3 +2249,61 @@ scsi_target_unblock(struct device *dev) device_for_each_child(dev, NULL, target_unblock); } EXPORT_SYMBOL_GPL(scsi_target_unblock); + + +struct work_queue_work { + struct work_struct work; + void (*fn)(void *); + void *data; +}; + +static void execute_in_process_context_work(void *data) +{ + void (*fn)(void *data); + struct work_queue_work *wqw = data; + + fn = wqw->fn; + data = wqw->data; + + kfree(wqw); + + fn(data); +} + +/** + * scsi_execute_in_process_context - reliably execute the routine with user context + * @fn: the function to execute + * @data: data to pass to the function + * + * Executes the function immediately if process context is available, + * otherwise schedules the function for delayed execution. + * + * Returns: 0 - function was executed + * 1 - function was scheduled for execution + * <0 - error + */ +int scsi_execute_in_process_context(void (*fn)(void *data), void *data) +{ + struct work_queue_work *wqw; + + if (!in_interrupt()) { + fn(data); + return 0; + } + + wqw = kmalloc(sizeof(struct work_queue_work), GFP_ATOMIC); + + if (unlikely(!wqw)) { + printk(KERN_ERR "Failed to allocate memory\n"); + WARN_ON(1); + return -ENOMEM; + } + + INIT_WORK(&wqw->work, execute_in_process_context_work, wqw); + wqw->fn = fn; + wqw->data = data; + schedule_work(&wqw->work); + + return 1; +} +EXPORT_SYMBOL_GPL(scsi_execute_in_process_context); diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index c60b8ff2f5e4..9c331258bc27 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -433,4 +433,6 @@ struct scsi_lun { /* Used to obtain the PCI location of a device */ #define SCSI_IOCTL_GET_PCI 0x5387 +int scsi_execute_in_process_context(void (*fn)(void *data), void *data); + #endif /* _SCSI_SCSI_H */ -- cgit v1.2.3 From a6b14fa6fdc01ab3519c2729624f808677539b59 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Tue, 14 Feb 2006 15:01:12 -0800 Subject: [IA64] Count disabled cpus as potential hot-pluggable CPUs Have a facility to account for potentially hot-pluggable CPUs. ACPI doesnt give a determinstic method to find hot-pluggable CPUs. Hence we use 2 methods to assist. - BIOS can mark potentially hot-pluggable CPUs as disabled in the MADT tables. - User can specify the number of hot-pluggable CPUs via parameter additional_cpus=X The option is enabled only if ACPI_CONFIG_HOTPLUG_CPU=y which enables the physical hotplug option. Without which user can still use logical onlining and offlining of CPUs by enabling CONFIG_HOTPLUG_CPU=y Adds more bits to cpu_possible_map for potentially hot-pluggable cpus. Signed-off-by: Ashok Raj Signed-off-by: Tony Luck --- arch/ia64/kernel/acpi.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++ arch/ia64/kernel/setup.c | 4 ++++ include/asm-ia64/acpi.h | 2 ++ 3 files changed, 62 insertions(+) (limited to 'include') diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index d2702c419cf8..34795ede72e0 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -761,6 +761,62 @@ int acpi_map_cpu2node(acpi_handle handle, int cpu, long physid) return (0); } +int additional_cpus __initdata = -1; + +static __init int setup_additional_cpus(char *s) +{ + if (s) + additional_cpus = simple_strtol(s, NULL, 0); + + return 0; +} + +early_param("additional_cpus", setup_additional_cpus); + +/* + * cpu_possible_map should be static, it cannot change as cpu's + * are onlined, or offlined. The reason is per-cpu data-structures + * are allocated by some modules at init time, and dont expect to + * do this dynamically on cpu arrival/departure. + * cpu_present_map on the other hand can change dynamically. + * In case when cpu_hotplug is not compiled, then we resort to current + * behaviour, which is cpu_possible == cpu_present. + * - Ashok Raj + * + * Three ways to find out the number of additional hotplug CPUs: + * - If the BIOS specified disabled CPUs in ACPI/mptables use that. + * - The user can overwrite it with additional_cpus=NUM + * - Otherwise don't reserve additional CPUs. + */ +__init void prefill_possible_map(void) +{ + int i; + int possible, disabled_cpus; + + disabled_cpus = total_cpus - available_cpus; + if (additional_cpus == -1) { + if (disabled_cpus > 0) { + possible = total_cpus; + additional_cpus = disabled_cpus; + } + else { + possible = available_cpus; + additional_cpus = 0; + } + } else { + possible = available_cpus + additional_cpus; + } + if (possible > NR_CPUS) + possible = NR_CPUS; + + printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", + possible, + max_t(int, additional_cpus, 0)); + + for (i = 0; i < possible; i++) + cpu_set(i, cpu_possible_map); +} + int acpi_map_lsapic(acpi_handle handle, int *pcpu) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 35f7835294a3..3258e09278d0 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -430,6 +430,7 @@ setup_arch (char **cmdline_p) if (early_console_setup(*cmdline_p) == 0) mark_bsp_online(); + parse_early_param(); #ifdef CONFIG_ACPI /* Initialize the ACPI boot-time table parser */ acpi_table_init(); @@ -688,6 +689,9 @@ void setup_per_cpu_areas (void) { /* start_kernel() requires this... */ +#ifdef CONFIG_ACPI_HOTPLUG_CPU + prefill_possible_map(); +#endif } /* diff --git a/include/asm-ia64/acpi.h b/include/asm-ia64/acpi.h index 3a544ffc5008..f7a517654308 100644 --- a/include/asm-ia64/acpi.h +++ b/include/asm-ia64/acpi.h @@ -106,6 +106,8 @@ extern unsigned int can_cpei_retarget(void); extern unsigned int is_cpu_cpei_target(unsigned int cpu); extern void set_cpei_target_cpu(unsigned int cpu); extern unsigned int get_cpei_target_cpu(void); +extern void prefill_possible_map(void); +extern int additional_cpus; #ifdef CONFIG_ACPI_NUMA /* Proximity bitmap length; _PXM is at most 255 (8 bit)*/ -- cgit v1.2.3 From ee68cea2c26b7a8222f9020f54d22c6067011e8b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 15 Feb 2006 01:34:23 -0800 Subject: [NETFILTER]: Fix xfrm lookup after SNAT To find out if a packet needs to be handled by IPsec after SNAT, packets are currently rerouted in POST_ROUTING and a new xfrm lookup is done. This breaks SNAT of non-unicast packets to non-local addresses because the packet is routed as incoming packet and no neighbour entry is bound to the dst_entry. In general, it seems to be a bad idea to replace the dst_entry after the packet was already sent to the output routine because its state might not match what's expected. This patch changes the xfrm lookup in POST_ROUTING to re-use the original dst_entry without routing the packet again. This means no policy routing can be used for transport mode transforms (which keep the original route) when packets are SNATed to match the policy, but it looks like the best we can do for now. Signed-off-by: Patrick McHardy Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4.h | 2 +- net/ipv4/netfilter.c | 41 ++++++++++++++++++++++++++++++++++ net/ipv4/netfilter/ip_nat_standalone.c | 6 ++--- 3 files changed, 45 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index fdc4a9527343..43c09d790b83 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -79,7 +79,7 @@ enum nf_ip_hook_priorities { #ifdef __KERNEL__ extern int ip_route_me_harder(struct sk_buff **pskb); - +extern int ip_xfrm_me_harder(struct sk_buff **pskb); #endif /*__KERNEL__*/ #endif /*__LINUX_IP_NETFILTER_H*/ diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 52a3d7c57907..ed42cdc57cd9 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -78,6 +78,47 @@ int ip_route_me_harder(struct sk_buff **pskb) } EXPORT_SYMBOL(ip_route_me_harder); +#ifdef CONFIG_XFRM +int ip_xfrm_me_harder(struct sk_buff **pskb) +{ + struct flowi fl; + unsigned int hh_len; + struct dst_entry *dst; + + if (IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) + return 0; + if (xfrm_decode_session(*pskb, &fl, AF_INET) < 0) + return -1; + + dst = (*pskb)->dst; + if (dst->xfrm) + dst = ((struct xfrm_dst *)dst)->route; + dst_hold(dst); + + if (xfrm_lookup(&dst, &fl, (*pskb)->sk, 0) < 0) + return -1; + + dst_release((*pskb)->dst); + (*pskb)->dst = dst; + + /* Change in oif may mean change in hh_len. */ + hh_len = (*pskb)->dst->dev->hard_header_len; + if (skb_headroom(*pskb) < hh_len) { + struct sk_buff *nskb; + + nskb = skb_realloc_headroom(*pskb, hh_len); + if (!nskb) + return -1; + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; + } + return 0; +} +EXPORT_SYMBOL(ip_xfrm_me_harder); +#endif + void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *); EXPORT_SYMBOL(ip_nat_decode_session); diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 92c54999a19d..7c3f7d380240 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -235,19 +235,19 @@ ip_nat_out(unsigned int hooknum, return NF_ACCEPT; ret = ip_nat_fn(hooknum, pskb, in, out, okfn); +#ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); if (ct->tuplehash[dir].tuple.src.ip != ct->tuplehash[!dir].tuple.dst.ip -#ifdef CONFIG_XFRM || ct->tuplehash[dir].tuple.src.u.all != ct->tuplehash[!dir].tuple.dst.u.all -#endif ) - return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; + return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP; } +#endif return ret; } -- cgit v1.2.3 From 5ecfbae093f0c37311e89b29bfc0c9d586eace87 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 15 Feb 2006 22:50:10 +0300 Subject: [PATCH] fix zap_thread's ptrace related problems 1. The tracee can go from ptrace_stop() to do_signal_stop() after __ptrace_unlink(p). 2. It is unsafe to __ptrace_unlink(p) while p->parent may wait for tasklist_lock in ptrace_detach(). Signed-off-by: Oleg Nesterov Cc: Roland McGrath Cc: Ingo Molnar Cc: Christoph Hellwig Cc: Eric W. Biederman Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- include/linux/ptrace.h | 1 + kernel/ptrace.c | 25 +++++++++++++++---------- 3 files changed, 17 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/exec.c b/fs/exec.c index 055378d2513e..0e1c95074d42 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1403,7 +1403,7 @@ static void zap_threads (struct mm_struct *mm) do_each_thread(g,p) { if (mm == p->mm && p != tsk && p->ptrace && p->parent->mm == mm) { - __ptrace_unlink(p); + __ptrace_detach(p, 0); } } while_each_thread(g,p); write_unlock_irq(&tasklist_lock); diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 9d5cd106b344..0d36750fc0f1 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -84,6 +84,7 @@ extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __us extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len); extern int ptrace_attach(struct task_struct *tsk); extern int ptrace_detach(struct task_struct *, unsigned int); +extern void __ptrace_detach(struct task_struct *, unsigned int); extern void ptrace_disable(struct task_struct *); extern int ptrace_check_attach(struct task_struct *task, int kill); extern int ptrace_request(struct task_struct *child, long request, long addr, long data); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index d2cf144d0af5..d95a72c9279d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -72,8 +72,8 @@ void ptrace_untrace(task_t *child) */ void __ptrace_unlink(task_t *child) { - if (!child->ptrace) - BUG(); + BUG_ON(!child->ptrace); + child->ptrace = 0; if (!list_empty(&child->ptrace_list)) { list_del_init(&child->ptrace_list); @@ -184,22 +184,27 @@ bad: return retval; } +void __ptrace_detach(struct task_struct *child, unsigned int data) +{ + child->exit_code = data; + /* .. re-parent .. */ + __ptrace_unlink(child); + /* .. and wake it up. */ + if (child->exit_state != EXIT_ZOMBIE) + wake_up_process(child); +} + int ptrace_detach(struct task_struct *child, unsigned int data) { if (!valid_signal(data)) - return -EIO; + return -EIO; /* Architecture-specific hardware disable .. */ ptrace_disable(child); - /* .. re-parent .. */ - child->exit_code = data; - write_lock_irq(&tasklist_lock); - __ptrace_unlink(child); - /* .. and wake it up. */ - if (child->exit_state != EXIT_ZOMBIE) - wake_up_process(child); + if (child->ptrace) + __ptrace_detach(child, data); write_unlock_irq(&tasklist_lock); return 0; -- cgit v1.2.3 From 8ed9b2c7a804335004e4bd3b4c6989c5b6bc243f Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Mon, 13 Feb 2006 05:29:57 -0500 Subject: [IA64-SGI] sn2 minor fixes and cleanups General SN2 code cleanup: - Do not initialize global variables to zero - Use kzalloc instead of kmalloc+memset - Check kmalloc return values - Do not obfuscate spin lock calls - Remove some unused code - Various formatting cleanups Signed-off-by: Jes Sorensen Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/io_init.c | 96 ++++++++++++++------------------- arch/ia64/sn/kernel/sn2/prominfo_proc.c | 25 ++++----- arch/ia64/sn/kernel/sn2/sn2_smp.c | 35 ++++++------ arch/ia64/sn/kernel/sn2/sn_proc_fs.c | 22 ++++---- arch/ia64/sn/kernel/tiocx.c | 4 +- arch/ia64/sn/pci/pci_dma.c | 16 +++--- arch/ia64/sn/pci/pcibr/pcibr_ate.c | 29 +++------- arch/ia64/sn/pci/pcibr/pcibr_dma.c | 14 ++--- arch/ia64/sn/pci/pcibr/pcibr_provider.c | 9 ++-- include/asm-ia64/sn/bte.h | 6 +-- include/asm-ia64/sn/pcibr_provider.h | 14 +---- include/asm-ia64/sn/sn_feature_sets.h | 3 -- 12 files changed, 111 insertions(+), 162 deletions(-) (limited to 'include') diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 3437c2390429..dfb3f2902379 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -23,6 +23,10 @@ #include "xtalk/hubdev.h" #include "xtalk/xwidgetdev.h" + +extern void sn_init_cpei_timer(void); +extern void register_sn_procfs(void); + static struct list_head sn_sysdata_list; /* sysdata list struct */ @@ -40,12 +44,12 @@ struct brick { struct slab_info slab_info[MAX_SLABS + 1]; }; -int sn_ioif_inited = 0; /* SN I/O infrastructure initialized? */ +int sn_ioif_inited; /* SN I/O infrastructure initialized? */ struct sn_pcibus_provider *sn_pci_provider[PCIIO_ASIC_MAX_TYPES]; /* indexed by asic type */ -static int max_segment_number = 0; /* Default highest segment number */ -static int max_pcibus_number = 255; /* Default highest pci bus number */ +static int max_segment_number; /* Default highest segment number */ +static int max_pcibus_number = 255; /* Default highest pci bus number */ /* * Hooks and struct for unsupported pci providers @@ -84,7 +88,6 @@ static inline u64 sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num, u64 address) { - struct ia64_sal_retval ret_stuff; ret_stuff.status = 0; ret_stuff.v0 = 0; @@ -94,7 +97,6 @@ sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num, (u64) nasid, (u64) widget_num, (u64) device_num, (u64) address, 0, 0, 0); return ret_stuff.status; - } /* @@ -102,7 +104,6 @@ sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num, */ static inline u64 sal_get_hubdev_info(u64 handle, u64 address) { - struct ia64_sal_retval ret_stuff; ret_stuff.status = 0; ret_stuff.v0 = 0; @@ -118,7 +119,6 @@ static inline u64 sal_get_hubdev_info(u64 handle, u64 address) */ static inline u64 sal_get_pcibus_info(u64 segment, u64 busnum, u64 address) { - struct ia64_sal_retval ret_stuff; ret_stuff.status = 0; ret_stuff.v0 = 0; @@ -215,7 +215,7 @@ static void __init sn_fixup_ionodes(void) struct hubdev_info *hubdev; u64 status; u64 nasid; - int i, widget, device; + int i, widget, device, size; /* * Get SGI Specific HUB chipset information. @@ -251,48 +251,37 @@ static void __init sn_fixup_ionodes(void) if (!hubdev->hdi_flush_nasid_list.widget_p) continue; + size = (HUB_WIDGET_ID_MAX + 1) * + sizeof(struct sn_flush_device_kernel *); hubdev->hdi_flush_nasid_list.widget_p = - kmalloc((HUB_WIDGET_ID_MAX + 1) * - sizeof(struct sn_flush_device_kernel *), - GFP_KERNEL); - memset(hubdev->hdi_flush_nasid_list.widget_p, 0x0, - (HUB_WIDGET_ID_MAX + 1) * - sizeof(struct sn_flush_device_kernel *)); + kzalloc(size, GFP_KERNEL); + if (!hubdev->hdi_flush_nasid_list.widget_p) + BUG(); for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) { - sn_flush_device_kernel = kmalloc(DEV_PER_WIDGET * - sizeof(struct - sn_flush_device_kernel), - GFP_KERNEL); + size = DEV_PER_WIDGET * + sizeof(struct sn_flush_device_kernel); + sn_flush_device_kernel = kzalloc(size, GFP_KERNEL); if (!sn_flush_device_kernel) BUG(); - memset(sn_flush_device_kernel, 0x0, - DEV_PER_WIDGET * - sizeof(struct sn_flush_device_kernel)); dev_entry = sn_flush_device_kernel; for (device = 0; device < DEV_PER_WIDGET; device++,dev_entry++) { - dev_entry->common = kmalloc(sizeof(struct - sn_flush_device_common), - GFP_KERNEL); + size = sizeof(struct sn_flush_device_common); + dev_entry->common = kzalloc(size, GFP_KERNEL); if (!dev_entry->common) BUG(); - memset(dev_entry->common, 0x0, sizeof(struct - sn_flush_device_common)); if (sn_prom_feature_available( PRF_DEVICE_FLUSH_LIST)) status = sal_get_device_dmaflush_list( - nasid, - widget, - device, - (u64)(dev_entry->common)); + nasid, widget, device, + (u64)(dev_entry->common)); else status = sn_device_fixup_war(nasid, - widget, - device, - dev_entry->common); + widget, device, + dev_entry->common); if (status != SALRET_OK) panic("SAL call failed: %s\n", ia64_sal_strerror(status)); @@ -383,13 +372,12 @@ void sn_pci_fixup_slot(struct pci_dev *dev) pci_dev_get(dev); /* for the sysdata pointer */ pcidev_info = kzalloc(sizeof(struct pcidev_info), GFP_KERNEL); - if (pcidev_info <= 0) + if (!pcidev_info) BUG(); /* Cannot afford to run out of memory */ - sn_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_KERNEL); - if (sn_irq_info <= 0) + sn_irq_info = kzalloc(sizeof(struct sn_irq_info), GFP_KERNEL); + if (!sn_irq_info) BUG(); /* Cannot afford to run out of memory */ - memset(sn_irq_info, 0, sizeof(struct sn_irq_info)); /* Call to retrieve pci device information needed by kernel. */ status = sal_get_pcidev_info((u64) segment, (u64) dev->bus->number, @@ -482,13 +470,13 @@ void sn_pci_fixup_slot(struct pci_dev *dev) */ void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) { - int status = 0; + int status; int nasid, cnode; struct pci_controller *controller; struct sn_pci_controller *sn_controller; struct pcibus_bussoft *prom_bussoft_ptr; struct hubdev_info *hubdev_info; - void *provider_soft = NULL; + void *provider_soft; struct sn_pcibus_provider *provider; status = sal_get_pcibus_info((u64) segment, (u64) busnum, @@ -535,6 +523,8 @@ void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) bus->sysdata = controller; if (provider->bus_fixup) provider_soft = (*provider->bus_fixup) (prom_bussoft_ptr, controller); + else + provider_soft = NULL; if (provider_soft == NULL) { /* fixup failed or not applicable */ @@ -638,13 +628,8 @@ void sn_bus_free_sysdata(void) static int __init sn_pci_init(void) { - int i = 0; - int j = 0; + int i, j; struct pci_dev *pci_dev = NULL; - extern void sn_init_cpei_timer(void); -#ifdef CONFIG_PROC_FS - extern void register_sn_procfs(void); -#endif if (!ia64_platform_is("sn2") || IS_RUNNING_ON_FAKE_PROM()) return 0; @@ -700,32 +685,29 @@ static int __init sn_pci_init(void) */ void hubdev_init_node(nodepda_t * npda, cnodeid_t node) { - struct hubdev_info *hubdev_info; + int size; + pg_data_t *pg; + + size = sizeof(struct hubdev_info); if (node >= num_online_nodes()) /* Headless/memless IO nodes */ - hubdev_info = - (struct hubdev_info *)alloc_bootmem_node(NODE_DATA(0), - sizeof(struct - hubdev_info)); + pg = NODE_DATA(0); else - hubdev_info = - (struct hubdev_info *)alloc_bootmem_node(NODE_DATA(node), - sizeof(struct - hubdev_info)); - npda->pdinfo = (void *)hubdev_info; + pg = NODE_DATA(node); + hubdev_info = (struct hubdev_info *)alloc_bootmem_node(pg, size); + + npda->pdinfo = (void *)hubdev_info; } geoid_t cnodeid_get_geoid(cnodeid_t cnode) { - struct hubdev_info *hubdev; hubdev = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo); return hubdev->hdi_geoid; - } subsys_initcall(sn_pci_init); diff --git a/arch/ia64/sn/kernel/sn2/prominfo_proc.c b/arch/ia64/sn/kernel/sn2/prominfo_proc.c index 81c63b2f8ae9..6ae276d5d50c 100644 --- a/arch/ia64/sn/kernel/sn2/prominfo_proc.c +++ b/arch/ia64/sn/kernel/sn2/prominfo_proc.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1999,2001-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (C) 1999,2001-2004, 2006 Silicon Graphics, Inc. All Rights Reserved. * * Module to export the system's Firmware Interface Tables, including * PROM revision numbers and banners, in /proc @@ -190,7 +190,7 @@ static int read_version_entry(char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = 0; + int len; /* data holds the NASID of the node */ len = dump_version(page, (unsigned long)data); @@ -202,7 +202,7 @@ static int read_fit_entry(char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = 0; + int len; /* data holds the NASID of the node */ len = dump_fit(page, (unsigned long)data); @@ -229,13 +229,16 @@ int __init prominfo_init(void) struct proc_dir_entry *p; cnodeid_t cnodeid; unsigned long nasid; + int size; char name[NODE_NAME_LEN]; if (!ia64_platform_is("sn2")) return 0; - proc_entries = kmalloc(num_online_nodes() * sizeof(struct proc_dir_entry *), - GFP_KERNEL); + size = num_online_nodes() * sizeof(struct proc_dir_entry *); + proc_entries = kzalloc(size, GFP_KERNEL); + if (!proc_entries) + return -ENOMEM; sgi_prominfo_entry = proc_mkdir("sgi_prominfo", NULL); @@ -244,14 +247,12 @@ int __init prominfo_init(void) sprintf(name, "node%d", cnodeid); *entp = proc_mkdir(name, sgi_prominfo_entry); nasid = cnodeid_to_nasid(cnodeid); - p = create_proc_read_entry( - "fit", 0, *entp, read_fit_entry, - (void *)nasid); + p = create_proc_read_entry("fit", 0, *entp, read_fit_entry, + (void *)nasid); if (p) p->owner = THIS_MODULE; - p = create_proc_read_entry( - "version", 0, *entp, read_version_entry, - (void *)nasid); + p = create_proc_read_entry("version", 0, *entp, + read_version_entry, (void *)nasid); if (p) p->owner = THIS_MODULE; entp++; @@ -263,7 +264,7 @@ int __init prominfo_init(void) void __exit prominfo_exit(void) { struct proc_dir_entry **entp; - unsigned cnodeid; + unsigned int cnodeid; char name[NODE_NAME_LEN]; entp = proc_entries; diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index f153a4c35c70..24eefb2fc55f 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -46,8 +46,14 @@ DECLARE_PER_CPU(struct ptc_stats, ptcstats); static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock); -void sn2_ptc_deadlock_recovery(short *, short, short, int, volatile unsigned long *, unsigned long, - volatile unsigned long *, unsigned long); +extern unsigned long +sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, + volatile unsigned long *, unsigned long, + volatile unsigned long *, unsigned long); +void +sn2_ptc_deadlock_recovery(short *, short, short, int, + volatile unsigned long *, unsigned long, + volatile unsigned long *, unsigned long); /* * Note: some is the following is captured here to make degugging easier @@ -59,16 +65,6 @@ void sn2_ptc_deadlock_recovery(short *, short, short, int, volatile unsigned lon #define reset_max_active_on_deadlock() 1 #define PTC_LOCK(sh1) ((sh1) ? &sn2_global_ptc_lock : &sn_nodepda->ptc_lock) -static inline void ptc_lock(int sh1, unsigned long *flagp) -{ - spin_lock_irqsave(PTC_LOCK(sh1), *flagp); -} - -static inline void ptc_unlock(int sh1, unsigned long flags) -{ - spin_unlock_irqrestore(PTC_LOCK(sh1), flags); -} - struct ptc_stats { unsigned long ptc_l; unsigned long change_rid; @@ -82,6 +78,8 @@ struct ptc_stats { unsigned long shub_ptc_flushes_not_my_mm; }; +#define sn2_ptctest 0 + static inline unsigned long wait_piowc(void) { volatile unsigned long *piows; @@ -200,7 +198,7 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, max_active = max_active_pio(shub1); itc = ia64_get_itc(); - ptc_lock(shub1, &flags); + spin_lock_irqsave(PTC_LOCK(shub1), flags); itc2 = ia64_get_itc(); __get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc; @@ -258,7 +256,7 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, ia64_srlz_d(); } - ptc_unlock(shub1, flags); + spin_unlock_irqrestore(PTC_LOCK(shub1), flags); preempt_enable(); } @@ -270,11 +268,12 @@ sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start, * TLB flush transaction. The recovery sequence is somewhat tricky & is * coded in assembly language. */ -void sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid, volatile unsigned long *ptc0, unsigned long data0, - volatile unsigned long *ptc1, unsigned long data1) + +void +sn2_ptc_deadlock_recovery(short *nasids, short ib, short ie, int mynasid, + volatile unsigned long *ptc0, unsigned long data0, + volatile unsigned long *ptc1, unsigned long data1) { - extern unsigned long sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, - volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long); short nasid, i; unsigned long *piows, zeroval, n; diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c index a06719d752a0..c686d9c12f7b 100644 --- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c +++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c @@ -6,11 +6,11 @@ * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. */ #include -#include #ifdef CONFIG_PROC_FS #include #include +#include #include static int partition_id_show(struct seq_file *s, void *p) @@ -90,10 +90,10 @@ static int coherence_id_open(struct inode *inode, struct file *file) return single_open(file, coherence_id_show, NULL); } -static struct proc_dir_entry *sn_procfs_create_entry( - const char *name, struct proc_dir_entry *parent, - int (*openfunc)(struct inode *, struct file *), - int (*releasefunc)(struct inode *, struct file *)) +static struct proc_dir_entry +*sn_procfs_create_entry(const char *name, struct proc_dir_entry *parent, + int (*openfunc)(struct inode *, struct file *), + int (*releasefunc)(struct inode *, struct file *)) { struct proc_dir_entry *e = create_proc_entry(name, 0444, parent); @@ -126,24 +126,24 @@ void register_sn_procfs(void) return; sn_procfs_create_entry("partition_id", sgi_proc_dir, - partition_id_open, single_release); + partition_id_open, single_release); sn_procfs_create_entry("system_serial_number", sgi_proc_dir, - system_serial_number_open, single_release); + system_serial_number_open, single_release); sn_procfs_create_entry("licenseID", sgi_proc_dir, - licenseID_open, single_release); + licenseID_open, single_release); e = sn_procfs_create_entry("sn_force_interrupt", sgi_proc_dir, - sn_force_interrupt_open, single_release); + sn_force_interrupt_open, single_release); if (e) e->proc_fops->write = sn_force_interrupt_write_proc; sn_procfs_create_entry("coherence_id", sgi_proc_dir, - coherence_id_open, single_release); + coherence_id_open, single_release); sn_procfs_create_entry("sn_topology", sgi_proc_dir, - sn_topology_open, sn_topology_release); + sn_topology_open, sn_topology_release); } #endif /* CONFIG_PROC_FS */ diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c index d263d3e8fbb9..8a56f8b5ffa2 100644 --- a/arch/ia64/sn/kernel/tiocx.c +++ b/arch/ia64/sn/kernel/tiocx.c @@ -284,12 +284,10 @@ struct sn_irq_info *tiocx_irq_alloc(nasid_t nasid, int widget, int irq, if ((nasid & 1) == 0) return NULL; - sn_irq_info = kmalloc(sn_irq_size, GFP_KERNEL); + sn_irq_info = kzalloc(sn_irq_size, GFP_KERNEL); if (sn_irq_info == NULL) return NULL; - memset(sn_irq_info, 0x0, sn_irq_size); - status = tiocx_intr_alloc(nasid, widget, __pa(sn_irq_info), irq, req_nasid, slice); if (status) { diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index 5a36292388eb..b4b84c269210 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -335,10 +335,10 @@ int sn_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size) */ SAL_CALL(isrv, SN_SAL_IOIF_PCI_SAFE, - pci_domain_nr(bus), bus->number, - 0, /* io */ - 0, /* read */ - port, size, __pa(val)); + pci_domain_nr(bus), bus->number, + 0, /* io */ + 0, /* read */ + port, size, __pa(val)); if (isrv.status == 0) return size; @@ -381,10 +381,10 @@ int sn_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size) */ SAL_CALL(isrv, SN_SAL_IOIF_PCI_SAFE, - pci_domain_nr(bus), bus->number, - 0, /* io */ - 1, /* write */ - port, size, __pa(&val)); + pci_domain_nr(bus), bus->number, + 0, /* io */ + 1, /* write */ + port, size, __pa(&val)); if (isrv.status == 0) return size; diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c index aa3fa5152a32..1f0253bfe0a0 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_ate.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2001-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2001-2006 Silicon Graphics, Inc. All rights reserved. */ #include @@ -12,7 +12,7 @@ #include #include -int pcibr_invalidate_ate = 0; /* by default don't invalidate ATE on free */ +int pcibr_invalidate_ate; /* by default don't invalidate ATE on free */ /* * mark_ate: Mark the ate as either free or inuse. @@ -20,14 +20,12 @@ int pcibr_invalidate_ate = 0; /* by default don't invalidate ATE on free */ static void mark_ate(struct ate_resource *ate_resource, int start, int number, u64 value) { - u64 *ate = ate_resource->ate; int index; int length = 0; for (index = start; length < number; index++, length++) ate[index] = value; - } /* @@ -37,7 +35,6 @@ static void mark_ate(struct ate_resource *ate_resource, int start, int number, static int find_free_ate(struct ate_resource *ate_resource, int start, int count) { - u64 *ate = ate_resource->ate; int index; int start_free; @@ -70,12 +67,10 @@ static int find_free_ate(struct ate_resource *ate_resource, int start, static inline void free_ate_resource(struct ate_resource *ate_resource, int start) { - mark_ate(ate_resource, start, ate_resource->ate[start], 0); if ((ate_resource->lowest_free_index > start) || (ate_resource->lowest_free_index < 0)) ate_resource->lowest_free_index = start; - } /* @@ -84,7 +79,6 @@ static inline void free_ate_resource(struct ate_resource *ate_resource, static inline int alloc_ate_resource(struct ate_resource *ate_resource, int ate_needed) { - int start_index; /* @@ -118,19 +112,12 @@ static inline int alloc_ate_resource(struct ate_resource *ate_resource, */ int pcibr_ate_alloc(struct pcibus_info *pcibus_info, int count) { - int status = 0; - u64 flag; + int status; + unsigned long flags; - flag = pcibr_lock(pcibus_info); + spin_lock_irqsave(&pcibus_info->pbi_lock, flags); status = alloc_ate_resource(&pcibus_info->pbi_int_ate_resource, count); - - if (status < 0) { - /* Failed to allocate */ - pcibr_unlock(pcibus_info, flag); - return -1; - } - - pcibr_unlock(pcibus_info, flag); + spin_unlock_irqrestore(&pcibus_info->pbi_lock, flags); return status; } @@ -182,7 +169,7 @@ void pcibr_ate_free(struct pcibus_info *pcibus_info, int index) ate_write(pcibus_info, index, count, (ate & ~PCI32_ATE_V)); } - flags = pcibr_lock(pcibus_info); + spin_lock_irqsave(&pcibus_info->pbi_lock, flags); free_ate_resource(&pcibus_info->pbi_int_ate_resource, index); - pcibr_unlock(pcibus_info, flags); + spin_unlock_irqrestore(&pcibus_info->pbi_lock, flags); } diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c index 54ce5b7ceed2..9f86bb6519aa 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c @@ -137,14 +137,12 @@ pcibr_dmatrans_direct64(struct pcidev_info * info, u64 paddr, pci_addr |= PCI64_ATTR_VIRTUAL; return pci_addr; - } static dma_addr_t pcibr_dmatrans_direct32(struct pcidev_info * info, u64 paddr, size_t req_size, u64 flags) { - struct pcidev_info *pcidev_info = info->pdi_host_pcidev_info; struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info-> pdi_pcibus_info; @@ -171,7 +169,6 @@ pcibr_dmatrans_direct32(struct pcidev_info * info, } return PCI32_DIRECT_BASE | offset; - } /* @@ -218,9 +215,8 @@ void sn_dma_flush(u64 addr) u64 flags; u64 itte; struct hubdev_info *hubinfo; - volatile struct sn_flush_device_kernel *p; - volatile struct sn_flush_device_common *common; - + struct sn_flush_device_kernel *p; + struct sn_flush_device_common *common; struct sn_flush_nasid_entry *flush_nasid_list; if (!sn_ioif_inited) @@ -310,8 +306,7 @@ void sn_dma_flush(u64 addr) (common->sfdl_slot - 1)); } } else { - spin_lock_irqsave((spinlock_t *)&p->sfdl_flush_lock, - flags); + spin_lock_irqsave(&p->sfdl_flush_lock, flags); *common->sfdl_flush_addr = 0; /* force an interrupt. */ @@ -322,8 +317,7 @@ void sn_dma_flush(u64 addr) cpu_relax(); /* okay, everything is synched up. */ - spin_unlock_irqrestore((spinlock_t *)&p->sfdl_flush_lock, - flags); + spin_unlock_irqrestore(&p->sfdl_flush_lock, flags); } return; } diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c index 2fac27049bf6..98f716bd92f0 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -163,9 +163,12 @@ pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont /* Setup the PMU ATE map */ soft->pbi_int_ate_resource.lowest_free_index = 0; soft->pbi_int_ate_resource.ate = - kmalloc(soft->pbi_int_ate_size * sizeof(u64), GFP_KERNEL); - memset(soft->pbi_int_ate_resource.ate, 0, - (soft->pbi_int_ate_size * sizeof(u64))); + kzalloc(soft->pbi_int_ate_size * sizeof(u64), GFP_KERNEL); + + if (!soft->pbi_int_ate_resource.ate) { + kfree(soft); + return NULL; + } if (prom_bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCP) { /* TIO PCI Bridge: find nearest node with CPUs */ diff --git a/include/asm-ia64/sn/bte.h b/include/asm-ia64/sn/bte.h index 01e5b4103235..5335d87ca5f8 100644 --- a/include/asm-ia64/sn/bte.h +++ b/include/asm-ia64/sn/bte.h @@ -46,7 +46,7 @@ #define BTES_PER_NODE (is_shub2() ? 4 : 2) #define MAX_BTES_PER_NODE 4 -#define BTE2OFF_CTRL (0) +#define BTE2OFF_CTRL 0 #define BTE2OFF_SRC (SH2_BT_ENG_SRC_ADDR_0 - SH2_BT_ENG_CSR_0) #define BTE2OFF_DEST (SH2_BT_ENG_DEST_ADDR_0 - SH2_BT_ENG_CSR_0) #define BTE2OFF_NOTIFY (SH2_BT_ENG_NOTIF_ADDR_0 - SH2_BT_ENG_CSR_0) @@ -75,11 +75,11 @@ : base + (BTEOFF_NOTIFY/8)) /* Define hardware modes */ -#define BTE_NOTIFY (IBCT_NOTIFY) +#define BTE_NOTIFY IBCT_NOTIFY #define BTE_NORMAL BTE_NOTIFY #define BTE_ZERO_FILL (BTE_NOTIFY | IBCT_ZFIL_MODE) /* Use a reserved bit to let the caller specify a wait for any BTE */ -#define BTE_WACQUIRE (0x4000) +#define BTE_WACQUIRE 0x4000 /* Use the BTE on the node with the destination memory */ #define BTE_USE_DEST (BTE_WACQUIRE << 1) /* Use any available BTE interface on any node for the transfer */ diff --git a/include/asm-ia64/sn/pcibr_provider.h b/include/asm-ia64/sn/pcibr_provider.h index 9334078b089a..a601d3af39b6 100644 --- a/include/asm-ia64/sn/pcibr_provider.h +++ b/include/asm-ia64/sn/pcibr_provider.h @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1992-1997,2000-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 1992-1997,2000-2006 Silicon Graphics, Inc. All rights reserved. */ #ifndef _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H #define _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H @@ -115,18 +115,6 @@ struct pcibus_info { spinlock_t pbi_lock; }; -/* - * pcibus_info structure locking macros - */ -inline static unsigned long -pcibr_lock(struct pcibus_info *pcibus_info) -{ - unsigned long flag; - spin_lock_irqsave(&pcibus_info->pbi_lock, flag); - return(flag); -} -#define pcibr_unlock(pcibus_info, flag) spin_unlock_irqrestore(&pcibus_info->pbi_lock, flag) - extern int pcibr_init_provider(void); extern void *pcibr_bus_fixup(struct pcibus_bussoft *, struct pci_controller *); extern dma_addr_t pcibr_dma_map(struct pci_dev *, unsigned long, size_t); diff --git a/include/asm-ia64/sn/sn_feature_sets.h b/include/asm-ia64/sn/sn_feature_sets.h index 9ca642cad338..ff33e3bd3f8e 100644 --- a/include/asm-ia64/sn/sn_feature_sets.h +++ b/include/asm-ia64/sn/sn_feature_sets.h @@ -12,9 +12,6 @@ */ -#include -#include - /* --------------------- PROM Features -----------------------------*/ extern int sn_prom_feature_available(int id); -- cgit v1.2.3 From d3454344b3507042e5d561d0cfed19e99cf2fc88 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Mon, 13 Feb 2006 05:32:09 -0500 Subject: [IA64] remove obsolete corporate address Remove obsolete SGI address Signed-off-by: Jes Sorensen Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/sn2/timer_interrupt.c | 7 +------ drivers/ide/pci/sgiioc4.c | 5 ----- include/asm-ia64/machvec_sn2.h | 7 +------ 3 files changed, 2 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/arch/ia64/sn/kernel/sn2/timer_interrupt.c b/arch/ia64/sn/kernel/sn2/timer_interrupt.c index adf5db2e2afe..fa7f69945917 100644 --- a/arch/ia64/sn/kernel/sn2/timer_interrupt.c +++ b/arch/ia64/sn/kernel/sn2/timer_interrupt.c @@ -1,7 +1,7 @@ /* * * - * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2005, 2006 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License @@ -22,11 +22,6 @@ * License along with this program; if not, write the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * * For further information regarding this notice, see: * * http://oss.sgi.com/projects/GenInfo/NoticeExplan diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c index 2b286e865163..43b96e298363 100644 --- a/drivers/ide/pci/sgiioc4.c +++ b/drivers/ide/pci/sgiioc4.c @@ -13,11 +13,6 @@ * License along with this program; if not, write the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * * For further information regarding this notice, see: * * http://oss.sgi.com/projects/GenInfo/NoticeExplan diff --git a/include/asm-ia64/machvec_sn2.h b/include/asm-ia64/machvec_sn2.h index e1b6cd63f49e..03d00faf03b5 100644 --- a/include/asm-ia64/machvec_sn2.h +++ b/include/asm-ia64/machvec_sn2.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002-2003 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2002-2003, 2006 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License @@ -20,11 +20,6 @@ * License along with this program; if not, write the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * * For further information regarding this notice, see: * * http://oss.sgi.com/projects/GenInfo/NoticeExplan -- cgit v1.2.3 From c2a4969ba14e852bf4ee92c7db3b0cf82405a0c9 Mon Sep 17 00:00:00 2001 From: Dean Roe Date: Tue, 14 Feb 2006 15:01:23 -0600 Subject: [IA64-SGI] fix the size of __sn_cnodeid_to_nasid The __sn_cnodeid_to_nasid array was incorrectly sized at MAX_NUMNODES. On a large system, this array could overflow. The following patch corrects this by defining it to MAX_COMPACT_NODES. Signed-off-by: Dean Roe Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/setup.c | 2 +- include/asm-ia64/sn/arch.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 1672ecad77e2..5b84836c2171 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -75,7 +75,7 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second); DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); EXPORT_PER_CPU_SYMBOL(__sn_hub_info); -DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_NUMNODES]); +DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]); EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid); DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda); diff --git a/include/asm-ia64/sn/arch.h b/include/asm-ia64/sn/arch.h index 1a3831c04af6..91c31be87b13 100644 --- a/include/asm-ia64/sn/arch.h +++ b/include/asm-ia64/sn/arch.h @@ -70,7 +70,7 @@ DECLARE_PER_CPU(struct sn_hub_info_s, __sn_hub_info); * Compact node ID to nasid mappings kept in the per-cpu data areas of each * cpu. */ -DECLARE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_NUMNODES]); +DECLARE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_COMPACT_NODES]); #define sn_cnodeid_to_nasid (&__get_cpu_var(__sn_cnodeid_to_nasid[0])) -- cgit v1.2.3 From 4c2cd96696ae0896ce4bcf725b9f0eaffafeb640 Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Wed, 15 Feb 2006 08:02:21 -0600 Subject: [IA64-SGI] enforce proper ordering of callouts by XPC Fix XPC so that it does not deliver any messages until the connected callout has returned, as well as, prevent the disconnected callout to occur before the disconnecting callout has returned. Signed-off-by: Dean Nelson Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/xpc_channel.c | 8 +++++--- arch/ia64/sn/kernel/xpc_main.c | 20 +++++++++++++------- include/asm-ia64/sn/xpc.h | 31 ++++++++++++++++++------------- 3 files changed, 36 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c index 36e5437a0fb6..cdf6856ce089 100644 --- a/arch/ia64/sn/kernel/xpc_channel.c +++ b/arch/ia64/sn/kernel/xpc_channel.c @@ -738,7 +738,9 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) /* make sure all activity has settled down first */ - if (atomic_read(&ch->references) > 0) { + if (atomic_read(&ch->references) > 0 || + ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && + !(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE))) { return; } DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0); @@ -775,7 +777,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) /* both sides are disconnected now */ - if (ch->flags & XPC_C_CONNECTCALLOUT) { + if (ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE) { spin_unlock_irqrestore(&ch->lock, *irq_flags); xpc_disconnect_callout(ch, xpcDisconnected); spin_lock_irqsave(&ch->lock, *irq_flags); @@ -1300,7 +1302,7 @@ xpc_process_msg_IPI(struct xpc_partition *part, int ch_number) "delivered=%d, partid=%d, channel=%d\n", nmsgs_sent, ch->partid, ch->number); - if (ch->flags & XPC_C_CONNECTCALLOUT) { + if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) { xpc_activate_kthreads(ch, nmsgs_sent); } } diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c index 9cd460dfe27e..8cbf16432570 100644 --- a/arch/ia64/sn/kernel/xpc_main.c +++ b/arch/ia64/sn/kernel/xpc_main.c @@ -750,12 +750,16 @@ xpc_daemonize_kthread(void *args) /* let registerer know that connection has been established */ spin_lock_irqsave(&ch->lock, irq_flags); - if (!(ch->flags & XPC_C_CONNECTCALLOUT)) { - ch->flags |= XPC_C_CONNECTCALLOUT; + if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) { + ch->flags |= XPC_C_CONNECTEDCALLOUT; spin_unlock_irqrestore(&ch->lock, irq_flags); xpc_connected_callout(ch); + spin_lock_irqsave(&ch->lock, irq_flags); + ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE; + spin_unlock_irqrestore(&ch->lock, irq_flags); + /* * It is possible that while the callout was being * made that the remote partition sent some messages. @@ -777,15 +781,17 @@ xpc_daemonize_kthread(void *args) if (atomic_dec_return(&ch->kthreads_assigned) == 0) { spin_lock_irqsave(&ch->lock, irq_flags); - if ((ch->flags & XPC_C_CONNECTCALLOUT) && - !(ch->flags & XPC_C_DISCONNECTCALLOUT)) { - ch->flags |= XPC_C_DISCONNECTCALLOUT; + if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) && + !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) { + ch->flags |= XPC_C_DISCONNECTINGCALLOUT; spin_unlock_irqrestore(&ch->lock, irq_flags); xpc_disconnect_callout(ch, xpcDisconnecting); - } else { - spin_unlock_irqrestore(&ch->lock, irq_flags); + + spin_lock_irqsave(&ch->lock, irq_flags); + ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE; } + spin_unlock_irqrestore(&ch->lock, irq_flags); if (atomic_dec_return(&part->nchannels_engaged) == 0) { xpc_mark_partition_disengaged(part); xpc_IPI_send_disengage(part); diff --git a/include/asm-ia64/sn/xpc.h b/include/asm-ia64/sn/xpc.h index 0c36928ffd8b..df7f5f4f3cde 100644 --- a/include/asm-ia64/sn/xpc.h +++ b/include/asm-ia64/sn/xpc.h @@ -508,19 +508,24 @@ struct xpc_channel { #define XPC_C_OPENREQUEST 0x00000010 /* local open channel request */ #define XPC_C_SETUP 0x00000020 /* channel's msgqueues are alloc'd */ -#define XPC_C_CONNECTCALLOUT 0x00000040 /* channel connected callout made */ -#define XPC_C_CONNECTED 0x00000080 /* local channel is connected */ -#define XPC_C_CONNECTING 0x00000100 /* channel is being connected */ - -#define XPC_C_RCLOSEREPLY 0x00000200 /* remote close channel reply */ -#define XPC_C_CLOSEREPLY 0x00000400 /* local close channel reply */ -#define XPC_C_RCLOSEREQUEST 0x00000800 /* remote close channel request */ -#define XPC_C_CLOSEREQUEST 0x00001000 /* local close channel request */ - -#define XPC_C_DISCONNECTED 0x00002000 /* channel is disconnected */ -#define XPC_C_DISCONNECTING 0x00004000 /* channel is being disconnected */ -#define XPC_C_DISCONNECTCALLOUT 0x00008000 /* chan disconnected callout made */ -#define XPC_C_WDISCONNECT 0x00010000 /* waiting for channel disconnect */ +#define XPC_C_CONNECTEDCALLOUT 0x00000040 /* connected callout initiated */ +#define XPC_C_CONNECTEDCALLOUT_MADE \ + 0x00000080 /* connected callout completed */ +#define XPC_C_CONNECTED 0x00000100 /* local channel is connected */ +#define XPC_C_CONNECTING 0x00000200 /* channel is being connected */ + +#define XPC_C_RCLOSEREPLY 0x00000400 /* remote close channel reply */ +#define XPC_C_CLOSEREPLY 0x00000800 /* local close channel reply */ +#define XPC_C_RCLOSEREQUEST 0x00001000 /* remote close channel request */ +#define XPC_C_CLOSEREQUEST 0x00002000 /* local close channel request */ + +#define XPC_C_DISCONNECTED 0x00004000 /* channel is disconnected */ +#define XPC_C_DISCONNECTING 0x00008000 /* channel is being disconnected */ +#define XPC_C_DISCONNECTINGCALLOUT \ + 0x00010000 /* disconnecting callout initiated */ +#define XPC_C_DISCONNECTINGCALLOUT_MADE \ + 0x00020000 /* disconnecting callout completed */ +#define XPC_C_WDISCONNECT 0x00040000 /* waiting for channel disconnect */ -- cgit v1.2.3 From defbb2c929cbe89dc92239b303cd33d3c85e9a83 Mon Sep 17 00:00:00 2001 From: "hawkes@sgi.com" Date: Tue, 14 Feb 2006 10:40:17 -0800 Subject: [IA64] ia64: simplify and fix udelay() The original ia64 udelay() was simple, but flawed for platforms without synchronized ITCs: a preemption and migration to another CPU during the while-loop likely resulted in too-early termination or very, very lengthy looping. The first fix (now in 2.6.15) broke the delay loop into smaller, non-preemptible chunks, reenabling preemption between the chunks. This fix is flawed in that the total udelay is computed to be the sum of just the non-premptible while-loop pieces, i.e., not counting the time spent in the interim preemptible periods. If an interrupt or a migration occurs during one of these interim periods, then that time is invisible and only serves to lengthen the effective udelay(). This new fix backs out the current flawed fix and returns to a simple udelay(), fully preemptible and interruptible. It implements two simple alternative udelay() routines: one a default generic version that uses ia64_get_itc(), and the other an sn-specific version that uses that platform's RTC. Signed-off-by: John Hawkes Signed-off-by: Tony Luck --- arch/ia64/kernel/time.c | 39 +++++++++++++++++---------------------- arch/ia64/sn/kernel/sn2/timer.c | 19 +++++++++++++++++++ include/asm-ia64/timex.h | 2 ++ 3 files changed, 38 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index a094ec49ccfa..307d01e15b2e 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -250,32 +250,27 @@ time_init (void) set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); } -#define SMALLUSECS 100 - -void -udelay (unsigned long usecs) +/* + * Generic udelay assumes that if preemption is allowed and the thread + * migrates to another CPU, that the ITC values are synchronized across + * all CPUs. + */ +static void +ia64_itc_udelay (unsigned long usecs) { - unsigned long start; - unsigned long cycles; - unsigned long smallusecs; + unsigned long start = ia64_get_itc(); + unsigned long end = start + usecs*local_cpu_data->cyc_per_usec; - /* - * Execute the non-preemptible delay loop (because the ITC might - * not be synchronized between CPUS) in relatively short time - * chunks, allowing preemption between the chunks. - */ - while (usecs > 0) { - smallusecs = (usecs > SMALLUSECS) ? SMALLUSECS : usecs; - preempt_disable(); - cycles = smallusecs*local_cpu_data->cyc_per_usec; - start = ia64_get_itc(); + while (time_before(ia64_get_itc(), end)) + cpu_relax(); +} - while (ia64_get_itc() - start < cycles) - cpu_relax(); +void (*ia64_udelay)(unsigned long usecs) = &ia64_itc_udelay; - preempt_enable(); - usecs -= smallusecs; - } +void +udelay (unsigned long usecs) +{ + (*ia64_udelay)(usecs); } EXPORT_SYMBOL(udelay); diff --git a/arch/ia64/sn/kernel/sn2/timer.c b/arch/ia64/sn/kernel/sn2/timer.c index deb9baf4d473..56a88b6df4b4 100644 --- a/arch/ia64/sn/kernel/sn2/timer.c +++ b/arch/ia64/sn/kernel/sn2/timer.c @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -28,9 +29,27 @@ static struct time_interpolator sn2_interpolator = { .source = TIME_SOURCE_MMIO64 }; +/* + * sn udelay uses the RTC instead of the ITC because the ITC is not + * synchronized across all CPUs, and the thread may migrate to another CPU + * if preemption is enabled. + */ +static void +ia64_sn_udelay (unsigned long usecs) +{ + unsigned long start = rtc_time(); + unsigned long end = start + + usecs * sn_rtc_cycles_per_second / 1000000; + + while (time_before((unsigned long)rtc_time(), end)) + cpu_relax(); +} + void __init sn_timer_init(void) { sn2_interpolator.frequency = sn_rtc_cycles_per_second; sn2_interpolator.addr = RTC_COUNTER_ADDR; register_time_interpolator(&sn2_interpolator); + + ia64_udelay = &ia64_sn_udelay; } diff --git a/include/asm-ia64/timex.h b/include/asm-ia64/timex.h index 414aae060440..05a6baf8a472 100644 --- a/include/asm-ia64/timex.h +++ b/include/asm-ia64/timex.h @@ -15,6 +15,8 @@ typedef unsigned long cycles_t; +extern void (*ia64_udelay)(unsigned long usecs); + /* * For performance reasons, we don't want to define CLOCK_TICK_TRATE as * local_cpu_data->itc_rate. Fortunately, we don't have to, either: according to George -- cgit v1.2.3 From 48d5cad87c3a4998d0bda16ccfb5c60dfe4de5fb Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 15 Feb 2006 15:10:22 -0800 Subject: [XFRM]: Fix SNAT-related crash in xfrm4_output_finish When a packet matching an IPsec policy is SNATed so it doesn't match any policy anymore it looses its xfrm bundle, which makes xfrm4_output_finish crash because of a NULL pointer dereference. This patch directs these packets to the original output path instead. Since the packets have already passed the POST_ROUTING hook, but need to start at the beginning of the original output path which includes another POST_ROUTING invocation, a flag is added to the IPCB to indicate that the packet was rerouted and doesn't need to pass the POST_ROUTING hook again. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter.h | 19 +++++++++++++++---- include/net/ip.h | 1 + include/net/xfrm.h | 1 - net/ipv4/ip_gre.c | 3 ++- net/ipv4/ip_output.c | 16 ++++++++++------ net/ipv4/ipip.c | 3 ++- net/ipv4/xfrm4_output.c | 13 ++++++++++--- 7 files changed, 40 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 4cf6088625c1..3ca3d9ee78a9 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -184,8 +184,11 @@ static inline int nf_hook_thresh(int pf, unsigned int hook, struct sk_buff **pskb, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *), int thresh) + int (*okfn)(struct sk_buff *), int thresh, + int cond) { + if (!cond) + return 1; #ifndef CONFIG_NETFILTER_DEBUG if (list_empty(&nf_hooks[pf][hook])) return 1; @@ -197,7 +200,7 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { - return nf_hook_thresh(pf, hook, pskb, indev, outdev, okfn, INT_MIN); + return nf_hook_thresh(pf, hook, pskb, indev, outdev, okfn, INT_MIN, 1); } /* Activate hook; either okfn or kfree_skb called, unless a hook @@ -224,7 +227,13 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb, #define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \ ({int __ret; \ -if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh)) == 1)\ +if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh, 1)) == 1)\ + __ret = (okfn)(skb); \ +__ret;}) + +#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) \ +({int __ret; \ +if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, INT_MIN, cond)) == 1)\ __ret = (okfn)(skb); \ __ret;}) @@ -295,11 +304,13 @@ extern struct proc_dir_entry *proc_net_netfilter; #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) +#define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb) static inline int nf_hook_thresh(int pf, unsigned int hook, struct sk_buff **pskb, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *), int thresh) + int (*okfn)(struct sk_buff *), int thresh, + int cond) { return okfn(*pskb); } diff --git a/include/net/ip.h b/include/net/ip.h index 8de0697b364c..fab3d5b3ab1c 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -41,6 +41,7 @@ struct inet_skb_parm #define IPSKB_XFRM_TUNNEL_SIZE 2 #define IPSKB_XFRM_TRANSFORMED 4 #define IPSKB_FRAG_COMPLETE 8 +#define IPSKB_REROUTED 16 }; struct ipcm_cookie diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d09ca0e7d139..d6111a2f0a23 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -866,7 +866,6 @@ extern int xfrm_state_mtu(struct xfrm_state *x, int mtu); extern int xfrm_init_state(struct xfrm_state *x); extern int xfrm4_rcv(struct sk_buff *skb); extern int xfrm4_output(struct sk_buff *skb); -extern int xfrm4_output_finish(struct sk_buff *skb); extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler); extern int xfrm6_rcv_spi(struct sk_buff **pskb, u32 spi); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index abe23923e4e7..9981dcd68f11 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -830,7 +830,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.raw = skb->nh.raw; skb->nh.raw = skb_push(skb, gre_hlen); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | + IPSKB_REROUTED); dst_release(skb->dst); skb->dst = &rt->u.dst; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 3324fbfe528a..57d290d89ec2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -207,8 +207,10 @@ static inline int ip_finish_output(struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ - if (skb->dst->xfrm != NULL) - return xfrm4_output_finish(skb); + if (skb->dst->xfrm != NULL) { + IPCB(skb)->flags |= IPSKB_REROUTED; + return dst_output(skb); + } #endif if (skb->len > dst_mtu(skb->dst) && !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) @@ -271,8 +273,9 @@ int ip_mc_output(struct sk_buff *skb) newskb->dev, ip_dev_loopback_xmit); } - return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev, - ip_finish_output); + return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dev, + ip_finish_output, + !(IPCB(skb)->flags & IPSKB_REROUTED)); } int ip_output(struct sk_buff *skb) @@ -284,8 +287,9 @@ int ip_output(struct sk_buff *skb) skb->dev = dev; skb->protocol = htons(ETH_P_IP); - return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev, - ip_finish_output); + return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, dev, + ip_finish_output, + !(IPCB(skb)->flags & IPSKB_REROUTED)); } int ip_queue_xmit(struct sk_buff *skb, int ipfragok) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index e5cbe72c6b80..03d13742a4b8 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -622,7 +622,8 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.raw = skb->nh.raw; skb->nh.raw = skb_push(skb, sizeof(struct iphdr)); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | + IPSKB_REROUTED); dst_release(skb->dst); skb->dst = &rt->u.dst; diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index d4df0ddd424b..32ad229b4fed 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -152,10 +152,16 @@ error_nolock: goto out_exit; } -int xfrm4_output_finish(struct sk_buff *skb) +static int xfrm4_output_finish(struct sk_buff *skb) { int err; +#ifdef CONFIG_NETFILTER + if (!skb->dst->xfrm) { + IPCB(skb)->flags |= IPSKB_REROUTED; + return dst_output(skb); + } +#endif while (likely((err = xfrm4_output_one(skb)) == 0)) { nf_reset(skb); @@ -178,6 +184,7 @@ int xfrm4_output_finish(struct sk_buff *skb) int xfrm4_output(struct sk_buff *skb) { - return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev, - xfrm4_output_finish); + return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev, + xfrm4_output_finish, + !(IPCB(skb)->flags & IPSKB_REROUTED)); } -- cgit v1.2.3 From 9c92d3486434e7310cb288587953e2dae4a79701 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 15 Feb 2006 15:18:19 -0800 Subject: [NETFILTER]: Don't invoke okfn in CONFIG_NETFILTER=n variant of nf_hook() nf_hook() is supposed to call the netfilter hook and return control of the packet back to the caller in case it may pass, the okfn is only used for queueing. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 3ca3d9ee78a9..468896939843 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -318,7 +318,7 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { - return okfn(*pskb); + return 1; } static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} struct flowi; -- cgit v1.2.3 From 9f672004ab1a8094bec1785b39ac683ab9eebebc Mon Sep 17 00:00:00 2001 From: Christian Trefzer Date: Wed, 15 Feb 2006 15:17:34 -0800 Subject: [PATCH] neofb: avoid resetting display config on unblank (v2) There were two mistakes in the register-read-on-(un)blank approach. - First, without proper register (un)locking the value read back will always be zero, and this is what I missed entirely until just now. Due to this, the logic could not be verified at all and I tried some bogus checks which are completely stupid. - Second, the LCD status bit will always be set to zero when the backlight has been turned off. Reading the value back during unblank will disable the LCD unconditionally, regardless of the state it is supposed to be in, since we set it to zero beforehand. So this is what we do now: - create a new variable in struct neofb_par, and use that to determine whether to read back registers (initialized to true) - before actually blanking the screen, read back the register to sense any possible change made through Fn key combo - use proper neoUnlock() / neoLock() to actually read something - every call to neofb_blank() determines if we read back next time: blanking disables readback, unblanking (FB_BLANK_UNBLANK) enables it This should give us a nice and clean state machine. Has been thoroughly tested on a Dell Latitude CPiA / NM220 Chip docked to a C/Dock2 with attached CRT in all possible combinations of LCD/CRT on/off. I changed the config via Fn key, let the console blank, unblanked by keypress - works flawlessly. Signed-off-by: Christian Trefzer Cc: "Antonino A. Daplas" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/neofb.c | 15 ++++++++++++--- include/video/neomagic.h | 1 + 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/video/neofb.c b/drivers/video/neofb.c index b85e2b180a44..a2e201dc40f7 100644 --- a/drivers/video/neofb.c +++ b/drivers/video/neofb.c @@ -843,6 +843,9 @@ static int neofb_set_par(struct fb_info *info) par->SysIfaceCntl2 = 0xc0; /* VESA Bios sets this to 0x80! */ + /* Initialize: by default, we want display config register to be read */ + par->PanelDispCntlRegRead = 1; + /* Enable any user specified display devices. */ par->PanelDispCntlReg1 = 0x00; if (par->internal_display) @@ -1334,11 +1337,17 @@ static int neofb_blank(int blank_mode, struct fb_info *info) struct neofb_par *par = info->par; int seqflags, lcdflags, dpmsflags, reg; + /* - * Reload the value stored in the register, might have been changed via - * FN keystroke + * Reload the value stored in the register, if sensible. It might have + * been changed via FN keystroke. */ - par->PanelDispCntlReg1 = vga_rgfx(NULL, 0x20) & 0x03; + if (par->PanelDispCntlRegRead) { + neoUnlock(); + par->PanelDispCntlReg1 = vga_rgfx(NULL, 0x20) & 0x03; + neoLock(&par->state); + } + par->PanelDispCntlRegRead = !blank_mode; switch (blank_mode) { case FB_BLANK_POWERDOWN: /* powerdown - both sync lines down */ diff --git a/include/video/neomagic.h b/include/video/neomagic.h index 1d69049bd4c1..78b1f15a538f 100644 --- a/include/video/neomagic.h +++ b/include/video/neomagic.h @@ -159,6 +159,7 @@ struct neofb_par { unsigned char PanelDispCntlReg1; unsigned char PanelDispCntlReg2; unsigned char PanelDispCntlReg3; + unsigned char PanelDispCntlRegRead; unsigned char PanelVertCenterReg1; unsigned char PanelVertCenterReg2; unsigned char PanelVertCenterReg3; -- cgit v1.2.3 From 5f6164f3092832e0d9b12eed52e09a76bf39c64a Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 15 Feb 2006 15:17:39 -0800 Subject: [PATCH] add asm-generic/mman.h Make new MADV_REMOVE, MADV_DONTFORK, MADV_DOFORK consistent across all arches. The idea is to make it possible to use them portably even before distros include them in libc headers. Move common flags to asm-generic/mman.h Signed-off-by: Michael S. Tsirkin Cc: Roland Dreier Cc: Badari Pulavarty Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-alpha/mman.h | 8 +++++--- include/asm-arm/mman.h | 31 +------------------------------ include/asm-arm26/mman.h | 31 +------------------------------ include/asm-cris/mman.h | 31 +------------------------------ include/asm-frv/mman.h | 31 +------------------------------ include/asm-generic/mman.h | 42 ++++++++++++++++++++++++++++++++++++++++++ include/asm-h8300/mman.h | 31 +------------------------------ include/asm-i386/mman.h | 31 +------------------------------ include/asm-ia64/mman.h | 31 +------------------------------ include/asm-m32r/mman.h | 33 ++------------------------------- include/asm-m68k/mman.h | 31 +------------------------------ include/asm-mips/mman.h | 22 ++++++++++++---------- include/asm-parisc/mman.h | 8 +++++--- include/asm-powerpc/mman.h | 32 ++------------------------------ include/asm-s390/mman.h | 31 +------------------------------ include/asm-sh/mman.h | 31 +------------------------------ include/asm-sparc/mman.h | 31 ++----------------------------- include/asm-sparc64/mman.h | 31 ++----------------------------- include/asm-v850/mman.h | 30 +----------------------------- include/asm-x86_64/mman.h | 30 +----------------------------- include/asm-xtensa/mman.h | 22 ++++++++++++---------- 21 files changed, 96 insertions(+), 503 deletions(-) create mode 100644 include/asm-generic/mman.h (limited to 'include') diff --git a/include/asm-alpha/mman.h b/include/asm-alpha/mman.h index a21515c16a43..5f24c755f577 100644 --- a/include/asm-alpha/mman.h +++ b/include/asm-alpha/mman.h @@ -42,9 +42,11 @@ #define MADV_WILLNEED 3 /* will need these pages */ #define MADV_SPACEAVAIL 5 /* ensure resources are available */ #define MADV_DONTNEED 6 /* don't need these pages */ -#define MADV_REMOVE 7 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ + +/* common/generic parameters */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-arm/mman.h b/include/asm-arm/mman.h index 693ed859e632..54570d2e95b7 100644 --- a/include/asm-arm/mman.h +++ b/include/asm-arm/mman.h @@ -1,19 +1,7 @@ #ifndef __ARM_MMAN_H__ #define __ARM_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -23,24 +11,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) page tables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __ARM_MMAN_H__ */ diff --git a/include/asm-arm26/mman.h b/include/asm-arm26/mman.h index 2096c50df888..4000a6c1b76b 100644 --- a/include/asm-arm26/mman.h +++ b/include/asm-arm26/mman.h @@ -1,19 +1,7 @@ #ifndef __ARM_MMAN_H__ #define __ARM_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -23,24 +11,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) page tables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __ARM_MMAN_H__ */ diff --git a/include/asm-cris/mman.h b/include/asm-cris/mman.h index deddfb239ff5..1c35e1b66b46 100644 --- a/include/asm-cris/mman.h +++ b/include/asm-cris/mman.h @@ -3,19 +3,7 @@ /* verbatim copy of asm-i386/ version */ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -25,24 +13,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __CRIS_MMAN_H__ */ diff --git a/include/asm-frv/mman.h b/include/asm-frv/mman.h index d3bca306da82..b4371e928683 100644 --- a/include/asm-frv/mman.h +++ b/include/asm-frv/mman.h @@ -1,19 +1,7 @@ #ifndef __ASM_MMAN_H__ #define __ASM_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -23,25 +11,8 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __ASM_MMAN_H__ */ diff --git a/include/asm-generic/mman.h b/include/asm-generic/mman.h new file mode 100644 index 000000000000..3b41d2bb70da --- /dev/null +++ b/include/asm-generic/mman.h @@ -0,0 +1,42 @@ +#ifndef _ASM_GENERIC_MMAN_H +#define _ASM_GENERIC_MMAN_H + +/* + Author: Michael S. Tsirkin , Mellanox Technologies Ltd. + Based on: asm-xxx/mman.h +*/ + +#define PROT_READ 0x1 /* page can be read */ +#define PROT_WRITE 0x2 /* page can be written */ +#define PROT_EXEC 0x4 /* page can be executed */ +#define PROT_SEM 0x8 /* page may be used for atomic ops */ +#define PROT_NONE 0x0 /* page can not be accessed */ +#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ +#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ + +#define MAP_SHARED 0x01 /* Share changes */ +#define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_TYPE 0x0f /* Mask for type of mapping */ +#define MAP_FIXED 0x10 /* Interpret addr exactly */ +#define MAP_ANONYMOUS 0x20 /* don't use a file */ + +#define MS_ASYNC 1 /* sync memory asynchronously */ +#define MS_INVALIDATE 2 /* invalidate the caches */ +#define MS_SYNC 4 /* synchronous memory sync */ + +#define MADV_NORMAL 0 /* no further special treatment */ +#define MADV_RANDOM 1 /* expect random page references */ +#define MADV_SEQUENTIAL 2 /* expect sequential page references */ +#define MADV_WILLNEED 3 /* will need these pages */ +#define MADV_DONTNEED 4 /* don't need these pages */ + +/* common parameters: try to keep these consistent across architectures */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ + +/* compatibility flags */ +#define MAP_ANON MAP_ANONYMOUS +#define MAP_FILE 0 + +#endif diff --git a/include/asm-h8300/mman.h b/include/asm-h8300/mman.h index ac0346f7d11d..b9f104f22a36 100644 --- a/include/asm-h8300/mman.h +++ b/include/asm-h8300/mman.h @@ -1,19 +1,7 @@ #ifndef __H8300_MMAN_H__ #define __H8300_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -23,24 +11,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __H8300_MMAN_H__ */ diff --git a/include/asm-i386/mman.h b/include/asm-i386/mman.h index ab2339a1d807..8fd9d7ab7faf 100644 --- a/include/asm-i386/mman.h +++ b/include/asm-i386/mman.h @@ -1,19 +1,7 @@ #ifndef __I386_MMAN_H__ #define __I386_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -23,24 +11,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __I386_MMAN_H__ */ diff --git a/include/asm-ia64/mman.h b/include/asm-ia64/mman.h index 357ebb780cc0..6ba179f12718 100644 --- a/include/asm-ia64/mman.h +++ b/include/asm-ia64/mman.h @@ -8,19 +8,7 @@ * David Mosberger-Tang , Hewlett-Packard Co */ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x00100 /* stack-like segment */ #define MAP_GROWSUP 0x00200 /* register stack-like segment */ @@ -31,24 +19,7 @@ #define MAP_POPULATE 0x08000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* _ASM_IA64_MMAN_H */ diff --git a/include/asm-m32r/mman.h b/include/asm-m32r/mman.h index 6b02fe3fcff2..695a860c024f 100644 --- a/include/asm-m32r/mman.h +++ b/include/asm-m32r/mman.h @@ -1,21 +1,9 @@ #ifndef __M32R_MMAN_H__ #define __M32R_MMAN_H__ -/* orig : i386 2.6.0-test6 */ - -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ +#include -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +/* orig : i386 2.6.0-test6 */ #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -25,24 +13,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __M32R_MMAN_H__ */ diff --git a/include/asm-m68k/mman.h b/include/asm-m68k/mman.h index efd12bc4ccb7..1626d37f4898 100644 --- a/include/asm-m68k/mman.h +++ b/include/asm-m68k/mman.h @@ -1,19 +1,7 @@ #ifndef __M68K_MMAN_H__ #define __M68K_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -23,24 +11,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __M68K_MMAN_H__ */ diff --git a/include/asm-mips/mman.h b/include/asm-mips/mman.h index 6d01e26830fa..046cf686bee7 100644 --- a/include/asm-mips/mman.h +++ b/include/asm-mips/mman.h @@ -60,17 +60,19 @@ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ +#define MADV_NORMAL 0 /* no further special treatment */ +#define MADV_RANDOM 1 /* expect random page references */ +#define MADV_SEQUENTIAL 2 /* expect sequential page references */ +#define MADV_WILLNEED 3 /* will need these pages */ +#define MADV_DONTNEED 4 /* don't need these pages */ + +/* common parameters: try to keep these consistent across architectures */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ /* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 +#define MAP_ANON MAP_ANONYMOUS +#define MAP_FILE 0 #endif /* _ASM_MMAN_H */ diff --git a/include/asm-parisc/mman.h b/include/asm-parisc/mman.h index a381cf5c8f55..0ef15ee0f17e 100644 --- a/include/asm-parisc/mman.h +++ b/include/asm-parisc/mman.h @@ -38,7 +38,11 @@ #define MADV_SPACEAVAIL 5 /* insure that resources are reserved */ #define MADV_VPS_PURGE 6 /* Purge pages from VM page cache */ #define MADV_VPS_INHERIT 7 /* Inherit parents page size */ -#define MADV_REMOVE 8 /* remove these pages & resources */ + +/* common/generic parameters */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ /* The range 12-64 is reserved for page size specification. */ #define MADV_4K_PAGES 12 /* Use 4K pages */ @@ -49,8 +53,6 @@ #define MADV_4M_PAGES 22 /* Use 4 Megabyte pages */ #define MADV_16M_PAGES 24 /* Use 16 Megabyte pages */ #define MADV_64M_PAGES 26 /* Use 64 Megabyte pages */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ /* compatibility flags */ #define MAP_ANON MAP_ANONYMOUS diff --git a/include/asm-powerpc/mman.h b/include/asm-powerpc/mman.h index fcff25d13f13..24cf664a8295 100644 --- a/include/asm-powerpc/mman.h +++ b/include/asm-powerpc/mman.h @@ -1,6 +1,8 @@ #ifndef _ASM_POWERPC_MMAN_H #define _ASM_POWERPC_MMAN_H +#include + /* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -8,19 +10,6 @@ * 2 of the License, or (at your option) any later version. */ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ #define MAP_RENAME MAP_ANONYMOUS /* In SunOS terminology */ #define MAP_NORESERVE 0x40 /* don't reserve swap pages */ #define MAP_LOCKED 0x80 @@ -29,27 +18,10 @@ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ #define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 0x2000 /* lock all currently mapped pages */ #define MCL_FUTURE 0x4000 /* lock all additions to address space */ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* _ASM_POWERPC_MMAN_H */ diff --git a/include/asm-s390/mman.h b/include/asm-s390/mman.h index d41ca1477010..7839767d837e 100644 --- a/include/asm-s390/mman.h +++ b/include/asm-s390/mman.h @@ -9,19 +9,7 @@ #ifndef __S390_MMAN_H__ #define __S390_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -31,24 +19,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __S390_MMAN_H__ */ diff --git a/include/asm-sh/mman.h b/include/asm-sh/mman.h index 0e08d0573abc..156eb0225cf6 100644 --- a/include/asm-sh/mman.h +++ b/include/asm-sh/mman.h @@ -1,19 +1,7 @@ #ifndef __ASM_SH_MMAN_H #define __ASM_SH_MMAN_H -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -23,24 +11,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) page tables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __ASM_SH_MMAN_H */ diff --git a/include/asm-sparc/mman.h b/include/asm-sparc/mman.h index 4a298b2be859..88d1886abf3b 100644 --- a/include/asm-sparc/mman.h +++ b/include/asm-sparc/mman.h @@ -2,21 +2,10 @@ #ifndef __SPARC_MMAN_H__ #define __SPARC_MMAN_H__ -/* SunOS'ified... */ +#include -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ +/* SunOS'ified... */ -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ #define MAP_RENAME MAP_ANONYMOUS /* In SunOS terminology */ #define MAP_NORESERVE 0x40 /* don't reserve swap pages */ #define MAP_INHERIT 0x80 /* SunOS doesn't do this, but... */ @@ -27,10 +16,6 @@ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ #define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 0x2000 /* lock all currently mapped pages */ #define MCL_FUTURE 0x4000 /* lock all additions to address space */ @@ -48,18 +33,6 @@ #define MC_LOCKAS 5 /* Lock an entire address space of the calling process */ #define MC_UNLOCKAS 6 /* Unlock entire address space of calling process */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_FREE 0x5 /* (Solaris) contents can be freed */ -#define MADV_REMOVE 0x6 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 #endif /* __SPARC_MMAN_H__ */ diff --git a/include/asm-sparc64/mman.h b/include/asm-sparc64/mman.h index d705ec92da8b..6fd878e61435 100644 --- a/include/asm-sparc64/mman.h +++ b/include/asm-sparc64/mman.h @@ -2,21 +2,10 @@ #ifndef __SPARC64_MMAN_H__ #define __SPARC64_MMAN_H__ -/* SunOS'ified... */ +#include -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_SEM 0x8 /* page may be used for atomic ops */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ +/* SunOS'ified... */ -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ #define MAP_RENAME MAP_ANONYMOUS /* In SunOS terminology */ #define MAP_NORESERVE 0x40 /* don't reserve swap pages */ #define MAP_INHERIT 0x80 /* SunOS doesn't do this, but... */ @@ -27,10 +16,6 @@ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ #define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 0x2000 /* lock all currently mapped pages */ #define MCL_FUTURE 0x4000 /* lock all additions to address space */ @@ -48,18 +33,6 @@ #define MC_LOCKAS 5 /* Lock an entire address space of the calling process */ #define MC_UNLOCKAS 6 /* Unlock entire address space of calling process */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ #define MADV_FREE 0x5 /* (Solaris) contents can be freed */ -#define MADV_REMOVE 0x6 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 #endif /* __SPARC64_MMAN_H__ */ diff --git a/include/asm-v850/mman.h b/include/asm-v850/mman.h index 7b851c310e41..edbf6edbfb37 100644 --- a/include/asm-v850/mman.h +++ b/include/asm-v850/mman.h @@ -1,18 +1,7 @@ #ifndef __V850_MMAN_H__ #define __V850_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ - -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ @@ -20,24 +9,7 @@ #define MAP_LOCKED 0x2000 /* pages are locked */ #define MAP_NORESERVE 0x4000 /* don't check for reservations */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif /* __V850_MMAN_H__ */ diff --git a/include/asm-x86_64/mman.h b/include/asm-x86_64/mman.h index b699a38c1c3c..dd5cb0534d37 100644 --- a/include/asm-x86_64/mman.h +++ b/include/asm-x86_64/mman.h @@ -1,19 +1,8 @@ #ifndef __X8664_MMAN_H__ #define __X8664_MMAN_H__ -#define PROT_READ 0x1 /* page can be read */ -#define PROT_WRITE 0x2 /* page can be written */ -#define PROT_EXEC 0x4 /* page can be executed */ -#define PROT_NONE 0x0 /* page can not be accessed */ -#define PROT_SEM 0x8 -#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ -#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ +#include -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_TYPE 0x0f /* Mask for type of mapping */ -#define MAP_FIXED 0x10 /* Interpret addr exactly */ -#define MAP_ANONYMOUS 0x20 /* don't use a file */ #define MAP_32BIT 0x40 /* only give out 32bit addresses */ #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ @@ -24,24 +13,7 @@ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ -#define MS_ASYNC 1 /* sync memory asynchronously */ -#define MS_INVALIDATE 2 /* invalidate the caches */ -#define MS_SYNC 4 /* synchronous memory sync */ - #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ - -/* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 - #endif diff --git a/include/asm-xtensa/mman.h b/include/asm-xtensa/mman.h index e2d7afb679c8..ba394cbb4807 100644 --- a/include/asm-xtensa/mman.h +++ b/include/asm-xtensa/mman.h @@ -67,17 +67,19 @@ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#define MADV_NORMAL 0x0 /* default page-in behavior */ -#define MADV_RANDOM 0x1 /* page-in minimum required */ -#define MADV_SEQUENTIAL 0x2 /* read-ahead aggressively */ -#define MADV_WILLNEED 0x3 /* pre-fault pages */ -#define MADV_DONTNEED 0x4 /* discard these pages */ -#define MADV_REMOVE 0x5 /* remove these pages & resources */ -#define MADV_DONTFORK 0x30 /* dont inherit across fork */ -#define MADV_DOFORK 0x31 /* do inherit across fork */ +#define MADV_NORMAL 0 /* no further special treatment */ +#define MADV_RANDOM 1 /* expect random page references */ +#define MADV_SEQUENTIAL 2 /* expect sequential page references */ +#define MADV_WILLNEED 3 /* will need these pages */ +#define MADV_DONTNEED 4 /* don't need these pages */ + +/* common parameters: try to keep these consistent across architectures */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ /* compatibility flags */ -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FILE 0 +#define MAP_ANON MAP_ANONYMOUS +#define MAP_FILE 0 #endif /* _XTENSA_MMAN_H */ -- cgit v1.2.3 From b2ee9dbfad14ba8e34a589d552ddc67300a26bec Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Wed, 15 Feb 2006 15:17:40 -0800 Subject: [PATCH] hrtimer: fix multiple macro argument expansion For two macros the arguments were expanded twice, change them to inline functions to avoid it. Signed-off-by: Roman Zippel Acked-by: Ingo Molnar Acked-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ktime.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 6aca67a569a2..f3dec45ef874 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -96,10 +96,16 @@ static inline ktime_t ktime_set(const long secs, const unsigned long nsecs) ({ (ktime_t){ .tv64 = (kt).tv64 + (nsval) }; }) /* convert a timespec to ktime_t format: */ -#define timespec_to_ktime(ts) ktime_set((ts).tv_sec, (ts).tv_nsec) +static inline ktime_t timespec_to_ktime(struct timespec ts) +{ + return ktime_set(ts.tv_sec, ts.tv_nsec); +} /* convert a timeval to ktime_t format: */ -#define timeval_to_ktime(tv) ktime_set((tv).tv_sec, (tv).tv_usec * 1000) +static inline ktime_t timeval_to_ktime(struct timeval tv) +{ + return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC); +} /* Map the ktime_t to timespec conversion to ns_to_timespec function */ #define ktime_to_timespec(kt) ns_to_timespec((kt).tv64) -- cgit v1.2.3 From 7bbb79403163e047c6e333ff169db34e3c969e65 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 16 Feb 2006 11:08:09 +0000 Subject: [ARM] Fix SMP initialisation oops A change to the SMP initialisation caused the following oops: CPU1: Booted secondary processor CPU1: D VIPT write-back cache CPU1: I cache: 32768 bytes, associativity 4, 32 byte lines, 256 sets CPU1: D cache: 32768 bytes, associativity 4, 32 byte lines, 256 sets <7>Calibrating delay loop... 83.14 BogoMIPS (lpj=415744) <1>Unable to handle kernel NULL pointer dereference at virtual address 0000001c ... PC is at enqueue_task+0x1c/0x64 LR is at activate_task+0xcc/0xe4 SMP initialisation now requires cpu_possible_map to be initialised in setup_arch(). Move this from smp_prepare_cpus() to smp_init_cpus() and call it from our setup_arch() if CONFIG_SMP is enabled. Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 5 +++++ arch/arm/kernel/smp.c | 1 - arch/arm/mach-integrator/platsmp.c | 21 +++++++++++++++------ arch/arm/mach-realview/platsmp.c | 21 +++++++++++++++------ include/asm-arm/smp.h | 5 +++++ 5 files changed, 40 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index c45d10d07bde..68273b4dc882 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -771,6 +772,10 @@ void __init setup_arch(char **cmdline_p) paging_init(&meminfo, mdesc); request_standard_resources(&meminfo, mdesc); +#ifdef CONFIG_SMP + smp_init_cpus(); +#endif + cpu_init(); /* diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 7338948bd7d3..02aa300c4633 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -338,7 +338,6 @@ void __init smp_prepare_boot_cpu(void) per_cpu(cpu_data, cpu).idle = current; - cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); cpu_set(cpu, cpu_online_map); } diff --git a/arch/arm/mach-integrator/platsmp.c b/arch/arm/mach-integrator/platsmp.c index ea10bd8c972c..1bc8534ef0c6 100644 --- a/arch/arm/mach-integrator/platsmp.c +++ b/arch/arm/mach-integrator/platsmp.c @@ -140,6 +140,18 @@ static void __init poke_milo(void) mb(); } +/* + * Initialise the CPU possible map early - this describes the CPUs + * which may be present or become present in the system. + */ +void __init smp_init_cpus(void) +{ + unsigned int i, ncores = get_core_count(); + + for (i = 0; i < ncores; i++) + cpu_set(i, cpu_possible_map); +} + void __init smp_prepare_cpus(unsigned int max_cpus) { unsigned int ncores = get_core_count(); @@ -176,14 +188,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus) max_cpus = ncores; /* - * Initialise the possible/present maps. - * cpu_possible_map describes the set of CPUs which may be present - * cpu_present_map describes the set of CPUs populated + * Initialise the present map, which describes the set of CPUs + * actually populated at the present time. */ - for (i = 0; i < max_cpus; i++) { - cpu_set(i, cpu_possible_map); + for (i = 0; i < max_cpus; i++) cpu_set(i, cpu_present_map); - } /* * Do we need any more CPUs? If so, then let them know where diff --git a/arch/arm/mach-realview/platsmp.c b/arch/arm/mach-realview/platsmp.c index a8fbd76d8be5..b8484e15dacb 100644 --- a/arch/arm/mach-realview/platsmp.c +++ b/arch/arm/mach-realview/platsmp.c @@ -143,6 +143,18 @@ static void __init poke_milo(void) mb(); } +/* + * Initialise the CPU possible map early - this describes the CPUs + * which may be present or become present in the system. + */ +void __init smp_init_cpus(void) +{ + unsigned int i, ncores = get_core_count(); + + for (i = 0; i < ncores; i++) + cpu_set(i, cpu_possible_map); +} + void __init smp_prepare_cpus(unsigned int max_cpus) { unsigned int ncores = get_core_count(); @@ -179,14 +191,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus) local_timer_setup(cpu); /* - * Initialise the possible/present maps. - * cpu_possible_map describes the set of CPUs which may be present - * cpu_present_map describes the set of CPUs populated + * Initialise the present map, which describes the set of CPUs + * actually populated at the present time. */ - for (i = 0; i < max_cpus; i++) { - cpu_set(i, cpu_possible_map); + for (i = 0; i < max_cpus; i++) cpu_set(i, cpu_present_map); - } /* * Do we need any more CPUs? If so, then let them know where diff --git a/include/asm-arm/smp.h b/include/asm-arm/smp.h index 5a72e50ca9fc..fe45f7f61223 100644 --- a/include/asm-arm/smp.h +++ b/include/asm-arm/smp.h @@ -41,6 +41,11 @@ extern void show_ipi_list(struct seq_file *p); */ asmlinkage void do_IPI(struct pt_regs *regs); +/* + * Setup the SMP cpu_possible_map + */ +extern void smp_init_cpus(void); + /* * Move global data into per-processor storage. */ -- cgit v1.2.3 From d9db950cfa3d674ee834d980c329efdf8e4a0568 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 16 Feb 2006 22:36:15 +0000 Subject: [ARM] 3339/1: ARM EABI: make unmuxed syscalls visible Patch from Nicolas Pitre With EABI the multiplex sys_ipc and sys_socketcall syscalls are unavailable and their support code even removed from the compiled kernel, and the new unmuxed syscalls must be used instead. Make those syscall numbers visible. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- include/asm-arm/unistd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-arm/unistd.h b/include/asm-arm/unistd.h index 77430d6178ae..8f331bbd39a8 100644 --- a/include/asm-arm/unistd.h +++ b/include/asm-arm/unistd.h @@ -309,7 +309,7 @@ #define __NR_mq_getsetattr (__NR_SYSCALL_BASE+279) #define __NR_waitid (__NR_SYSCALL_BASE+280) -#if 0 /* reserve these for un-muxing socketcall */ +#if defined(__ARM_EABI__) /* reserve these for un-muxing socketcall */ #define __NR_socket (__NR_SYSCALL_BASE+281) #define __NR_bind (__NR_SYSCALL_BASE+282) #define __NR_connect (__NR_SYSCALL_BASE+283) @@ -329,7 +329,7 @@ #define __NR_recvmsg (__NR_SYSCALL_BASE+297) #endif -#if 0 /* reserve these for un-muxing ipc */ +#if defined(__ARM_EABI__) /* reserve these for un-muxing ipc */ #define __NR_semop (__NR_SYSCALL_BASE+298) #define __NR_semget (__NR_SYSCALL_BASE+299) #define __NR_semctl (__NR_SYSCALL_BASE+300) @@ -347,7 +347,7 @@ #define __NR_request_key (__NR_SYSCALL_BASE+310) #define __NR_keyctl (__NR_SYSCALL_BASE+311) -#if 0 /* reserved for un-muxing ipc */ +#if defined(__ARM_EABI__) /* reserved for un-muxing ipc */ #define __NR_semtimedop (__NR_SYSCALL_BASE+312) #endif -- cgit v1.2.3 From a62eaf151d9cb478d127cfbc2e93c498869785b0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 16 Feb 2006 23:41:58 +0100 Subject: [PATCH] x86_64: Add boot option to disable randomized mappings and cleanup AMD SimNow!'s JIT doesn't like them at all in the guest. For distribution installation it's easiest if it's a boot time option. Also I moved the variable to a more appropiate place and make it independent from sysctl And marked __read_mostly which it is. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 3 +++ arch/i386/kernel/cpu/transmeta.c | 1 + include/linux/kernel.h | 6 ------ include/linux/mm.h | 2 ++ kernel/sysctl.c | 2 -- mm/memory.c | 10 ++++++++++ 6 files changed, 16 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index ac75b57edf2e..b874771385cd 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1638,6 +1638,9 @@ running once the system is up. Format: ,,,,,[,[,[,]]] + norandmaps Don't use address space randomization + Equivalent to echo 0 > /proc/sys/kernel/randomize_va_space + ______________________________________________________________________ Changelog: diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c index bdbeb77f4e22..7214c9b577ab 100644 --- a/arch/i386/kernel/cpu/transmeta.c +++ b/arch/i386/kernel/cpu/transmeta.c @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/include/linux/kernel.h b/include/linux/kernel.h index b49affa0ac5a..3b507bf05d09 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -326,12 +326,6 @@ struct sysinfo { /* Force a compilation error if condition is true */ #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) -#ifdef CONFIG_SYSCTL -extern int randomize_va_space; -#else -#define randomize_va_space 1 -#endif - /* Trap pasters of __FUNCTION__ at compile-time */ #define __FUNCTION__ (__func__) diff --git a/include/linux/mm.h b/include/linux/mm.h index 75e9f0724997..26e1663a5cbe 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1051,5 +1051,7 @@ int shrink_slab(unsigned long scanned, gfp_t gfp_mask, void drop_pagecache(void); void drop_slab(void); +extern int randomize_va_space; + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 71dd6f62efec..7654d55c47f5 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -126,8 +126,6 @@ extern int sysctl_hz_timer; extern int acct_parm[]; #endif -int randomize_va_space = 1; - static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t, ctl_table *, void **); static int proc_doutsstring(ctl_table *table, int write, struct file *filp, diff --git a/mm/memory.c b/mm/memory.c index 2bee1f21aa8a..9abc6008544b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -82,6 +82,16 @@ EXPORT_SYMBOL(num_physpages); EXPORT_SYMBOL(high_memory); EXPORT_SYMBOL(vmalloc_earlyreserve); +int randomize_va_space __read_mostly = 1; + +static int __init disable_randmaps(char *s) +{ + randomize_va_space = 0; + return 0; +} +__setup("norandmaps", disable_randmaps); + + /* * If a p?d_bad entry is found while walking page tables, report * the error, before resetting entry to p?d_none. Usually (but -- cgit v1.2.3 From 7fd67843b96f90f59c9a244a1bc25137978a3ff9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 16 Feb 2006 23:42:07 +0100 Subject: [PATCH] x86_64: Disable tsc when apicpmtimer is active Otherwise it has no effect anyways. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/apic.c | 1 + arch/x86_64/kernel/time.c | 3 +-- include/asm-x86_64/proto.h | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 7a0a3e8d5d72..e5b14c57eaa0 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -1152,6 +1152,7 @@ __setup("noapicmaintimer", setup_noapicmaintimer); static __init int setup_apicpmtimer(char *s) { apic_calibrate_pmtmr = 1; + notsc_setup(NULL); return setup_apicmaintimer(NULL); } __setup("apicpmtimer", setup_apicpmtimer); diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 3c58c30506a1..67841d11ed1f 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -1327,8 +1327,7 @@ static int __init nohpet_setup(char *s) __setup("nohpet", nohpet_setup); - -static int __init notsc_setup(char *s) +int __init notsc_setup(char *s) { notsc = 1; return 0; diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index c99832e7bf3f..eca3f2d633db 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -133,6 +133,7 @@ extern int fix_aperture; extern int force_iommu; extern int reboot_force; +extern int notsc_setup(char *); extern void smp_local_timer_interrupt(struct pt_regs * regs); -- cgit v1.2.3 From 726c14bf499e91e7ede4f1728830aba05c675061 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 17 Feb 2006 10:30:23 +1100 Subject: [PATCH] Provide an interface for getting the current tick length This provides an interface for arch code to find out how many nanoseconds are going to be added on to xtime by the next call to do_timer. The value returned is a fixed-point number in 52.12 format in nanoseconds. The reason for this format is that it gives the full precision that the timekeeping code is using internally. The motivation for this is to fix a problem that has arisen on 32-bit powerpc in that the value returned by do_gettimeofday drifts apart from xtime if NTP is being used. PowerPC is now using a lockless do_gettimeofday based on reading the timebase register and performing some simple arithmetic. (This method of getting the time is also exported to userspace via the VDSO.) However, the factor and offset it uses were calculated based on the nominal tick length and weren't being adjusted when NTP varied the tick length. Note that 64-bit powerpc has had the lockless do_gettimeofday for a long time now. It also had an extremely hairy routine that got called from the 32-bit compat routine for adjtimex, which adjusted the factor and offset according to what it thought the timekeeping code was going to do. Not only was this only called if a 32-bit task did adjtimex (i.e. not if a 64-bit task did adjtimex), it was also duplicating computations from kernel/timer.c and it wasn't clear that it was (still) correct. The simple solution is to ask the timekeeping code how long the current jiffy will be on each timer interrupt, after calling do_timer. If this jiffy will be a different length from the last one, we then need to compute new values for the factor and offset used in the lockless do_gettimeofday. In this way we can keep xtime and do_gettimeofday in sync, even when NTP is varying the tick length. Note that when adjtimex varies the tick length, it almost always introduces the variation from the next tick on. The only case I could see where adjtimex would vary the length of the current tick is when an old-style adjtime adjustment is being cancelled. (It's not clear to me why the adjustment has to be cancelled immediately rather than from the next tick on.) Thus I don't see any real need for a hook in adjtimex; the rare case of an old-style adjustment being cancelled can be fixed up at the next tick. Signed-off-by: Paul Mackerras Acked-by: john stultz Signed-off-by: Linus Torvalds --- include/linux/timex.h | 3 +++ kernel/timer.c | 39 ++++++++++++++++++++++++++++++++++----- 2 files changed, 37 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/timex.h b/include/linux/timex.h index 04a4a8cb4ed3..b7ca1204e42a 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -345,6 +345,9 @@ time_interpolator_reset(void) #endif /* !CONFIG_TIME_INTERPOLATION */ +/* Returns how long ticks are at present, in ns / 2^(SHIFT_SCALE-10). */ +extern u64 current_tick_length(void); + #endif /* KERNEL */ #endif /* LINUX_TIMEX_H */ diff --git a/kernel/timer.c b/kernel/timer.c index b9dad3994676..fe3a9a9f8328 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -717,12 +717,16 @@ static void second_overflow(void) #endif } -/* in the NTP reference this is called "hardclock()" */ -static void update_wall_time_one_tick(void) +/* + * Returns how many microseconds we need to add to xtime this tick + * in doing an adjustment requested with adjtime. + */ +static long adjtime_adjustment(void) { - long time_adjust_step, delta_nsec; + long time_adjust_step; - if ((time_adjust_step = time_adjust) != 0 ) { + time_adjust_step = time_adjust; + if (time_adjust_step) { /* * We are doing an adjtime thing. Prepare time_adjust_step to * be within bounds. Note that a positive time_adjust means we @@ -733,10 +737,19 @@ static void update_wall_time_one_tick(void) */ time_adjust_step = min(time_adjust_step, (long)tickadj); time_adjust_step = max(time_adjust_step, (long)-tickadj); + } + return time_adjust_step; +} +/* in the NTP reference this is called "hardclock()" */ +static void update_wall_time_one_tick(void) +{ + long time_adjust_step, delta_nsec; + + time_adjust_step = adjtime_adjustment(); + if (time_adjust_step) /* Reduce by this step the amount of time left */ time_adjust -= time_adjust_step; - } delta_nsec = tick_nsec + time_adjust_step * 1000; /* * Advance the phase, once it gets to one microsecond, then @@ -758,6 +771,22 @@ static void update_wall_time_one_tick(void) } } +/* + * Return how long ticks are at the moment, that is, how much time + * update_wall_time_one_tick will add to xtime next time we call it + * (assuming no calls to do_adjtimex in the meantime). + * The return value is in fixed-point nanoseconds with SHIFT_SCALE-10 + * bits to the right of the binary point. + * This function has no side-effects. + */ +u64 current_tick_length(void) +{ + long delta_nsec; + + delta_nsec = tick_nsec + adjtime_adjustment() * 1000; + return ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj; +} + /* * Using a loop looks inefficient, but "ticks" is * usually just one (we shouldn't be losing ticks, -- cgit v1.2.3 From cfe91f9ce297e23e6fbdf61c02bdd8ab9af7c8a8 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Fri, 17 Feb 2006 03:16:55 -0500 Subject: [PATCH] i386: fix singlestepping though a syscall Do not mask TIF_SINGLESTEP bit in _TIF_WORK_MASK. Masking this stopped do_notify_resume() from being called when it should have been. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Linus Torvalds --- include/asm-i386/thread_info.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h index e20e99551d71..1f7d48c9ba3f 100644 --- a/include/asm-i386/thread_info.h +++ b/include/asm-i386/thread_info.h @@ -158,8 +158,8 @@ register unsigned long current_stack_pointer asm("esp") __attribute_used__; /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ - (0x0000FFFF & ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP|\ - _TIF_SECCOMP|_TIF_SYSCALL_EMU)) + (0x0000FFFF & ~(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ + _TIF_SECCOMP | _TIF_SYSCALL_EMU)) /* work to do on any return to u-space */ #define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) -- cgit v1.2.3 From 255acee706b333b79f593dd366f16e1f107cccc3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 17 Feb 2006 13:52:46 -0800 Subject: [PATCH] s390: additional_cpus parameter Introduce additional_cpus command line option. By default no additional cpu can be attached to the system anymore. Only the cpus present at IPL time can be switched on/off. If it is desired that additional cpus can be attached to the system the maximum number of additional cpus needs to be specified with this option. This change is necessary in order to limit the waste of per_cpu data structures. Signed-off-by: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cpu-hotplug.txt | 10 ++++---- arch/s390/kernel/setup.c | 2 ++ arch/s390/kernel/smp.c | 58 +++++++++++++++++++++++++++++-------------- include/asm-s390/smp.h | 2 ++ 4 files changed, 49 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt index e05278087ffa..4d3355da0e26 100644 --- a/Documentation/cpu-hotplug.txt +++ b/Documentation/cpu-hotplug.txt @@ -11,6 +11,8 @@ Joel Schopp ia64/x86_64: Ashok Raj + s390: + Heiko Carstens Authors: Ashok Raj Lots of feedback: Nathan Lynch , @@ -44,11 +46,9 @@ maxcpus=n Restrict boot time cpus to n. Say if you have 4 cpus, using maxcpus=2 will only boot 2. You can choose to bring the other cpus later online, read FAQ's for more info. -additional_cpus*=n Use this to limit hotpluggable cpus. This option sets - cpu_possible_map = cpu_present_map + additional_cpus - -(*) Option valid only for following architectures -- x86_64, ia64 +additional_cpus=n [x86_64, s390 only] use this to limit hotpluggable cpus. + This option sets + cpu_possible_map = cpu_present_map + additional_cpus ia64 and x86_64 use the number of disabled local apics in ACPI tables MADT to determine the number of potentially hot-pluggable cpus. The implementation diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index de8784267473..24f62f16c0e5 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -600,6 +600,7 @@ setup_arch(char **cmdline_p) init_mm.brk = (unsigned long) &_end; parse_cmdline_early(cmdline_p); + parse_early_param(); setup_memory(); setup_resources(); @@ -607,6 +608,7 @@ setup_arch(char **cmdline_p) cpu_init(); __cpu_logical_map[0] = S390_lowcore.cpu_data.cpu_addr; + smp_setup_cpu_possible_map(); /* * Create kernel page tables and switch to virtual addressing. diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 0d1ad5dbe2b1..53291e94ac7b 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -1,8 +1,7 @@ /* * arch/s390/kernel/smp.c * - * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright (C) IBM Corp. 1999,2006 * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), * Martin Schwidefsky (schwidefsky@de.ibm.com) * Heiko Carstens (heiko.carstens@de.ibm.com) @@ -41,8 +40,6 @@ #include #include -/* prototypes */ - extern volatile int __cpu_logical_map[]; /* @@ -51,13 +48,11 @@ extern volatile int __cpu_logical_map[]; struct _lowcore *lowcore_ptr[NR_CPUS]; -cpumask_t cpu_online_map; -cpumask_t cpu_possible_map = CPU_MASK_ALL; +cpumask_t cpu_online_map = CPU_MASK_NONE; +cpumask_t cpu_possible_map = CPU_MASK_NONE; static struct task_struct *current_set[NR_CPUS]; -EXPORT_SYMBOL(cpu_online_map); - /* * Reboot, halt and power_off routines for SMP. */ @@ -490,10 +485,10 @@ void smp_ctl_clear_bit(int cr, int bit) { * Lets check how many CPUs we have. */ -void -__init smp_check_cpus(unsigned int max_cpus) +static unsigned int +__init smp_count_cpus(void) { - int cpu, num_cpus; + unsigned int cpu, num_cpus; __u16 boot_cpu_addr; /* @@ -503,19 +498,20 @@ __init smp_check_cpus(unsigned int max_cpus) boot_cpu_addr = S390_lowcore.cpu_data.cpu_addr; current_thread_info()->cpu = 0; num_cpus = 1; - for (cpu = 0; cpu <= 65535 && num_cpus < max_cpus; cpu++) { + for (cpu = 0; cpu <= 65535; cpu++) { if ((__u16) cpu == boot_cpu_addr) continue; - __cpu_logical_map[num_cpus] = (__u16) cpu; - if (signal_processor(num_cpus, sigp_sense) == + __cpu_logical_map[1] = (__u16) cpu; + if (signal_processor(1, sigp_sense) == sigp_not_operational) continue; - cpu_set(num_cpus, cpu_present_map); num_cpus++; } printk("Detected %d CPU's\n",(int) num_cpus); printk("Boot cpu address %2X\n", boot_cpu_addr); + + return num_cpus; } /* @@ -676,6 +672,32 @@ __cpu_up(unsigned int cpu) return 0; } +static unsigned int __initdata additional_cpus; + +void __init smp_setup_cpu_possible_map(void) +{ + unsigned int pcpus, cpu; + + pcpus = smp_count_cpus() + additional_cpus; + + if (pcpus > NR_CPUS) + pcpus = NR_CPUS; + + for (cpu = 0; cpu < pcpus; cpu++) + cpu_set(cpu, cpu_possible_map); + + cpu_present_map = cpu_possible_map; +} + +#ifdef CONFIG_HOTPLUG_CPU + +static int __init setup_additional_cpus(char *s) +{ + additional_cpus = simple_strtoul(s, NULL, 0); + return 0; +} +early_param("additional_cpus", setup_additional_cpus); + int __cpu_disable(void) { @@ -744,6 +766,8 @@ cpu_die(void) for(;;); } +#endif /* CONFIG_HOTPLUG_CPU */ + /* * Cycle through the processors and setup structures. */ @@ -757,7 +781,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus) /* request the 0x1201 emergency signal external interrupt */ if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0) panic("Couldn't request external interrupt 0x1201"); - smp_check_cpus(max_cpus); memset(lowcore_ptr,0,sizeof(lowcore_ptr)); /* * Initialize prefix pages and stacks for all possible cpus @@ -806,14 +829,12 @@ void __devinit smp_prepare_boot_cpu(void) BUG_ON(smp_processor_id() != 0); cpu_set(0, cpu_online_map); - cpu_set(0, cpu_present_map); S390_lowcore.percpu_offset = __per_cpu_offset[0]; current_set[0] = current; } void smp_cpus_done(unsigned int max_cpus) { - cpu_present_map = cpu_possible_map; } /* @@ -845,6 +866,7 @@ static int __init topology_init(void) subsys_initcall(topology_init); +EXPORT_SYMBOL(cpu_online_map); EXPORT_SYMBOL(cpu_possible_map); EXPORT_SYMBOL(lowcore_ptr); EXPORT_SYMBOL(smp_ctl_set_bit); diff --git a/include/asm-s390/smp.h b/include/asm-s390/smp.h index 9c6e9c300eb9..444dae5912e6 100644 --- a/include/asm-s390/smp.h +++ b/include/asm-s390/smp.h @@ -31,6 +31,7 @@ typedef struct __u16 cpu; } sigp_info; +extern void smp_setup_cpu_possible_map(void); extern int smp_call_function_on(void (*func) (void *info), void *info, int nonatomic, int wait, int cpu); #define NO_PROC_ID 0xFF /* No processor magic marker */ @@ -104,6 +105,7 @@ smp_call_function_on(void (*func) (void *info), void *info, #define smp_cpu_not_running(cpu) 1 #define smp_get_cpu(cpu) ({ 0; }) #define smp_put_cpu(cpu) ({ 0; }) +#define smp_setup_cpu_possible_map() #endif #endif -- cgit v1.2.3 From 200a4552af34b9a32e1f68a881a9ed5c7ec699cc Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 17 Feb 2006 13:52:56 -0800 Subject: [PATCH] powerpc: Fix accidentally-working typo in __pud_free_tlb One of the parameters to the __pud_free_tlb() macro for powerpc is incorrect (see patch) . We get away with it by accident, because the one place the macro is called, the second parameter is a variable named "pud". Signed-off-by: David Gibson Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-powerpc/pgalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-powerpc/pgalloc.h b/include/asm-powerpc/pgalloc.h index 9f5b052784a5..a00ee002cd11 100644 --- a/include/asm-powerpc/pgalloc.h +++ b/include/asm-powerpc/pgalloc.h @@ -146,7 +146,7 @@ extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf); pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \ PMD_CACHE_NUM, PMD_TABLE_SIZE-1)) #ifndef CONFIG_PPC_64K_PAGES -#define __pud_free_tlb(tlb, pmd) \ +#define __pud_free_tlb(tlb, pud) \ pgtable_free_tlb(tlb, pgtable_free_cache(pud, \ PUD_CACHE_NUM, PUD_TABLE_SIZE-1)) #endif /* CONFIG_PPC_64K_PAGES */ -- cgit v1.2.3 From 00fc00df9e7b637cd13fe1f163da0a2957273947 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 18 Jan 2006 21:22:55 -0500 Subject: [PATCH] m68k: restore disable_irq_nosync() Patch claiming to remove enable_irq_nosync() had left it alive but killed disable_irq_nosync() instead... Signed-off-by: Al Viro --- include/asm-m68k/irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-m68k/irq.h b/include/asm-m68k/irq.h index 325c86f8512d..9ac047c400c4 100644 --- a/include/asm-m68k/irq.h +++ b/include/asm-m68k/irq.h @@ -79,7 +79,7 @@ static __inline__ int irq_canonicalize(int irq) extern void (*enable_irq)(unsigned int); extern void (*disable_irq)(unsigned int); -#define enable_irq_nosync enable_irq +#define disable_irq_nosync disable_irq struct pt_regs; -- cgit v1.2.3 From ad6b97fc929e5844bfd1d708ab1d74d131d7960d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 3 Feb 2006 02:06:42 -0500 Subject: [PATCH] iomap_copy fallout (m68k) added __raw_writel(), sanitized include order in iomap_copy.c Signed-off-by: Al Viro --- include/asm-m68k/raw_io.h | 1 + lib/iomap_copy.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-m68k/raw_io.h b/include/asm-m68k/raw_io.h index 5439bcaa57c6..811ccd25d4a6 100644 --- a/include/asm-m68k/raw_io.h +++ b/include/asm-m68k/raw_io.h @@ -336,6 +336,7 @@ static inline void raw_outsw_swapw(volatile u16 __iomem *port, const u16 *buf, : "d0", "a0", "a1", "d6"); } +#define __raw_writel raw_outl #endif /* __KERNEL__ */ diff --git a/lib/iomap_copy.c b/lib/iomap_copy.c index a6b1e271d53c..351045f4f63c 100644 --- a/lib/iomap_copy.c +++ b/lib/iomap_copy.c @@ -15,8 +15,8 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */ -#include #include +#include /** * __iowrite32_copy - copy data to MMIO space, in 32-bit units -- cgit v1.2.3 From 0728a2f99ef6efd1984f9e0ed59834c1cc602e6f Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Sat, 11 Feb 2006 18:21:47 +0100 Subject: [PATCH] powerpc: remove duplicate exports A few symbols are exported twice, remove them from ppc_ksyms.c Remove users of sys_ctrler in arch/ppc/ WARNING: vmlinux: duplicate symbol '__delay' previous definition was in vmlinux WARNING: vmlinux: duplicate symbol '__up' previous definition was in vmlinux WARNING: vmlinux: duplicate symbol '__down' previous definition was in vmlinux WARNING: vmlinux: duplicate symbol '__down_interruptible' previous definition was in vmlinux WARNING: vmlinux: duplicate symbol 'sys_ctrler' previous definition was in vmlinux WARNING: vmlinux: duplicate symbol 'strncat' previous definition was in vmlinux WARNING: vmlinux: duplicate symbol 'strncmp' previous definition was in vmlinux WARNING: vmlinux: duplicate symbol 'strchr' previous definition was in vmlinux WARNING: vmlinux: duplicate symbol 'strrchr' previous definition was in vmlinux WARNING: vmlinux: duplicate symbol 'strnlen' previous definition was in vmlinux WARNING: vmlinux: duplicate symbol 'strpbrk' previous definition was in vmlinux WARNING: vmlinux: duplicate symbol 'memscan' previous definition was in vmlinux WARNING: vmlinux: duplicate symbol 'strstr' previous definition was in vmlinux Signed-off-by: Olaf Hering Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/ppc_ksyms.c | 16 ---------------- arch/ppc/kernel/ppc_ksyms.c | 8 -------- arch/ppc/xmon/start.c | 15 +-------------- include/asm-ppc/machdep.h | 13 ------------- 4 files changed, 1 insertion(+), 51 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index d9a459c144d8..8a731ea877b7 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -79,15 +79,8 @@ EXPORT_SYMBOL(sys_sigreturn); EXPORT_SYMBOL(strcpy); EXPORT_SYMBOL(strncpy); EXPORT_SYMBOL(strcat); -EXPORT_SYMBOL(strncat); -EXPORT_SYMBOL(strchr); -EXPORT_SYMBOL(strrchr); -EXPORT_SYMBOL(strpbrk); -EXPORT_SYMBOL(strstr); EXPORT_SYMBOL(strlen); -EXPORT_SYMBOL(strnlen); EXPORT_SYMBOL(strcmp); -EXPORT_SYMBOL(strncmp); EXPORT_SYMBOL(strcasecmp); EXPORT_SYMBOL(csum_partial); @@ -185,9 +178,6 @@ EXPORT_SYMBOL(adb_try_handler_change); EXPORT_SYMBOL(cuda_request); EXPORT_SYMBOL(cuda_poll); #endif /* CONFIG_ADB_CUDA */ -#ifdef CONFIG_PPC_PMAC -EXPORT_SYMBOL(sys_ctrler); -#endif #ifdef CONFIG_VT EXPORT_SYMBOL(kd_mksound); #endif @@ -205,7 +195,6 @@ EXPORT_SYMBOL(__lshrdi3); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memmove); -EXPORT_SYMBOL(memscan); EXPORT_SYMBOL(memcmp); EXPORT_SYMBOL(memchr); @@ -214,7 +203,6 @@ EXPORT_SYMBOL(screen_info); #endif #ifdef CONFIG_PPC32 -EXPORT_SYMBOL(__delay); EXPORT_SYMBOL(timer_interrupt); EXPORT_SYMBOL(irq_desc); EXPORT_SYMBOL(tb_ticks_per_jiffy); @@ -222,10 +210,6 @@ EXPORT_SYMBOL(console_drivers); EXPORT_SYMBOL(cacheable_memcpy); #endif -EXPORT_SYMBOL(__up); -EXPORT_SYMBOL(__down); -EXPORT_SYMBOL(__down_interruptible); - #ifdef CONFIG_8xx EXPORT_SYMBOL(cpm_install_handler); EXPORT_SYMBOL(cpm_free_handler); diff --git a/arch/ppc/kernel/ppc_ksyms.c b/arch/ppc/kernel/ppc_ksyms.c index 15bd9b448a48..82adb4601348 100644 --- a/arch/ppc/kernel/ppc_ksyms.c +++ b/arch/ppc/kernel/ppc_ksyms.c @@ -93,15 +93,8 @@ EXPORT_SYMBOL(test_and_change_bit); EXPORT_SYMBOL(strcpy); EXPORT_SYMBOL(strncpy); EXPORT_SYMBOL(strcat); -EXPORT_SYMBOL(strncat); -EXPORT_SYMBOL(strchr); -EXPORT_SYMBOL(strrchr); -EXPORT_SYMBOL(strpbrk); -EXPORT_SYMBOL(strstr); EXPORT_SYMBOL(strlen); -EXPORT_SYMBOL(strnlen); EXPORT_SYMBOL(strcmp); -EXPORT_SYMBOL(strncmp); EXPORT_SYMBOL(strcasecmp); EXPORT_SYMBOL(__div64_32); @@ -253,7 +246,6 @@ EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(cacheable_memcpy); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memmove); -EXPORT_SYMBOL(memscan); EXPORT_SYMBOL(memcmp); EXPORT_SYMBOL(memchr); diff --git a/arch/ppc/xmon/start.c b/arch/ppc/xmon/start.c index 4344cbe9b5c5..484f5bb1aa3e 100644 --- a/arch/ppc/xmon/start.c +++ b/arch/ppc/xmon/start.c @@ -146,19 +146,6 @@ xmon_map_scc(void) static int scc_initialized = 0; void xmon_init_scc(void); -extern void cuda_poll(void); - -static inline void do_poll_adb(void) -{ -#ifdef CONFIG_ADB_PMU - if (sys_ctrler == SYS_CTRLER_PMU) - pmu_poll_adb(); -#endif /* CONFIG_ADB_PMU */ -#ifdef CONFIG_ADB_CUDA - if (sys_ctrler == SYS_CTRLER_CUDA) - cuda_poll(); -#endif /* CONFIG_ADB_CUDA */ -} int xmon_write(void *handle, void *ptr, int nb) @@ -189,7 +176,7 @@ xmon_write(void *handle, void *ptr, int nb) ct = 0; for (i = 0; i < nb; ++i) { while ((*sccc & TXRDY) == 0) - do_poll_adb(); + ; c = p[i]; if (c == '\n' && !ct) { c = '\r'; diff --git a/include/asm-ppc/machdep.h b/include/asm-ppc/machdep.h index 39200def8d11..a3e8a45e45a9 100644 --- a/include/asm-ppc/machdep.h +++ b/include/asm-ppc/machdep.h @@ -154,19 +154,6 @@ extern char cmd_line[COMMAND_LINE_SIZE]; extern void setup_pci_ptrs(void); -/* - * Power macintoshes have either a CUDA or a PMU controlling - * system reset, power, NVRAM, RTC. - */ -typedef enum sys_ctrler_kind { - SYS_CTRLER_UNKNOWN = 0, - SYS_CTRLER_CUDA = 1, - SYS_CTRLER_PMU = 2, - SYS_CTRLER_SMU = 3, -} sys_ctrler_t; - -extern sys_ctrler_t sys_ctrler; - #ifdef CONFIG_SMP struct smp_ops_t { void (*message_pass)(int target, int msg); -- cgit v1.2.3 From cc1887f3d8ae8ea61efa1a75af8ec0467b9dd546 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 20 Feb 2006 23:48:38 +0900 Subject: [PATCH] libata: fix qc->n_elem == 0 case handling in ata_qc_next_sg This patch makes ata_for_each_sg() start with pad_sgent when qc->n_elem is zero. Previously, ata_for_each_sg() unconditionally started with qc->__sg, handling the first sg to fill_sg() routines even when the entry was invalid. And while at it, unwind ?: in ata_qc_next_sg() into if statement. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 9e5db2949c58..c91be5e64ede 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -556,6 +556,16 @@ ata_sg_is_last(struct scatterlist *sg, struct ata_queued_cmd *qc) return 0; } +static inline struct scatterlist * +ata_qc_first_sg(struct ata_queued_cmd *qc) +{ + if (qc->n_elem) + return qc->__sg; + if (qc->pad_len) + return &qc->pad_sgent; + return NULL; +} + static inline struct scatterlist * ata_qc_next_sg(struct scatterlist *sg, struct ata_queued_cmd *qc) { @@ -563,11 +573,13 @@ ata_qc_next_sg(struct scatterlist *sg, struct ata_queued_cmd *qc) return NULL; if (++sg - qc->__sg < qc->n_elem) return sg; - return qc->pad_len ? &qc->pad_sgent : NULL; + if (qc->pad_len) + return &qc->pad_sgent; + return NULL; } #define ata_for_each_sg(sg, qc) \ - for (sg = qc->__sg; sg; sg = ata_qc_next_sg(sg, qc)) + for (sg = ata_qc_first_sg(qc); sg; sg = ata_qc_next_sg(sg, qc)) static inline unsigned int ata_tag_valid(unsigned int tag) { -- cgit v1.2.3 From 9b0f8b040acd8dfd23860754c0d09ff4f44e2cbc Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 20 Feb 2006 18:27:52 -0800 Subject: [PATCH] Terminate process that fails on a constrained allocation Some allocations are restricted to a limited set of nodes (due to memory policies or cpuset constraints). If the page allocator is not able to find enough memory then that does not mean that overall system memory is low. In particular going postal and more or less randomly shooting at processes is not likely going to help the situation but may just lead to suicide (the whole system coming down). It is better to signal to the process that no memory exists given the constraints that the process (or the configuration of the process) has placed on the allocation behavior. The process may be killed but then the sysadmin or developer can investigate the situation. The solution is similar to what we do when running out of hugepages. This patch adds a check before we kill processes. At that point performance considerations do not matter much so we just scan the zonelist and reconstruct a list of nodes. If the list of nodes does not contain all online nodes then this is a constrained allocation and we should kill the current process. Signed-off-by: Christoph Lameter Cc: Nick Piggin Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/sysrq.c | 2 +- include/linux/swap.h | 2 +- mm/oom_kill.c | 103 ++++++++++++++++++++++++++++++++++++++------------- mm/page_alloc.c | 2 +- 4 files changed, 81 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index 5765f672e853..d58f82318853 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -243,7 +243,7 @@ static struct sysrq_key_op sysrq_term_op = { static void moom_callback(void *ignored) { - out_of_memory(GFP_KERNEL, 0); + out_of_memory(&NODE_DATA(0)->node_zonelists[ZONE_NORMAL], GFP_KERNEL, 0); } static DECLARE_WORK(moom_work, moom_callback, NULL); diff --git a/include/linux/swap.h b/include/linux/swap.h index f3e17d5963c3..d572b19afb7d 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -147,7 +147,7 @@ struct swap_list_t { #define vm_swap_full() (nr_swap_pages*2 < total_swap_pages) /* linux/mm/oom_kill.c */ -extern void out_of_memory(gfp_t gfp_mask, int order); +extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order); /* linux/mm/memory.c */ extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 949eba1d5ba3..8123fad5a485 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -132,6 +132,36 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) return points; } +/* + * Types of limitations to the nodes from which allocations may occur + */ +#define CONSTRAINT_NONE 1 +#define CONSTRAINT_MEMORY_POLICY 2 +#define CONSTRAINT_CPUSET 3 + +/* + * Determine the type of allocation constraint. + */ +static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask) +{ +#ifdef CONFIG_NUMA + struct zone **z; + nodemask_t nodes = node_online_map; + + for (z = zonelist->zones; *z; z++) + if (cpuset_zone_allowed(*z, gfp_mask)) + node_clear((*z)->zone_pgdat->node_id, + nodes); + else + return CONSTRAINT_CPUSET; + + if (!nodes_empty(nodes)) + return CONSTRAINT_MEMORY_POLICY; +#endif + + return CONSTRAINT_NONE; +} + /* * Simple selection loop. We chose the process with the highest * number of 'points'. We expect the caller will lock the tasklist. @@ -184,7 +214,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) * CAP_SYS_RAW_IO set, send SIGTERM instead (but it's unlikely that * we select a process with CAP_SYS_RAW_IO set). */ -static void __oom_kill_task(task_t *p) +static void __oom_kill_task(task_t *p, const char *message) { if (p->pid == 1) { WARN_ON(1); @@ -200,8 +230,8 @@ static void __oom_kill_task(task_t *p) return; } task_unlock(p); - printk(KERN_ERR "Out of Memory: Killed process %d (%s).\n", - p->pid, p->comm); + printk(KERN_ERR "%s: Killed process %d (%s).\n", + message, p->pid, p->comm); /* * We give our sacrificial lamb high priority and access to @@ -214,7 +244,7 @@ static void __oom_kill_task(task_t *p) force_sig(SIGKILL, p); } -static struct mm_struct *oom_kill_task(task_t *p) +static struct mm_struct *oom_kill_task(task_t *p, const char *message) { struct mm_struct *mm = get_task_mm(p); task_t * g, * q; @@ -226,21 +256,21 @@ static struct mm_struct *oom_kill_task(task_t *p) return NULL; } - __oom_kill_task(p); + __oom_kill_task(p, message); /* * kill all processes that share the ->mm (i.e. all threads), * but are in a different thread group */ do_each_thread(g, q) if (q->mm == mm && q->tgid != p->tgid) - __oom_kill_task(q); + __oom_kill_task(q, message); while_each_thread(g, q); return mm; } static struct mm_struct *oom_kill_process(struct task_struct *p, - unsigned long points) + unsigned long points, const char *message) { struct mm_struct *mm; struct task_struct *c; @@ -253,11 +283,11 @@ static struct mm_struct *oom_kill_process(struct task_struct *p, c = list_entry(tsk, struct task_struct, sibling); if (c->mm == p->mm) continue; - mm = oom_kill_task(c); + mm = oom_kill_task(c, message); if (mm) return mm; } - return oom_kill_task(p); + return oom_kill_task(p, message); } /** @@ -268,10 +298,10 @@ static struct mm_struct *oom_kill_process(struct task_struct *p, * OR try to be smart about which process to kill. Note that we * don't have to be perfect here, we just have to be good. */ -void out_of_memory(gfp_t gfp_mask, int order) +void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order) { struct mm_struct *mm = NULL; - task_t * p; + task_t *p; unsigned long points; if (printk_ratelimit()) { @@ -283,25 +313,48 @@ void out_of_memory(gfp_t gfp_mask, int order) cpuset_lock(); read_lock(&tasklist_lock); + + /* + * Check if there were limitations on the allocation (only relevant for + * NUMA) that may require different handling. + */ + switch (constrained_alloc(zonelist, gfp_mask)) { + case CONSTRAINT_MEMORY_POLICY: + mm = oom_kill_process(current, points, + "No available memory (MPOL_BIND)"); + break; + + case CONSTRAINT_CPUSET: + mm = oom_kill_process(current, points, + "No available memory in cpuset"); + break; + + case CONSTRAINT_NONE: retry: - p = select_bad_process(&points); + /* + * Rambo mode: Shoot down a process and hope it solves whatever + * issues we may have. + */ + p = select_bad_process(&points); - if (PTR_ERR(p) == -1UL) - goto out; + if (PTR_ERR(p) == -1UL) + goto out; - /* Found nothing?!?! Either we hang forever, or we panic. */ - if (!p) { - read_unlock(&tasklist_lock); - cpuset_unlock(); - panic("Out of memory and no killable processes...\n"); - } + /* Found nothing?!?! Either we hang forever, or we panic. */ + if (!p) { + read_unlock(&tasklist_lock); + cpuset_unlock(); + panic("Out of memory and no killable processes...\n"); + } - mm = oom_kill_process(p, points); - if (!mm) - goto retry; + mm = oom_kill_process(p, points, "Out of memory"); + if (!mm) + goto retry; + + break; + } - out: - read_unlock(&tasklist_lock); +out: cpuset_unlock(); if (mm) mmput(mm); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 208812b25597..791690d7d3fa 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1015,7 +1015,7 @@ rebalance: if (page) goto got_pg; - out_of_memory(gfp_mask, order); + out_of_memory(zonelist, gfp_mask, order); goto restart; } -- cgit v1.2.3 From c255d844dd73616f23e4b4733edcc2e5fa4042b2 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Mon, 20 Feb 2006 18:27:58 -0800 Subject: [PATCH] suspend-to-ram: allow video options to be set at runtime Currently, acpi video options can only be set on kernel command line. That's little inflexible; I'd like userland s2ram application that just works, and modifying kernel command line according to whitelist is not fun. It is better to just allow s2ram application to set video options just before suspend (according to the whitelist). This implements sysctl to allow setting suspend video options without reboot. (akpm: Documentation updates for this new sysctl are pending..) Signed-off-by: Pavel Machek Cc: "Brown, Len" Cc: "Antonino A. Daplas" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/kernel.txt | 10 ++++++++++ include/linux/acpi.h | 3 ++- include/linux/sysctl.h | 1 + kernel/sysctl.c | 16 ++++++++++++---- 4 files changed, 25 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 9f11d36a8c10..b0c7ab93dcb9 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -16,6 +16,7 @@ before actually making adjustments. Currently, these files might (depending on your configuration) show up in /proc/sys/kernel: +- acpi_video_flags - acct - core_pattern - core_uses_pid @@ -57,6 +58,15 @@ show up in /proc/sys/kernel: ============================================================== +acpi_video_flags: + +flags + +See Doc*/kernel/power/video.txt, it allows mode of video boot to be +set during run time. + +============================================================== + acct: highwater lowwater frequency diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 84d3d9f034ce..d3bc25e6d27d 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -427,7 +427,8 @@ extern int acpi_mp_config; extern struct acpi_table_mcfg_config *pci_mmcfg_config; extern int pci_mmcfg_config_num; -extern int sbf_port ; +extern int sbf_port; +extern unsigned long acpi_video_flags; #else /* !CONFIG_ACPI */ diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 32a4139c4ad8..0e92bf7ec28e 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -146,6 +146,7 @@ enum KERN_RANDOMIZE=68, /* int: randomize virtual address space */ KERN_SETUID_DUMPABLE=69, /* int: behaviour of dumps for setuid core */ KERN_SPIN_RETRY=70, /* int: number of spinlock retries */ + KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */ }; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 7654d55c47f5..ebc41bf22f1e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -44,14 +44,12 @@ #include #include #include +#include +#include #include #include -#ifdef CONFIG_ROOT_NFS -#include -#endif - #if defined(CONFIG_SYSCTL) /* External variables not in a header file. */ @@ -655,6 +653,16 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#endif +#ifdef CONFIG_ACPI_SLEEP + { + .ctl_name = KERN_ACPI_VIDEO_FLAGS, + .procname = "acpi_video_flags", + .data = &acpi_video_flags, + .maxlen = sizeof (unsigned long), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif { .ctl_name = 0 } }; -- cgit v1.2.3 From 7a9166e3b037296366cea6f3c97f705d33e209e6 Mon Sep 17 00:00:00 2001 From: Luke Yang Date: Mon, 20 Feb 2006 18:28:07 -0800 Subject: [PATCH] Fix undefined symbols for nommu architecture Signed-off-by: Luke Yang Acked-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 ++++ kernel/sysctl.c | 2 ++ mm/nommu.c | 2 ++ 3 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 26e1663a5cbe..498ff8778fb6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1051,7 +1051,11 @@ int shrink_slab(unsigned long scanned, gfp_t gfp_mask, void drop_pagecache(void); void drop_slab(void); +#ifndef CONFIG_MMU +#define randomize_va_space 0 +#else extern int randomize_va_space; +#endif #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ebc41bf22f1e..c05a2b7125e1 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -636,6 +636,7 @@ static ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif +#if defined(CONFIG_MMU) { .ctl_name = KERN_RANDOMIZE, .procname = "randomize_va_space", @@ -644,6 +645,7 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#endif #if defined(CONFIG_S390) && defined(CONFIG_SMP) { .ctl_name = KERN_SPIN_RETRY, diff --git a/mm/nommu.c b/mm/nommu.c index c10262d68232..99d21020ec9d 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -57,6 +57,8 @@ EXPORT_SYMBOL(vmalloc); EXPORT_SYMBOL(vfree); EXPORT_SYMBOL(vmalloc_to_page); EXPORT_SYMBOL(vmalloc_32); +EXPORT_SYMBOL(vmap); +EXPORT_SYMBOL(vunmap); /* * Handle all mappings that got truncated by a "truncate()" -- cgit v1.2.3 From 7fd105e758c8d746d57ab7e77f100e096bf153c8 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 20 Feb 2006 18:28:08 -0800 Subject: [PATCH] Fix compile for CONFIG_SYSVIPC=n or CONFIG_SYSCTL=n The compat syscalls are added to sys_ni.c since they are not defined if the above CONFIG options are off. Also, nfs would not build with CONFIG_SYSCTL off. Noticed by Arthur Othieno. Signed-off-by: Stephen Rothwell Cc: "David S. Miller" Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfs_fs.h | 2 +- kernel/sys_ni.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 547d649b274e..b4dc6e2e10c9 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -398,7 +398,7 @@ extern struct inode_operations nfs_symlink_inode_operations; extern int nfs_register_sysctl(void); extern void nfs_unregister_sysctl(void); #else -#define nfs_register_sysctl() do { } while(0) +#define nfs_register_sysctl() 0 #define nfs_unregister_sysctl() do { } while(0) #endif diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 17313b99e53d..1067090db6b1 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -104,6 +104,8 @@ cond_syscall(sys_setreuid16); cond_syscall(sys_setuid16); cond_syscall(sys_vm86old); cond_syscall(sys_vm86); +cond_syscall(compat_sys_ipc); +cond_syscall(compat_sys_sysctl); /* arch-specific weak syscall entries */ cond_syscall(sys_pciconfig_read); -- cgit v1.2.3 From 49d9c81a699b57a5b6488f3a761669d05e116588 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 20 Feb 2006 18:28:14 -0800 Subject: [PATCH] s390: revert dasd eer module Revert dasd eer module until we have a common understanding of how the interface should be. Signed-off-by: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/s390/block/Kconfig | 14 +- drivers/s390/block/Makefile | 2 - drivers/s390/block/dasd.c | 76 +-- drivers/s390/block/dasd_3990_erp.c | 3 - drivers/s390/block/dasd_eckd.h | 1 - drivers/s390/block/dasd_eer.c | 1090 ------------------------------------ drivers/s390/block/dasd_int.h | 37 -- include/asm-s390/dasd.h | 13 +- 8 files changed, 5 insertions(+), 1231 deletions(-) delete mode 100644 drivers/s390/block/dasd_eer.c (limited to 'include') diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig index 6912399d0937..6f50cc9323d9 100644 --- a/drivers/s390/block/Kconfig +++ b/drivers/s390/block/Kconfig @@ -55,21 +55,13 @@ config DASD_DIAG Disks under VM. If you are not running under VM or unsure what it is, say "N". -config DASD_EER - tristate "Extended error reporting (EER)" - depends on DASD - help - This driver provides a character device interface to the - DASD extended error reporting. This is only needed if you want to - use applications written for the EER facility. - config DASD_CMB tristate "Compatibility interface for DASD channel measurement blocks" depends on DASD help - This driver provides an additional interface to the channel - measurement facility, which is normally accessed though sysfs, with - a set of ioctl functions specific to the dasd driver. + This driver provides an additional interface to the channel measurement + facility, which is normally accessed though sysfs, with a set of + ioctl functions specific to the dasd driver. This is only needed if you want to use applications written for linux-2.4 dasd channel measurement facility interface. diff --git a/drivers/s390/block/Makefile b/drivers/s390/block/Makefile index 0c0d871e8f51..58c6780134f7 100644 --- a/drivers/s390/block/Makefile +++ b/drivers/s390/block/Makefile @@ -5,7 +5,6 @@ dasd_eckd_mod-objs := dasd_eckd.o dasd_3990_erp.o dasd_9343_erp.o dasd_fba_mod-objs := dasd_fba.o dasd_3370_erp.o dasd_9336_erp.o dasd_diag_mod-objs := dasd_diag.o -dasd_eer_mod-objs := dasd_eer.o dasd_mod-objs := dasd.o dasd_ioctl.o dasd_proc.o dasd_devmap.o \ dasd_genhd.o dasd_erp.o @@ -14,6 +13,5 @@ obj-$(CONFIG_DASD_DIAG) += dasd_diag_mod.o obj-$(CONFIG_DASD_ECKD) += dasd_eckd_mod.o obj-$(CONFIG_DASD_FBA) += dasd_fba_mod.o obj-$(CONFIG_DASD_CMB) += dasd_cmb.o -obj-$(CONFIG_DASD_EER) += dasd_eer.o obj-$(CONFIG_BLK_DEV_XPRAM) += xpram.o obj-$(CONFIG_DCSSBLK) += dcssblk.o diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 06bb992a4c6c..af1d5b404cee 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include @@ -58,7 +57,6 @@ static void dasd_int_handler(struct ccw_device *, unsigned long, struct irb *); static void dasd_flush_ccw_queue(struct dasd_device *, int); static void dasd_tasklet(struct dasd_device *); static void do_kick_device(void *data); -static void dasd_disable_eer(struct dasd_device *device); /* * SECTION: Operations on the device structure. @@ -153,8 +151,6 @@ dasd_state_new_to_known(struct dasd_device *device) static inline void dasd_state_known_to_new(struct dasd_device * device) { - /* disable extended error reporting for this device */ - dasd_disable_eer(device); /* Forget the discipline information. */ if (device->discipline) module_put(device->discipline->owner); @@ -876,9 +872,6 @@ dasd_handle_state_change_pending(struct dasd_device *device) struct dasd_ccw_req *cqr; struct list_head *l, *n; - /* first of all call extended error reporting */ - dasd_write_eer_trigger(DASD_EER_STATECHANGE, device, NULL); - device->stopped &= ~DASD_STOPPED_PENDING; /* restart all 'running' IO on queue */ @@ -1098,19 +1091,6 @@ restart: } goto restart; } - - /* first of all call extended error reporting */ - if (device->eer && cqr->status == DASD_CQR_FAILED) { - dasd_write_eer_trigger(DASD_EER_FATALERROR, - device, cqr); - - /* restart request */ - cqr->status = DASD_CQR_QUEUED; - cqr->retries = 255; - device->stopped |= DASD_STOPPED_QUIESCE; - goto restart; - } - /* Process finished ERP request. */ if (cqr->refers) { __dasd_process_erp(device, cqr); @@ -1248,8 +1228,7 @@ __dasd_start_head(struct dasd_device * device) cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, list); /* check FAILFAST */ if (device->stopped & ~DASD_STOPPED_PENDING && - test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) && - (!device->eer)) { + test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags)) { cqr->status = DASD_CQR_FAILED; dasd_schedule_bh(device); } @@ -2005,9 +1984,6 @@ dasd_generic_notify(struct ccw_device *cdev, int event) switch (event) { case CIO_GONE: case CIO_NO_PATH: - /* first of all call extended error reporting */ - dasd_write_eer_trigger(DASD_EER_NOPATH, device, NULL); - if (device->state < DASD_STATE_BASIC) break; /* Device is active. We want to keep it. */ @@ -2065,51 +2041,6 @@ dasd_generic_auto_online (struct ccw_driver *dasd_discipline_driver) put_driver(drv); } -/* - * notifications for extended error reports - */ -static struct notifier_block *dasd_eer_chain; - -int -dasd_register_eer_notifier(struct notifier_block *nb) -{ - return notifier_chain_register(&dasd_eer_chain, nb); -} - -int -dasd_unregister_eer_notifier(struct notifier_block *nb) -{ - return notifier_chain_unregister(&dasd_eer_chain, nb); -} - -/* - * Notify the registered error reporting module of a problem - */ -void -dasd_write_eer_trigger(unsigned int id, struct dasd_device *device, - struct dasd_ccw_req *cqr) -{ - if (device->eer) { - struct dasd_eer_trigger temp; - temp.id = id; - temp.device = device; - temp.cqr = cqr; - notifier_call_chain(&dasd_eer_chain, DASD_EER_TRIGGER, - (void *)&temp); - } -} - -/* - * Tell the registered error reporting module to disable error reporting for - * a given device and to cleanup any private data structures on that device. - */ -static void -dasd_disable_eer(struct dasd_device *device) -{ - notifier_call_chain(&dasd_eer_chain, DASD_EER_DISABLE, (void *)device); -} - - static int __init dasd_init(void) { @@ -2191,11 +2122,6 @@ EXPORT_SYMBOL_GPL(dasd_generic_set_online); EXPORT_SYMBOL_GPL(dasd_generic_set_offline); EXPORT_SYMBOL_GPL(dasd_generic_auto_online); -EXPORT_SYMBOL(dasd_register_eer_notifier); -EXPORT_SYMBOL(dasd_unregister_eer_notifier); -EXPORT_SYMBOL(dasd_write_eer_trigger); - - /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c index c811380b9079..4ee0f934e325 100644 --- a/drivers/s390/block/dasd_3990_erp.c +++ b/drivers/s390/block/dasd_3990_erp.c @@ -1108,9 +1108,6 @@ dasd_3990_handle_env_data(struct dasd_ccw_req * erp, char *sense) case 0x0B: DEV_MESSAGE(KERN_WARNING, device, "%s", "FORMAT F - Volume is suspended duplex"); - /* call extended error reporting (EER) */ - dasd_write_eer_trigger(DASD_EER_PPRCSUSPEND, device, - erp->refers); break; case 0x0C: DEV_MESSAGE(KERN_WARNING, device, "%s", diff --git a/drivers/s390/block/dasd_eckd.h b/drivers/s390/block/dasd_eckd.h index e15dd7978050..bc3823d35223 100644 --- a/drivers/s390/block/dasd_eckd.h +++ b/drivers/s390/block/dasd_eckd.h @@ -29,7 +29,6 @@ #define DASD_ECKD_CCW_PSF 0x27 #define DASD_ECKD_CCW_RSSD 0x3e #define DASD_ECKD_CCW_LOCATE_RECORD 0x47 -#define DASD_ECKD_CCW_SNSS 0x54 #define DASD_ECKD_CCW_DEFINE_EXTENT 0x63 #define DASD_ECKD_CCW_WRITE_MT 0x85 #define DASD_ECKD_CCW_READ_MT 0x86 diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c deleted file mode 100644 index f70cd7716b24..000000000000 --- a/drivers/s390/block/dasd_eer.c +++ /dev/null @@ -1,1090 +0,0 @@ -/* - * character device driver for extended error reporting - * - * - * Copyright (C) 2005 IBM Corporation - * extended error reporting for DASD ECKD devices - * Author(s): Stefan Weinhuber - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "dasd_int.h" -#include "dasd_eckd.h" - - -MODULE_LICENSE("GPL"); - -MODULE_AUTHOR("Stefan Weinhuber "); -MODULE_DESCRIPTION("DASD extended error reporting module"); - - -#ifdef PRINTK_HEADER -#undef PRINTK_HEADER -#endif /* PRINTK_HEADER */ -#define PRINTK_HEADER "dasd(eer):" - - - - - -/*****************************************************************************/ -/* the internal buffer */ -/*****************************************************************************/ - -/* - * The internal buffer is meant to store obaque blobs of data, so it doesn't - * know of higher level concepts like triggers. - * It consists of a number of pages that are used as a ringbuffer. Each data - * blob is stored in a simple record that consists of an integer, which - * contains the size of the following data, and the data bytes themselfes. - * - * To allow for multiple independent readers we create one internal buffer - * each time the device is opened and destroy the buffer when the file is - * closed again. - * - * One record can be written to a buffer by using the functions - * - dasd_eer_start_record (one time per record to write the size to the buffer - * and reserve the space for the data) - * - dasd_eer_write_buffer (one or more times per record to write the data) - * The data can be written in several steps but you will have to compute - * the total size up front for the invocation of dasd_eer_start_record. - * If the ringbuffer is full, dasd_eer_start_record will remove the required - * number of old records. - * - * A record is typically read in two steps, first read the integer that - * specifies the size of the following data, then read the data. - * Both can be done by - * - dasd_eer_read_buffer - * - * For all mentioned functions you need to get the bufferlock first and keep it - * until a complete record is written or read. - */ - - -/* - * Alle information necessary to keep track of an internal buffer is kept in - * a struct eerbuffer. The buffer specific to a file pointer is strored in - * the private_data field of that file. To be able to write data to all - * existing buffers, each buffer is also added to the bufferlist. - * If the user doesn't want to read a complete record in one go, we have to - * keep track of the rest of the record. residual stores the number of bytes - * that are still to deliver. If the rest of the record is invalidated between - * two reads then residual will be set to -1 so that the next read will fail. - * All entries in the eerbuffer structure are protected with the bufferlock. - * To avoid races between writing to a buffer on the one side and creating - * and destroying buffers on the other side, the bufferlock must also be used - * to protect the bufferlist. - */ - -struct eerbuffer { - struct list_head list; - char **buffer; - int buffersize; - int buffer_page_count; - int head; - int tail; - int residual; -}; - -LIST_HEAD(bufferlist); - -static spinlock_t bufferlock = SPIN_LOCK_UNLOCKED; - -DECLARE_WAIT_QUEUE_HEAD(dasd_eer_read_wait_queue); - -/* - * How many free bytes are available on the buffer. - * needs to be called with bufferlock held - */ -static int -dasd_eer_get_free_bytes(struct eerbuffer *eerb) -{ - if (eerb->head < eerb->tail) { - return eerb->tail - eerb->head - 1; - } else - return eerb->buffersize - eerb->head + eerb->tail -1; -} - -/* - * How many bytes of buffer space are used. - * needs to be called with bufferlock held - */ -static int -dasd_eer_get_filled_bytes(struct eerbuffer *eerb) -{ - - if (eerb->head >= eerb->tail) { - return eerb->head - eerb->tail; - } else - return eerb->buffersize - eerb->tail + eerb->head; -} - -/* - * The dasd_eer_write_buffer function just copies count bytes of data - * to the buffer. Make sure to call dasd_eer_start_record first, to - * make sure that enough free space is available. - * needs to be called with bufferlock held - */ -static void -dasd_eer_write_buffer(struct eerbuffer *eerb, int count, char *data) -{ - - unsigned long headindex,localhead; - unsigned long rest, len; - char *nextdata; - - nextdata = data; - rest = count; - while (rest > 0) { - headindex = eerb->head / PAGE_SIZE; - localhead = eerb->head % PAGE_SIZE; - len = min(rest, (PAGE_SIZE - localhead)); - memcpy(eerb->buffer[headindex]+localhead, nextdata, len); - nextdata += len; - rest -= len; - eerb->head += len; - if ( eerb->head == eerb->buffersize ) - eerb->head = 0; /* wrap around */ - if (eerb->head > eerb->buffersize) { - MESSAGE(KERN_ERR, "%s", "runaway buffer head."); - BUG(); - } - } -} - -/* - * needs to be called with bufferlock held - */ -static int -dasd_eer_read_buffer(struct eerbuffer *eerb, int count, char *data) -{ - - unsigned long tailindex,localtail; - unsigned long rest, len, finalcount; - char *nextdata; - - finalcount = min(count, dasd_eer_get_filled_bytes(eerb)); - nextdata = data; - rest = finalcount; - while (rest > 0) { - tailindex = eerb->tail / PAGE_SIZE; - localtail = eerb->tail % PAGE_SIZE; - len = min(rest, (PAGE_SIZE - localtail)); - memcpy(nextdata, eerb->buffer[tailindex]+localtail, len); - nextdata += len; - rest -= len; - eerb->tail += len; - if ( eerb->tail == eerb->buffersize ) - eerb->tail = 0; /* wrap around */ - if (eerb->tail > eerb->buffersize) { - MESSAGE(KERN_ERR, "%s", "runaway buffer tail."); - BUG(); - } - } - return finalcount; -} - -/* - * Whenever you want to write a blob of data to the internal buffer you - * have to start by using this function first. It will write the number - * of bytes that will be written to the buffer. If necessary it will remove - * old records to make room for the new one. - * needs to be called with bufferlock held - */ -static int -dasd_eer_start_record(struct eerbuffer *eerb, int count) -{ - int tailcount; - if (count + sizeof(count) > eerb->buffersize) - return -ENOMEM; - while (dasd_eer_get_free_bytes(eerb) < count + sizeof(count)) { - if (eerb->residual > 0) { - eerb->tail += eerb->residual; - if (eerb->tail >= eerb->buffersize) - eerb->tail -= eerb->buffersize; - eerb->residual = -1; - } - dasd_eer_read_buffer(eerb, sizeof(tailcount), - (char*)(&tailcount)); - eerb->tail += tailcount; - if (eerb->tail >= eerb->buffersize) - eerb->tail -= eerb->buffersize; - } - dasd_eer_write_buffer(eerb, sizeof(count), (char*)(&count)); - - return 0; -}; - -/* - * release pages that are not used anymore - */ -static void -dasd_eer_free_buffer_pages(char **buf, int no_pages) -{ - int i; - - for (i = 0; i < no_pages; ++i) { - free_page((unsigned long)buf[i]); - } -} - -/* - * allocate a new set of memory pages - */ -static int -dasd_eer_allocate_buffer_pages(char **buf, int no_pages) -{ - int i; - - for (i = 0; i < no_pages; ++i) { - buf[i] = (char *) get_zeroed_page(GFP_KERNEL); - if (!buf[i]) { - dasd_eer_free_buffer_pages(buf, i); - return -ENOMEM; - } - } - return 0; -} - -/* - * empty the buffer by resetting head and tail - * In case there is a half read data blob in the buffer, we set residual - * to -1 to indicate that the remainder of the blob is lost. - */ -static void -dasd_eer_purge_buffer(struct eerbuffer *eerb) -{ - unsigned long flags; - - spin_lock_irqsave(&bufferlock, flags); - if (eerb->residual > 0) - eerb->residual = -1; - eerb->tail=0; - eerb->head=0; - spin_unlock_irqrestore(&bufferlock, flags); -} - -/* - * set the size of the buffer, newsize is the new number of pages to be used - * we don't try to copy any data back an forth, so any resize will also purge - * the buffer - */ -static int -dasd_eer_resize_buffer(struct eerbuffer *eerb, int newsize) -{ - int i, oldcount, reuse; - char **new; - char **old; - unsigned long flags; - - if (newsize < 1) - return -EINVAL; - if (eerb->buffer_page_count == newsize) { - /* documented behaviour is that any successfull invocation - * will purge all records */ - dasd_eer_purge_buffer(eerb); - return 0; - } - new = kmalloc(newsize*sizeof(char*), GFP_KERNEL); - if (!new) - return -ENOMEM; - - reuse=min(eerb->buffer_page_count, newsize); - for (i = 0; i < reuse; ++i) { - new[i] = eerb->buffer[i]; - } - if (eerb->buffer_page_count < newsize) { - if (dasd_eer_allocate_buffer_pages( - &new[eerb->buffer_page_count], - newsize - eerb->buffer_page_count)) { - kfree(new); - return -ENOMEM; - } - } - - spin_lock_irqsave(&bufferlock, flags); - old = eerb->buffer; - eerb->buffer = new; - if (eerb->residual > 0) - eerb->residual = -1; - eerb->tail = 0; - eerb->head = 0; - oldcount = eerb->buffer_page_count; - eerb->buffer_page_count = newsize; - spin_unlock_irqrestore(&bufferlock, flags); - - if (oldcount > newsize) { - for (i = newsize; i < oldcount; ++i) { - free_page((unsigned long)old[i]); - } - } - kfree(old); - - return 0; -} - - -/*****************************************************************************/ -/* The extended error reporting functionality */ -/*****************************************************************************/ - -/* - * When a DASD device driver wants to report an error, it calls the - * function dasd_eer_write_trigger (via a notifier mechanism) and gives the - * respective trigger ID as parameter. - * Currently there are four kinds of triggers: - * - * DASD_EER_FATALERROR: all kinds of unrecoverable I/O problems - * DASD_EER_PPRCSUSPEND: PPRC was suspended - * DASD_EER_NOPATH: There is no path to the device left. - * DASD_EER_STATECHANGE: The state of the device has changed. - * - * For the first three triggers all required information can be supplied by - * the caller. For these triggers a record is written by the function - * dasd_eer_write_standard_trigger. - * - * When dasd_eer_write_trigger is called to write a DASD_EER_STATECHANGE - * trigger, we have to gather the necessary sense data first. We cannot queue - * the necessary SNSS (sense subsystem status) request immediatly, since we - * are likely to run in a deadlock situation. Instead, we schedule a - * work_struct that calls the function dasd_eer_sense_subsystem_status to - * create and start an SNSS request asynchronously. - * - * To avoid memory allocations at runtime, the necessary memory is allocated - * when the extended error reporting is enabled for a device (by - * dasd_eer_probe). There is one private eer data structure for each eer - * enabled DASD device. It contains memory for the work_struct, one SNSS cqr - * and a flags field that is used to coordinate the use of the cqr. The call - * to write a state change trigger can come in at any time, so we have one flag - * CQR_IN_USE that protects the cqr itself. When this flag indicates that the - * cqr is currently in use, dasd_eer_sense_subsystem_status cannot start a - * second request but sets the SNSS_REQUESTED flag instead. - * - * When the request is finished, the callback function dasd_eer_SNSS_cb - * is called. This function will invoke the function - * dasd_eer_write_SNSS_trigger to finally write the trigger. It will also - * check the SNSS_REQUESTED flag and if it is set it will call - * dasd_eer_sense_subsystem_status again. - * - * To avoid race conditions during the handling of the lock, the flags must - * be protected by the snsslock. - */ - -struct dasd_eer_private { - struct dasd_ccw_req *cqr; - unsigned long flags; - struct work_struct worker; -}; - -static void dasd_eer_destroy(struct dasd_device *device, - struct dasd_eer_private *eer); -static int -dasd_eer_write_trigger(struct dasd_eer_trigger *trigger); -static void dasd_eer_sense_subsystem_status(void *data); -static int dasd_eer_notify(struct notifier_block *self, - unsigned long action, void *data); - -struct workqueue_struct *dasd_eer_workqueue; - -#define SNSS_DATA_SIZE 44 -static spinlock_t snsslock = SPIN_LOCK_UNLOCKED; - -#define DASD_EER_BUSID_SIZE 10 -struct dasd_eer_header { - __u32 total_size; - __u32 trigger; - __u64 tv_sec; - __u64 tv_usec; - char busid[DASD_EER_BUSID_SIZE]; -} __attribute__ ((packed)); - -static struct notifier_block dasd_eer_nb = { - .notifier_call = dasd_eer_notify, -}; - -/* - * flags for use with dasd_eer_private - */ -#define CQR_IN_USE 0 -#define SNSS_REQUESTED 1 - -/* - * This function checks if extended error reporting is available for a given - * dasd_device. If yes, then it creates and returns a struct dasd_eer, - * otherwise it returns an -EPERM error pointer. - */ -struct dasd_eer_private * -dasd_eer_probe(struct dasd_device *device) -{ - struct dasd_eer_private *private; - - if (!(device && device->discipline - && !strcmp(device->discipline->name, "ECKD"))) { - return ERR_PTR(-EPERM); - } - /* allocate the private data structure */ - private = (struct dasd_eer_private *)kmalloc( - sizeof(struct dasd_eer_private), GFP_KERNEL); - if (!private) { - return ERR_PTR(-ENOMEM); - } - INIT_WORK(&private->worker, dasd_eer_sense_subsystem_status, - (void *)device); - private->cqr = dasd_kmalloc_request("ECKD", - 1 /* SNSS */ , - SNSS_DATA_SIZE , - device); - if (!private->cqr) { - kfree(private); - return ERR_PTR(-ENOMEM); - } - private->flags = 0; - return private; -}; - -/* - * If our private SNSS request is queued, remove it from the - * dasd ccw queue so we can free the requests memory. - */ -static void -dasd_eer_dequeue_SNSS_request(struct dasd_device *device, - struct dasd_eer_private *eer) -{ - struct list_head *lst, *nxt; - struct dasd_ccw_req *cqr, *erpcqr; - dasd_erp_fn_t erp_fn; - - spin_lock_irq(get_ccwdev_lock(device->cdev)); - list_for_each_safe(lst, nxt, &device->ccw_queue) { - cqr = list_entry(lst, struct dasd_ccw_req, list); - /* we are looking for two kinds or requests */ - /* first kind: our SNSS request: */ - if (cqr == eer->cqr) { - if (cqr->status == DASD_CQR_IN_IO) - device->discipline->term_IO(cqr); - list_del(&cqr->list); - break; - } - /* second kind: ERP requests for our SNSS request */ - if (cqr->refers) { - /* If this erp request chain ends in our cqr, then */ - /* cal the erp_postaction to clean it up */ - erpcqr = cqr; - while (erpcqr->refers) { - erpcqr = erpcqr->refers; - } - if (erpcqr == eer->cqr) { - erp_fn = device->discipline->erp_postaction( - cqr); - erp_fn(cqr); - } - continue; - } - } - spin_unlock_irq(get_ccwdev_lock(device->cdev)); -} - -/* - * This function dismantles a struct dasd_eer that was created by - * dasd_eer_probe. Since we want to free our private data structure, - * we must make sure that the memory is not in use anymore. - * We have to flush the work queue and remove a possible SNSS request - * from the dasd queue. - */ -static void -dasd_eer_destroy(struct dasd_device *device, struct dasd_eer_private *eer) -{ - flush_workqueue(dasd_eer_workqueue); - dasd_eer_dequeue_SNSS_request(device, eer); - dasd_kfree_request(eer->cqr, device); - kfree(eer); -}; - -/* - * enable the extended error reporting for a particular device - */ -static int -dasd_eer_enable_on_device(struct dasd_device *device) -{ - void *eer; - if (!device) - return -ENODEV; - if (device->eer) - return 0; - if (!try_module_get(THIS_MODULE)) { - return -EINVAL; - } - eer = (void *)dasd_eer_probe(device); - if (IS_ERR(eer)) { - module_put(THIS_MODULE); - return PTR_ERR(eer); - } - device->eer = eer; - return 0; -} - -/* - * enable the extended error reporting for a particular device - */ -static int -dasd_eer_disable_on_device(struct dasd_device *device) -{ - struct dasd_eer_private *eer = device->eer; - - if (!device) - return -ENODEV; - if (!device->eer) - return 0; - device->eer = NULL; - dasd_eer_destroy(device,eer); - module_put(THIS_MODULE); - - return 0; -} - -/* - * Set extended error reporting (eer) - * Note: This will be registered as a DASD ioctl, to be called on DASD devices. - */ -static int -dasd_ioctl_set_eer(struct block_device *bdev, int no, long args) -{ - struct dasd_device *device; - int intval; - - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - if (bdev != bdev->bd_contains) - /* Error-reporting is not allowed for partitions */ - return -EINVAL; - if (get_user(intval, (int __user *) args)) - return -EFAULT; - device = bdev->bd_disk->private_data; - if (device == NULL) - return -ENODEV; - - intval = (intval != 0); - DEV_MESSAGE (KERN_DEBUG, device, - "set eer on device to %d", intval); - if (intval) - return dasd_eer_enable_on_device(device); - else - return dasd_eer_disable_on_device(device); -} - -/* - * Get value of extended error reporting. - * Note: This will be registered as a DASD ioctl, to be called on DASD devices. - */ -static int -dasd_ioctl_get_eer(struct block_device *bdev, int no, long args) -{ - struct dasd_device *device; - - device = bdev->bd_disk->private_data; - if (device == NULL) - return -ENODEV; - return put_user((device->eer != NULL), (int __user *) args); -} - -/* - * The following function can be used for those triggers that have - * all necessary data available when the function is called. - * If the parameter cqr is not NULL, the chain of requests will be searched - * for valid sense data, and all valid sense data sets will be added to - * the triggers data. - */ -static int -dasd_eer_write_standard_trigger(int trigger, struct dasd_device *device, - struct dasd_ccw_req *cqr) -{ - struct dasd_ccw_req *temp_cqr; - int data_size; - struct timeval tv; - struct dasd_eer_header header; - unsigned long flags; - struct eerbuffer *eerb; - - /* go through cqr chain and count the valid sense data sets */ - temp_cqr = cqr; - data_size = 0; - while (temp_cqr) { - if (temp_cqr->irb.esw.esw0.erw.cons) - data_size += 32; - temp_cqr = temp_cqr->refers; - } - - header.total_size = sizeof(header) + data_size + 4; /* "EOR" */ - header.trigger = trigger; - do_gettimeofday(&tv); - header.tv_sec = tv.tv_sec; - header.tv_usec = tv.tv_usec; - strncpy(header.busid, device->cdev->dev.bus_id, DASD_EER_BUSID_SIZE); - - spin_lock_irqsave(&bufferlock, flags); - list_for_each_entry(eerb, &bufferlist, list) { - dasd_eer_start_record(eerb, header.total_size); - dasd_eer_write_buffer(eerb, sizeof(header), (char*)(&header)); - temp_cqr = cqr; - while (temp_cqr) { - if (temp_cqr->irb.esw.esw0.erw.cons) - dasd_eer_write_buffer(eerb, 32, cqr->irb.ecw); - temp_cqr = temp_cqr->refers; - } - dasd_eer_write_buffer(eerb, 4,"EOR"); - } - spin_unlock_irqrestore(&bufferlock, flags); - - wake_up_interruptible(&dasd_eer_read_wait_queue); - - return 0; -} - -/* - * This function writes a DASD_EER_STATECHANGE trigger. - */ -static void -dasd_eer_write_SNSS_trigger(struct dasd_device *device, - struct dasd_ccw_req *cqr) -{ - int data_size; - int snss_rc; - struct timeval tv; - struct dasd_eer_header header; - unsigned long flags; - struct eerbuffer *eerb; - - snss_rc = (cqr->status == DASD_CQR_FAILED) ? -EIO : 0; - if (snss_rc) - data_size = 0; - else - data_size = SNSS_DATA_SIZE; - - header.total_size = sizeof(header) + data_size + 4; /* "EOR" */ - header.trigger = DASD_EER_STATECHANGE; - do_gettimeofday(&tv); - header.tv_sec = tv.tv_sec; - header.tv_usec = tv.tv_usec; - strncpy(header.busid, device->cdev->dev.bus_id, DASD_EER_BUSID_SIZE); - - spin_lock_irqsave(&bufferlock, flags); - list_for_each_entry(eerb, &bufferlist, list) { - dasd_eer_start_record(eerb, header.total_size); - dasd_eer_write_buffer(eerb, sizeof(header),(char*)(&header)); - if (!snss_rc) - dasd_eer_write_buffer(eerb, SNSS_DATA_SIZE, cqr->data); - dasd_eer_write_buffer(eerb, 4,"EOR"); - } - spin_unlock_irqrestore(&bufferlock, flags); - - wake_up_interruptible(&dasd_eer_read_wait_queue); -} - -/* - * callback function for use with SNSS request - */ -static void -dasd_eer_SNSS_cb(struct dasd_ccw_req *cqr, void *data) -{ - struct dasd_device *device; - struct dasd_eer_private *private; - unsigned long irqflags; - - device = (struct dasd_device *)data; - private = (struct dasd_eer_private *)device->eer; - dasd_eer_write_SNSS_trigger(device, cqr); - spin_lock_irqsave(&snsslock, irqflags); - if(!test_and_clear_bit(SNSS_REQUESTED, &private->flags)) { - clear_bit(CQR_IN_USE, &private->flags); - spin_unlock_irqrestore(&snsslock, irqflags); - return; - }; - clear_bit(CQR_IN_USE, &private->flags); - spin_unlock_irqrestore(&snsslock, irqflags); - dasd_eer_sense_subsystem_status(device); - return; -} - -/* - * clean a used cqr before using it again - */ -static void -dasd_eer_clean_SNSS_request(struct dasd_ccw_req *cqr) -{ - struct ccw1 *cpaddr = cqr->cpaddr; - void *data = cqr->data; - - memset(cqr, 0, sizeof(struct dasd_ccw_req)); - memset(cpaddr, 0, sizeof(struct ccw1)); - memset(data, 0, SNSS_DATA_SIZE); - cqr->cpaddr = cpaddr; - cqr->data = data; - strncpy((char *) &cqr->magic, "ECKD", 4); - ASCEBC((char *) &cqr->magic, 4); - set_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); -} - -/* - * build and start an SNSS request - * This function is called from a work queue so we have to - * pass the dasd_device pointer as a void pointer. - */ -static void -dasd_eer_sense_subsystem_status(void *data) -{ - struct dasd_device *device; - struct dasd_eer_private *private; - struct dasd_ccw_req *cqr; - struct ccw1 *ccw; - unsigned long irqflags; - - device = (struct dasd_device *)data; - private = (struct dasd_eer_private *)device->eer; - if (!private) /* device not eer enabled any more */ - return; - cqr = private->cqr; - spin_lock_irqsave(&snsslock, irqflags); - if(test_and_set_bit(CQR_IN_USE, &private->flags)) { - set_bit(SNSS_REQUESTED, &private->flags); - spin_unlock_irqrestore(&snsslock, irqflags); - return; - }; - spin_unlock_irqrestore(&snsslock, irqflags); - dasd_eer_clean_SNSS_request(cqr); - cqr->device = device; - cqr->retries = 255; - cqr->expires = 10 * HZ; - - ccw = cqr->cpaddr; - ccw->cmd_code = DASD_ECKD_CCW_SNSS; - ccw->count = SNSS_DATA_SIZE; - ccw->flags = 0; - ccw->cda = (__u32)(addr_t)cqr->data; - - cqr->buildclk = get_clock(); - cqr->status = DASD_CQR_FILLED; - cqr->callback = dasd_eer_SNSS_cb; - cqr->callback_data = (void *)device; - dasd_add_request_head(cqr); - - return; -} - -/* - * This function is called for all triggers. It calls the appropriate - * function that writes the actual trigger records. - */ -static int -dasd_eer_write_trigger(struct dasd_eer_trigger *trigger) -{ - int rc; - struct dasd_eer_private *private = trigger->device->eer; - - switch (trigger->id) { - case DASD_EER_FATALERROR: - case DASD_EER_PPRCSUSPEND: - rc = dasd_eer_write_standard_trigger( - trigger->id, trigger->device, trigger->cqr); - break; - case DASD_EER_NOPATH: - rc = dasd_eer_write_standard_trigger( - trigger->id, trigger->device, NULL); - break; - case DASD_EER_STATECHANGE: - if (queue_work(dasd_eer_workqueue, &private->worker)) { - rc=0; - } else { - /* If the work_struct was already queued, it can't - * be queued again. But this is OK since we don't - * need to have it queued twice. - */ - rc = -EBUSY; - } - break; - default: /* unknown trigger, so we write it without any sense data */ - rc = dasd_eer_write_standard_trigger( - trigger->id, trigger->device, NULL); - break; - } - return rc; -} - -/* - * This function is registered with the dasd device driver and gets called - * for all dasd eer notifications. - */ -static int dasd_eer_notify(struct notifier_block *self, - unsigned long action, void *data) -{ - switch (action) { - case DASD_EER_DISABLE: - dasd_eer_disable_on_device((struct dasd_device *)data); - break; - case DASD_EER_TRIGGER: - dasd_eer_write_trigger((struct dasd_eer_trigger *)data); - break; - } - return NOTIFY_OK; -} - - -/*****************************************************************************/ -/* the device operations */ -/*****************************************************************************/ - -/* - * On the one side we need a lock to access our internal buffer, on the - * other side a copy_to_user can sleep. So we need to copy the data we have - * to transfer in a readbuffer, which is protected by the readbuffer_mutex. - */ -static char readbuffer[PAGE_SIZE]; -DECLARE_MUTEX(readbuffer_mutex); - - -static int -dasd_eer_open(struct inode *inp, struct file *filp) -{ - struct eerbuffer *eerb; - unsigned long flags; - - eerb = kmalloc(sizeof(struct eerbuffer), GFP_KERNEL); - eerb->head = 0; - eerb->tail = 0; - eerb->residual = 0; - eerb->buffer_page_count = 1; - eerb->buffersize = eerb->buffer_page_count * PAGE_SIZE; - eerb->buffer = kmalloc(eerb->buffer_page_count*sizeof(char*), - GFP_KERNEL); - if (!eerb->buffer) - return -ENOMEM; - if (dasd_eer_allocate_buffer_pages(eerb->buffer, - eerb->buffer_page_count)) { - kfree(eerb->buffer); - return -ENOMEM; - } - filp->private_data = eerb; - spin_lock_irqsave(&bufferlock, flags); - list_add(&eerb->list, &bufferlist); - spin_unlock_irqrestore(&bufferlock, flags); - - return nonseekable_open(inp,filp); -} - -static int -dasd_eer_close(struct inode *inp, struct file *filp) -{ - struct eerbuffer *eerb; - unsigned long flags; - - eerb = (struct eerbuffer *)filp->private_data; - spin_lock_irqsave(&bufferlock, flags); - list_del(&eerb->list); - spin_unlock_irqrestore(&bufferlock, flags); - dasd_eer_free_buffer_pages(eerb->buffer, eerb->buffer_page_count); - kfree(eerb->buffer); - kfree(eerb); - - return 0; -} - -static long -dasd_eer_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - int intval; - struct eerbuffer *eerb; - - eerb = (struct eerbuffer *)filp->private_data; - switch (cmd) { - case DASD_EER_PURGE: - dasd_eer_purge_buffer(eerb); - return 0; - case DASD_EER_SETBUFSIZE: - if (get_user(intval, (int __user *)arg)) - return -EFAULT; - return dasd_eer_resize_buffer(eerb, intval); - default: - return -ENOIOCTLCMD; - } -} - -static ssize_t -dasd_eer_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) -{ - int tc,rc; - int tailcount,effective_count; - unsigned long flags; - struct eerbuffer *eerb; - - eerb = (struct eerbuffer *)filp->private_data; - if(down_interruptible(&readbuffer_mutex)) - return -ERESTARTSYS; - - spin_lock_irqsave(&bufferlock, flags); - - if (eerb->residual < 0) { /* the remainder of this record */ - /* has been deleted */ - eerb->residual = 0; - spin_unlock_irqrestore(&bufferlock, flags); - up(&readbuffer_mutex); - return -EIO; - } else if (eerb->residual > 0) { - /* OK we still have a second half of a record to deliver */ - effective_count = min(eerb->residual, (int)count); - eerb->residual -= effective_count; - } else { - tc = 0; - while (!tc) { - tc = dasd_eer_read_buffer(eerb, - sizeof(tailcount), (char*)(&tailcount)); - if (!tc) { - /* no data available */ - spin_unlock_irqrestore(&bufferlock, flags); - up(&readbuffer_mutex); - if (filp->f_flags & O_NONBLOCK) - return -EAGAIN; - rc = wait_event_interruptible( - dasd_eer_read_wait_queue, - eerb->head != eerb->tail); - if (rc) { - return rc; - } - if(down_interruptible(&readbuffer_mutex)) - return -ERESTARTSYS; - spin_lock_irqsave(&bufferlock, flags); - } - } - WARN_ON(tc != sizeof(tailcount)); - effective_count = min(tailcount,(int)count); - eerb->residual = tailcount - effective_count; - } - - tc = dasd_eer_read_buffer(eerb, effective_count, readbuffer); - WARN_ON(tc != effective_count); - - spin_unlock_irqrestore(&bufferlock, flags); - - if (copy_to_user(buf, readbuffer, effective_count)) { - up(&readbuffer_mutex); - return -EFAULT; - } - - up(&readbuffer_mutex); - return effective_count; -} - -static unsigned int -dasd_eer_poll (struct file *filp, poll_table *ptable) -{ - unsigned int mask; - unsigned long flags; - struct eerbuffer *eerb; - - eerb = (struct eerbuffer *)filp->private_data; - poll_wait(filp, &dasd_eer_read_wait_queue, ptable); - spin_lock_irqsave(&bufferlock, flags); - if (eerb->head != eerb->tail) - mask = POLLIN | POLLRDNORM ; - else - mask = 0; - spin_unlock_irqrestore(&bufferlock, flags); - return mask; -} - -static struct file_operations dasd_eer_fops = { - .open = &dasd_eer_open, - .release = &dasd_eer_close, - .unlocked_ioctl = &dasd_eer_ioctl, - .compat_ioctl = &dasd_eer_ioctl, - .read = &dasd_eer_read, - .poll = &dasd_eer_poll, - .owner = THIS_MODULE, -}; - -static struct miscdevice dasd_eer_dev = { - .minor = MISC_DYNAMIC_MINOR, - .name = "dasd_eer", - .fops = &dasd_eer_fops, -}; - - -/*****************************************************************************/ -/* Init and exit */ -/*****************************************************************************/ - -static int -__init dasd_eer_init(void) -{ - int rc; - - dasd_eer_workqueue = create_singlethread_workqueue("dasd_eer"); - if (!dasd_eer_workqueue) { - MESSAGE(KERN_ERR , "%s", "dasd_eer_init could not " - "create workqueue \n"); - rc = -ENOMEM; - goto out; - } - - rc = dasd_register_eer_notifier(&dasd_eer_nb); - if (rc) { - MESSAGE(KERN_ERR, "%s", "dasd_eer_init could not " - "register error reporting"); - goto queue; - } - - dasd_ioctl_no_register(THIS_MODULE, BIODASDEERSET, dasd_ioctl_set_eer); - dasd_ioctl_no_register(THIS_MODULE, BIODASDEERGET, dasd_ioctl_get_eer); - - /* we don't need our own character device, - * so we just register as misc device */ - rc = misc_register(&dasd_eer_dev); - if (rc) { - MESSAGE(KERN_ERR, "%s", "dasd_eer_init could not " - "register misc device"); - goto unregister; - } - - return 0; - -unregister: - dasd_unregister_eer_notifier(&dasd_eer_nb); - dasd_ioctl_no_unregister(THIS_MODULE, BIODASDEERSET, - dasd_ioctl_set_eer); - dasd_ioctl_no_unregister(THIS_MODULE, BIODASDEERGET, - dasd_ioctl_get_eer); -queue: - destroy_workqueue(dasd_eer_workqueue); -out: - return rc; - -} -module_init(dasd_eer_init); - -static void -__exit dasd_eer_exit(void) -{ - dasd_unregister_eer_notifier(&dasd_eer_nb); - dasd_ioctl_no_unregister(THIS_MODULE, BIODASDEERSET, - dasd_ioctl_set_eer); - dasd_ioctl_no_unregister(THIS_MODULE, BIODASDEERGET, - dasd_ioctl_get_eer); - destroy_workqueue(dasd_eer_workqueue); - - WARN_ON(misc_deregister(&dasd_eer_dev) != 0); -} -module_exit(dasd_eer_exit); diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 5efac1b97ece..0592354cc604 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -275,34 +275,6 @@ struct dasd_discipline { extern struct dasd_discipline *dasd_diag_discipline_pointer; - -/* - * Notification numbers for extended error reporting notifications: - * The DASD_EER_DISABLE notification is sent before a dasd_device (and it's - * eer pointer) is freed. The error reporting module needs to do all necessary - * cleanup steps. - * The DASD_EER_TRIGGER notification sends the actual error reports (triggers). - */ -#define DASD_EER_DISABLE 0 -#define DASD_EER_TRIGGER 1 - -/* Trigger IDs for extended error reporting DASD_EER_TRIGGER notification */ -#define DASD_EER_FATALERROR 1 -#define DASD_EER_NOPATH 2 -#define DASD_EER_STATECHANGE 3 -#define DASD_EER_PPRCSUSPEND 4 - -/* - * The dasd_eer_trigger structure contains all data that we need to send - * along with an DASD_EER_TRIGGER notification. - */ -struct dasd_eer_trigger { - unsigned int id; - struct dasd_device *device; - struct dasd_ccw_req *cqr; -}; - - struct dasd_device { /* Block device stuff. */ struct gendisk *gdp; @@ -316,9 +288,6 @@ struct dasd_device { unsigned long flags; /* per device flags */ unsigned short features; /* copy of devmap-features (read-only!) */ - /* extended error reporting stuff (eer) */ - void *eer; - /* Device discipline stuff. */ struct dasd_discipline *discipline; struct dasd_discipline *base_discipline; @@ -520,12 +489,6 @@ int dasd_generic_set_online(struct ccw_device *, struct dasd_discipline *); int dasd_generic_set_offline (struct ccw_device *cdev); int dasd_generic_notify(struct ccw_device *, int); void dasd_generic_auto_online (struct ccw_driver *); -int dasd_register_eer_notifier(struct notifier_block *); -int dasd_unregister_eer_notifier(struct notifier_block *); -void dasd_write_eer_trigger(unsigned int , struct dasd_device *, - struct dasd_ccw_req *); - - /* externals in dasd_devmap.c */ extern int dasd_max_devindex; diff --git a/include/asm-s390/dasd.h b/include/asm-s390/dasd.h index c744ff33b1df..1630c26e8f45 100644 --- a/include/asm-s390/dasd.h +++ b/include/asm-s390/dasd.h @@ -204,8 +204,7 @@ typedef struct attrib_data_t { * * Here ist how the ioctl-nr should be used: * 0 - 31 DASD driver itself - * 32 - 229 still open - * 230 - 239 DASD extended error reporting + * 32 - 239 still open * 240 - 255 reserved for EMC *******************************************************************************/ @@ -237,22 +236,12 @@ typedef struct attrib_data_t { #define BIODASDPSRD _IOR(DASD_IOCTL_LETTER,4,dasd_rssd_perf_stats_t) /* Get Attributes (cache operations) */ #define BIODASDGATTR _IOR(DASD_IOCTL_LETTER,5,attrib_data_t) -/* retrieve extended error-reporting value */ -#define BIODASDEERGET _IOR(DASD_IOCTL_LETTER,6,int) /* #define BIODASDFORMAT _IOW(IOCTL_LETTER,0,format_data_t) , deprecated */ #define BIODASDFMT _IOW(DASD_IOCTL_LETTER,1,format_data_t) /* Set Attributes (cache operations) */ #define BIODASDSATTR _IOW(DASD_IOCTL_LETTER,2,attrib_data_t) -/* retrieve extended error-reporting value */ -#define BIODASDEERSET _IOW(DASD_IOCTL_LETTER,3,int) - - -/* remove all records from the eer buffer */ -#define DASD_EER_PURGE _IO(DASD_IOCTL_LETTER,230) -/* set the number of pages that are used for the internal eer buffer */ -#define DASD_EER_SETBUFSIZE _IOW(DASD_IOCTL_LETTER,230,int) #endif /* DASD_H */ -- cgit v1.2.3 From b04ec261bd64f927bf3fce5cf9eeb0225557939d Mon Sep 17 00:00:00 2001 From: Hirokazu Takata Date: Mon, 20 Feb 2006 18:28:15 -0800 Subject: [PATCH] m32r: __cmpxchg_u32 fix This patch fixes a bug of include/asm-m32r/system.h:__cmpxchg_u32(). static __inline__ unsigned long __cmpxchg_u32(volatile unsigned int *p, unsigned int old, unsigned int new); In __cmpxchg_u32(), the "old" value must not be changed to the previous "*p" value. But the former code modifies the previous "*p" value. A deadlock at _atomic_dec_and_lock sometimes happened due to this bug. Signed-off-by: Hayato Fujiwara Signed-off-by: Hirokazu Takata Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-m32r/system.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-m32r/system.h b/include/asm-m32r/system.h index 06c12a037cba..d6a2c613be68 100644 --- a/include/asm-m32r/system.h +++ b/include/asm-m32r/system.h @@ -239,7 +239,7 @@ __cmpxchg_u32(volatile unsigned int *p, unsigned int old, unsigned int new) " bra 2f; \n" " .fillinsn \n" "1:" - M32R_UNLOCK" %2, @%1; \n" + M32R_UNLOCK" %0, @%1; \n" " .fillinsn \n" "2:" : "=&r" (retval) -- cgit v1.2.3 From 8ecbbcaf08c13c57d6602472478739d64650ee0e Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Tue, 14 Feb 2006 15:57:50 +0900 Subject: [MIPS] Fixes for uaccess.h with gcc >= 4.0.1 It seems current get_user() incorrectly sign-extend an unsigned int value on 64bit kernel. I think this is because '(__typeof__(val))' cast in final assignment. I suppose the cast should be '(__typeof__(*(addr))'. Signed-off-by: Atsushi Nemoto Signed-off-by: Ralf Baechle --- include/asm-mips/uaccess.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-mips/uaccess.h b/include/asm-mips/uaccess.h index 7a553e9d44d3..b96f3e0f3933 100644 --- a/include/asm-mips/uaccess.h +++ b/include/asm-mips/uaccess.h @@ -233,7 +233,7 @@ do { \ #define __get_user_check(x,ptr,size) \ ({ \ long __gu_err = -EFAULT; \ - const void __user * __gu_ptr = (ptr); \ + const __typeof__(*(ptr)) __user * __gu_ptr = (ptr); \ \ if (likely(access_ok(VERIFY_READ, __gu_ptr, size))) \ __get_user_common((x), size, __gu_ptr); \ @@ -258,7 +258,7 @@ do { \ : "=r" (__gu_err), "=r" (__gu_tmp) \ : "0" (0), "o" (__m(addr)), "i" (-EFAULT)); \ \ - (val) = (__typeof__(val)) __gu_tmp; \ + (val) = (__typeof__(*(addr))) __gu_tmp; \ } /* @@ -284,7 +284,7 @@ do { \ " .previous \n" \ : "=r" (__gu_err), "=&r" (__gu_tmp) \ : "0" (0), "r" (addr), "i" (-EFAULT)); \ - (val) = __gu_tmp; \ + (val) = (__typeof__(*(addr))) __gu_tmp; \ } /* -- cgit v1.2.3 From 124273773596cbf8aa9c79304b01091d4693f368 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 14 Feb 2006 14:22:10 +0000 Subject: [MIPS] Follow Uli's latest *at syscall changes. (This really is only the half of the patch which was forgotten in 326a625748535c4cdb1c632b1dcb07030989a393 ...) Signed-off-by: Ralf Baechle --- include/asm-mips/unistd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-mips/unistd.h b/include/asm-mips/unistd.h index 769305d20108..b5c78a4a0192 100644 --- a/include/asm-mips/unistd.h +++ b/include/asm-mips/unistd.h @@ -313,7 +313,7 @@ #define __NR_mknodat (__NR_Linux + 290) #define __NR_fchownat (__NR_Linux + 291) #define __NR_futimesat (__NR_Linux + 292) -#define __NR_newfstatat (__NR_Linux + 293) +#define __NR_fstatat (__NR_Linux + 293) #define __NR_unlinkat (__NR_Linux + 294) #define __NR_renameat (__NR_Linux + 295) #define __NR_linkat (__NR_Linux + 296) @@ -593,7 +593,7 @@ #define __NR_mknodat (__NR_Linux + 249) #define __NR_fchownat (__NR_Linux + 250) #define __NR_futimesat (__NR_Linux + 251) -#define __NR_newfstatat (__NR_Linux + 252) +#define __NR_fstatat (__NR_Linux + 252) #define __NR_unlinkat (__NR_Linux + 253) #define __NR_renameat (__NR_Linux + 254) #define __NR_linkat (__NR_Linux + 255) @@ -877,7 +877,7 @@ #define __NR_mknodat (__NR_Linux + 253) #define __NR_fchownat (__NR_Linux + 254) #define __NR_futimesat (__NR_Linux + 255) -#define __NR_newfstatat (__NR_Linux + 256) +#define __NR_fstatat (__NR_Linux + 256) #define __NR_unlinkat (__NR_Linux + 257) #define __NR_renameat (__NR_Linux + 258) #define __NR_linkat (__NR_Linux + 259) -- cgit v1.2.3 From 5bd546aa78b5d74f3162815e41940f862215d9e3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 17 Feb 2006 20:23:29 +0000 Subject: [MMC] Fix mmc_cmd_type() mask It's MMC_CMD_MASK not MMC_CMD_TYPE. Signed-off-by: Russell King --- include/linux/mmc/mmc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index f38872abc126..bdc556d88498 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -49,7 +49,7 @@ struct mmc_command { /* * These are the command types. */ -#define mmc_cmd_type(cmd) ((cmd)->flags & MMC_CMD_TYPE) +#define mmc_cmd_type(cmd) ((cmd)->flags & MMC_CMD_MASK) unsigned int retries; /* max number of retries */ unsigned int error; /* command error */ -- cgit v1.2.3 From fa675765afed59bb89adba3369094ebd428b930b Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 22 Feb 2006 09:39:02 -0800 Subject: Revert mount/umount uevent removal This change reverts the 033b96fd30db52a710d97b06f87d16fc59fee0f1 commit from Kay Sievers that removed the mount/umount uevents from the kernel. Some older versions of HAL still depend on these events to detect when a new device has been mounted. These events are not correctly emitted, and are broken by design, and so, should not be relied upon by any future program. Instead, the /proc/mounts file should be polled to properly detect this kind of event. A feature-removal-schedule.txt entry has been added, noting when this interface will be removed from the kernel. Signed-off-by: Greg Kroah-Hartman --- Documentation/feature-removal-schedule.txt | 9 +++++++++ fs/super.c | 15 ++++++++++++++- include/linux/kobject.h | 6 ++++-- lib/kobject_uevent.c | 4 ++++ 4 files changed, 31 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index b730d765b525..be5ae600f533 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -171,3 +171,12 @@ Why: The ISA interface is faster and should be always available. The I2C probing is also known to cause trouble in at least one case (see bug #5889.) Who: Jean Delvare + +--------------------------- + +What: mount/umount uevents +When: February 2007 +Why: These events are not correct, and do not properly let userspace know + when a file system has been mounted or unmounted. Userspace should + poll the /proc/mounts file instead to detect this properly. +Who: Greg Kroah-Hartman diff --git a/fs/super.c b/fs/super.c index 30294218fa63..e20b5580afd5 100644 --- a/fs/super.c +++ b/fs/super.c @@ -666,6 +666,16 @@ static int test_bdev_super(struct super_block *s, void *data) return (void *)s->s_bdev == data; } +static void bdev_uevent(struct block_device *bdev, enum kobject_action action) +{ + if (bdev->bd_disk) { + if (bdev->bd_part) + kobject_uevent(&bdev->bd_part->kobj, action); + else + kobject_uevent(&bdev->bd_disk->kobj, action); + } +} + struct super_block *get_sb_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)) @@ -707,8 +717,10 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type, up_write(&s->s_umount); deactivate_super(s); s = ERR_PTR(error); - } else + } else { s->s_flags |= MS_ACTIVE; + bdev_uevent(bdev, KOBJ_MOUNT); + } } return s; @@ -724,6 +736,7 @@ void kill_block_super(struct super_block *sb) { struct block_device *bdev = sb->s_bdev; + bdev_uevent(bdev, KOBJ_UMOUNT); generic_shutdown_super(sb); sync_blockdev(bdev); close_bdev_excl(bdev); diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 2a8d8da70961..c374b5fa8d3b 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -41,8 +41,10 @@ enum kobject_action { KOBJ_ADD = (__force kobject_action_t) 0x01, /* exclusive to core */ KOBJ_REMOVE = (__force kobject_action_t) 0x02, /* exclusive to core */ KOBJ_CHANGE = (__force kobject_action_t) 0x03, /* device state change */ - KOBJ_OFFLINE = (__force kobject_action_t) 0x04, /* device offline */ - KOBJ_ONLINE = (__force kobject_action_t) 0x05, /* device online */ + KOBJ_MOUNT = (__force kobject_action_t) 0x04, /* mount event for block devices (broken) */ + KOBJ_UMOUNT = (__force kobject_action_t) 0x05, /* umount event for block devices (broken) */ + KOBJ_OFFLINE = (__force kobject_action_t) 0x06, /* device offline */ + KOBJ_ONLINE = (__force kobject_action_t) 0x07, /* device online */ }; struct kobject { diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 1b1985c136ec..086a0c6e888e 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -38,6 +38,10 @@ static char *action_to_string(enum kobject_action action) return "remove"; case KOBJ_CHANGE: return "change"; + case KOBJ_MOUNT: + return "mount"; + case KOBJ_UMOUNT: + return "umount"; case KOBJ_OFFLINE: return "offline"; case KOBJ_ONLINE: -- cgit v1.2.3 From af898b8f602441a3bebe918a3b26adc92b30762e Mon Sep 17 00:00:00 2001 From: Alessandro Zummo Date: Wed, 22 Feb 2006 21:12:06 +0000 Subject: [ARM] 3343/1: NAS100d: Fix incorrect I2C pin assignment Patch from Alessandro Zummo The I2C pin assignment for the Iomega NAS100d board was incorrect. This patch fixes it. The correct assignment has now been tested using the new RTC class and a new driver for the RTC on the NAS100d. Signed-off-by: Rod Whitby Signed-off-by: Alessandro Zummo Signed-off-by: Russell King --- include/asm-arm/arch-ixp4xx/nas100d.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-arm/arch-ixp4xx/nas100d.h b/include/asm-arm/arch-ixp4xx/nas100d.h index 51ac0180427c..84467a5190d0 100644 --- a/include/asm-arm/arch-ixp4xx/nas100d.h +++ b/include/asm-arm/arch-ixp4xx/nas100d.h @@ -19,8 +19,8 @@ #error "Do not include this directly, instead #include " #endif -#define NAS100D_SDA_PIN 6 -#define NAS100D_SCL_PIN 5 +#define NAS100D_SDA_PIN 5 +#define NAS100D_SCL_PIN 6 /* * NAS100D PCI IRQs -- cgit v1.2.3 From df666b9c510fd27fd3b1afd9ddfa1eaa62ce12b3 Mon Sep 17 00:00:00 2001 From: Andrew Victor Date: Wed, 22 Feb 2006 21:23:35 +0000 Subject: [ARM] 3325/2: GPIO function to control multi-drive (open collector) capability Patch from Andrew Victor This patch adds the at91_set_multi_drive() function to enable/disable the multi-drive (open collector) pin capability on the AT91RM9200 processor. This is necessary to fix the UDC (USB Gadget) driver for the AT91RM9200 board as it will not allow the board reset line to be pulled low if the pullup is not driven as an open collector output as the boards are wired to the USB connector on both the DK/EK. This version of the patch updates it to 2.6.16-rc4. Orignal patch by Jeff Warren. Signed-off-by: Andrew Victor Signed-off-by: Russell King --- arch/arm/mach-at91rm9200/devices.c | 4 +++- arch/arm/mach-at91rm9200/gpio.c | 17 +++++++++++++++++ include/asm-arm/arch-at91rm9200/gpio.h | 1 + 3 files changed, 21 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/arm/mach-at91rm9200/devices.c b/arch/arm/mach-at91rm9200/devices.c index 8df3e5245651..57eedd5beaf6 100644 --- a/arch/arm/mach-at91rm9200/devices.c +++ b/arch/arm/mach-at91rm9200/devices.c @@ -100,8 +100,10 @@ void __init at91_add_device_udc(struct at91_udc_data *data) at91_set_gpio_input(data->vbus_pin, 0); at91_set_deglitch(data->vbus_pin, 1); } - if (data->pullup_pin) + if (data->pullup_pin) { at91_set_gpio_output(data->pullup_pin, 0); + at91_set_multi_drive(data->pullup_pin, 1); + } udc_data = *data; platform_device_register(&at91rm9200_udc_device); diff --git a/arch/arm/mach-at91rm9200/gpio.c b/arch/arm/mach-at91rm9200/gpio.c index 2fd2ef583e4d..a9f718bf8ba8 100644 --- a/arch/arm/mach-at91rm9200/gpio.c +++ b/arch/arm/mach-at91rm9200/gpio.c @@ -159,6 +159,23 @@ int __init_or_module at91_set_deglitch(unsigned pin, int is_on) } EXPORT_SYMBOL(at91_set_deglitch); +/* + * enable/disable the multi-driver; This is only valid for output and + * allows the output pin to run as an open collector output. + */ +int __init_or_module at91_set_multi_drive(unsigned pin, int is_on) +{ + void __iomem *pio = pin_to_controller(pin); + unsigned mask = pin_to_mask(pin); + + if (!pio) + return -EINVAL; + + __raw_writel(mask, pio + (is_on ? PIO_MDER : PIO_MDDR)); + return 0; +} +EXPORT_SYMBOL(at91_set_multi_drive); + /*--------------------------------------------------------------------------*/ diff --git a/include/asm-arm/arch-at91rm9200/gpio.h b/include/asm-arm/arch-at91rm9200/gpio.h index 0f0a61e2f129..6176ab2dc417 100644 --- a/include/asm-arm/arch-at91rm9200/gpio.h +++ b/include/asm-arm/arch-at91rm9200/gpio.h @@ -183,6 +183,7 @@ extern int at91_set_B_periph(unsigned pin, int use_pullup); extern int at91_set_gpio_input(unsigned pin, int use_pullup); extern int at91_set_gpio_output(unsigned pin, int value); extern int at91_set_deglitch(unsigned pin, int is_on); +extern int at91_set_multi_drive(unsigned pin, int is_on); /* callable at any time */ extern int at91_set_gpio_value(unsigned pin, int value); -- cgit v1.2.3 From 85edae14e4ee5e68cf037e9e4bca7498ea16874d Mon Sep 17 00:00:00 2001 From: Michal Janusz Miroslaw Date: Thu, 23 Feb 2006 09:49:35 +0000 Subject: [SERIAL] Trivial comment fix: include/linux/serial_reg.h Trivial comment fix for include/linux/serial_reg.h Signed-off-by: Russell King --- include/linux/serial_reg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/serial_reg.h b/include/linux/serial_reg.h index 6a2bb955844b..3c8a6aa77415 100644 --- a/include/linux/serial_reg.h +++ b/include/linux/serial_reg.h @@ -247,10 +247,10 @@ #define UART_CTR 0xFF /* - * The 16C950 Additional Control Reigster + * The 16C950 Additional Control Register */ #define UART_ACR_RXDIS 0x01 /* Receiver disable */ -#define UART_ACR_TXDIS 0x02 /* Receiver disable */ +#define UART_ACR_TXDIS 0x02 /* Transmitter disable */ #define UART_ACR_DSRFC 0x04 /* DSR Flow Control */ #define UART_ACR_TLENB 0x20 /* 950 trigger levels enable */ #define UART_ACR_ICRRD 0x40 /* ICR Read enable */ -- cgit v1.2.3 From 21380b81ef8699179b535e197a95b891a7badac7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 22 Feb 2006 14:47:13 -0800 Subject: [XFRM]: Eliminate refcounting confusion by creating __xfrm_state_put(). We often just do an atomic_dec(&x->refcnt) on an xfrm_state object because we know there is more than 1 reference remaining and thus we can elide the heavier xfrm_state_put() call. Do this behind an inline function called __xfrm_state_put() so that is more obvious and also to allow us to more cleanly add refcount debugging later. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 5 +++++ net/key/af_key.c | 2 +- net/xfrm/xfrm_state.c | 8 ++++---- net/xfrm/xfrm_user.c | 2 +- 4 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d6111a2f0a23..004e645f3e18 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -403,6 +403,11 @@ unsigned xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short f extern void __xfrm_state_destroy(struct xfrm_state *); +static inline void __xfrm_state_put(struct xfrm_state *x) +{ + atomic_dec(&x->refcnt); +} + static inline void xfrm_state_put(struct xfrm_state *x) { if (atomic_dec_and_test(&x->refcnt)) diff --git a/net/key/af_key.c b/net/key/af_key.c index ae86d237a456..b2d4d1dd2116 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1423,7 +1423,7 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, if (err < 0) { x->km.state = XFRM_STATE_DEAD; - xfrm_state_put(x); + __xfrm_state_put(x); goto out; } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index e12d0be5f976..c656cbaf35e8 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -220,14 +220,14 @@ static int __xfrm_state_delete(struct xfrm_state *x) x->km.state = XFRM_STATE_DEAD; spin_lock(&xfrm_state_lock); list_del(&x->bydst); - atomic_dec(&x->refcnt); + __xfrm_state_put(x); if (x->id.spi) { list_del(&x->byspi); - atomic_dec(&x->refcnt); + __xfrm_state_put(x); } spin_unlock(&xfrm_state_lock); if (del_timer(&x->timer)) - atomic_dec(&x->refcnt); + __xfrm_state_put(x); /* The number two in this test is the reference * mentioned in the comment below plus the reference @@ -243,7 +243,7 @@ static int __xfrm_state_delete(struct xfrm_state *x) * The xfrm_state_alloc call gives a reference, and that * is what we are dropping here. */ - atomic_dec(&x->refcnt); + __xfrm_state_put(x); err = 0; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ac87a09ba83e..7de17559249a 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -345,7 +345,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) if (err < 0) { x->km.state = XFRM_STATE_DEAD; - xfrm_state_put(x); + __xfrm_state_put(x); goto out; } -- cgit v1.2.3 From 337a7128dbe68ebe7627b6f954cb32d30d7b11c6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 21 Feb 2006 17:22:55 +1100 Subject: [PATCH] powerpc: Only calculate htab_size in one place for kexec For kexec we need to know the size of the MMU hash table. Currently we calculate the size once in the htab code, and then twice more in the kexec code, once using htab_hash_mask and once using ppc64_pft_size. On some machines the ppc64_pft_size calculation is broken because ppc64_pft_size is not set. So we need to fix the second calculation, but better still we should just calculate the size once and use it everywhere else. Tested on Power5 LPAR, Power4 non-LPAR and Power3. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/machine_kexec_64.c | 10 +++------- arch/powerpc/mm/hash_utils_64.c | 3 ++- include/asm-powerpc/mmu.h | 1 + 3 files changed, 6 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index d6431440c54f..ee166c586642 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -26,8 +26,6 @@ #include #include -#define HASH_GROUP_SIZE 0x80 /* size of each hash group, asm/mmu.h */ - int default_machine_kexec_prepare(struct kimage *image) { int i; @@ -61,7 +59,7 @@ int default_machine_kexec_prepare(struct kimage *image) */ if (htab_address) { low = __pa(htab_address); - high = low + (htab_hash_mask + 1) * HASH_GROUP_SIZE; + high = low + htab_size_bytes; for (i = 0; i < image->nr_segments; i++) { begin = image->segment[i].mem; @@ -294,7 +292,7 @@ void default_machine_kexec(struct kimage *image) } /* Values we need to export to the second kernel via the device tree. */ -static unsigned long htab_base, htab_size, kernel_end; +static unsigned long htab_base, kernel_end; static struct property htab_base_prop = { .name = "linux,htab-base", @@ -305,7 +303,7 @@ static struct property htab_base_prop = { static struct property htab_size_prop = { .name = "linux,htab-size", .length = sizeof(unsigned long), - .value = (unsigned char *)&htab_size, + .value = (unsigned char *)&htab_size_bytes, }; static struct property kernel_end_prop = { @@ -331,8 +329,6 @@ static void __init export_htab_values(void) htab_base = __pa(htab_address); prom_add_property(node, &htab_base_prop); - - htab_size = 1UL << ppc64_pft_size; prom_add_property(node, &htab_size_prop); out: diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 149351a84b94..b1f614c612dd 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -88,6 +88,7 @@ static unsigned long _SDR1; struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; hpte_t *htab_address; +unsigned long htab_size_bytes; unsigned long htab_hash_mask; int mmu_linear_psize = MMU_PAGE_4K; int mmu_virtual_psize = MMU_PAGE_4K; @@ -399,7 +400,7 @@ void create_section_mapping(unsigned long start, unsigned long end) void __init htab_initialize(void) { - unsigned long table, htab_size_bytes; + unsigned long table; unsigned long pteg_count; unsigned long mode_rw; unsigned long base = 0, size = 0; diff --git a/include/asm-powerpc/mmu.h b/include/asm-powerpc/mmu.h index d096d9e76ad7..b0b9a3f8cdc2 100644 --- a/include/asm-powerpc/mmu.h +++ b/include/asm-powerpc/mmu.h @@ -112,6 +112,7 @@ typedef struct { } hpte_t; extern hpte_t *htab_address; +extern unsigned long htab_size_bytes; extern unsigned long htab_hash_mask; /* -- cgit v1.2.3 From 1775dbbcd02cab0c41329dd2cec5b69c7fafd13f Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Wed, 22 Feb 2006 09:46:02 -0600 Subject: [PATCH] powerpc: Enable coherency for all pages on 83xx to fix PCI data corruption On the 83xx platform to ensure the PCI inbound memory is handled properly we have to turn on coherency for all pages in the MMU. Otherwise we see corruption if inbound "prefetching/streaming" is enabled on the PCI controller. Signed-off-by: Randy Vinson Signed-off-by: Kumar Gala Signed-off-by: Paul Mackerras --- include/asm-powerpc/cputable.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h index 64210549f56b..90d005bb4d1c 100644 --- a/include/asm-powerpc/cputable.h +++ b/include/asm-powerpc/cputable.h @@ -159,9 +159,11 @@ extern void do_cpu_ftr_fixups(unsigned long offset); #endif /* We need to mark all pages as being coherent if we're SMP or we - * have a 74[45]x and an MPC107 host bridge. + * have a 74[45]x and an MPC107 host bridge. Also 83xx requires + * it for PCI "streaming/prefetch" to work properly. */ -#if defined(CONFIG_SMP) || defined(CONFIG_MPC10X_BRIDGE) +#if defined(CONFIG_SMP) || defined(CONFIG_MPC10X_BRIDGE) \ + || defined(CONFIG_PPC_83xx) #define CPU_FTR_COMMON CPU_FTR_NEED_COHERENT #else #define CPU_FTR_COMMON 0 @@ -277,7 +279,8 @@ enum { CPU_FTRS_G2_LE = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_HAS_HIGH_BATS, CPU_FTRS_E300 = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_MAYBE_CAN_DOZE | - CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_HAS_HIGH_BATS, + CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_HAS_HIGH_BATS | + CPU_FTR_COMMON, CPU_FTRS_CLASSIC32 = CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE, CPU_FTRS_POWER3_32 = CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | -- cgit v1.2.3 From cb2c9b2741346eb23b177187a51ff5abf08295bd Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 13 Feb 2006 14:48:35 +1100 Subject: [PATCH] powerpc: Fix runlatch performance issues The runlatch SPR can take a lot of time to write. My original runlatch code would set it on every exception entry even though most of the time this was not required. It would also continually set it in the idle loop, which is an issue on an SMT capable processor. Now we cache the runlatch value in a threadinfo bit, and only check for it in decrementer and hardware interrupt exceptions as well as the idle loop. Boot on POWER3, POWER5 and iseries, and compile tested on pmac32. Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/head_64.S | 12 ++---------- arch/powerpc/kernel/process.c | 32 ++++++++++++++++++++++++++++++++ arch/powerpc/platforms/iseries/setup.c | 1 + include/asm-powerpc/reg.h | 33 +++------------------------------ include/asm-powerpc/thread_info.h | 4 ++-- 5 files changed, 40 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 2b03a09fe5e9..bb845eed0e98 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -319,7 +319,6 @@ exception_marker: label##_pSeries: \ HMT_MEDIUM; \ mtspr SPRN_SPRG1,r13; /* save r13 */ \ - RUNLATCH_ON(r13); \ EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common) #define STD_EXCEPTION_ISERIES(n, label, area) \ @@ -327,7 +326,6 @@ label##_pSeries: \ label##_iSeries: \ HMT_MEDIUM; \ mtspr SPRN_SPRG1,r13; /* save r13 */ \ - RUNLATCH_ON(r13); \ EXCEPTION_PROLOG_ISERIES_1(area); \ EXCEPTION_PROLOG_ISERIES_2; \ b label##_common @@ -337,7 +335,6 @@ label##_iSeries: \ label##_iSeries: \ HMT_MEDIUM; \ mtspr SPRN_SPRG1,r13; /* save r13 */ \ - RUNLATCH_ON(r13); \ EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN); \ lbz r10,PACAPROCENABLED(r13); \ cmpwi 0,r10,0; \ @@ -390,6 +387,7 @@ label##_common: \ label##_common: \ EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ DISABLE_INTS; \ + bl .ppc64_runlatch_on; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ bl hdlr; \ b .ret_from_except_lite @@ -407,7 +405,6 @@ __start_interrupts: _machine_check_pSeries: HMT_MEDIUM mtspr SPRN_SPRG1,r13 /* save r13 */ - RUNLATCH_ON(r13) EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) . = 0x300 @@ -434,7 +431,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_SLB) data_access_slb_pSeries: HMT_MEDIUM mtspr SPRN_SPRG1,r13 - RUNLATCH_ON(r13) mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_DAR @@ -460,7 +456,6 @@ data_access_slb_pSeries: instruction_access_slb_pSeries: HMT_MEDIUM mtspr SPRN_SPRG1,r13 - RUNLATCH_ON(r13) mfspr r13,SPRN_SPRG3 /* get paca address into r13 */ std r3,PACA_EXSLB+EX_R3(r13) mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */ @@ -491,7 +486,6 @@ instruction_access_slb_pSeries: .globl system_call_pSeries system_call_pSeries: HMT_MEDIUM - RUNLATCH_ON(r9) mr r9,r13 mfmsr r10 mfspr r13,SPRN_SPRG3 @@ -575,7 +569,6 @@ slb_miss_user_pseries: system_reset_fwnmi: HMT_MEDIUM mtspr SPRN_SPRG1,r13 /* save r13 */ - RUNLATCH_ON(r13) EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) .globl machine_check_fwnmi @@ -583,7 +576,6 @@ system_reset_fwnmi: machine_check_fwnmi: HMT_MEDIUM mtspr SPRN_SPRG1,r13 /* save r13 */ - RUNLATCH_ON(r13) EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) #ifdef CONFIG_PPC_ISERIES @@ -894,7 +886,6 @@ unrecov_fer: .align 7 .globl data_access_common data_access_common: - RUNLATCH_ON(r10) /* It wont fit in the 0x300 handler */ mfspr r10,SPRN_DAR std r10,PACA_EXGEN+EX_DAR(r13) mfspr r10,SPRN_DSISR @@ -1042,6 +1033,7 @@ hardware_interrupt_common: EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN) hardware_interrupt_entry: DISABLE_INTS + bl .ppc64_runlatch_on addi r3,r1,STACK_FRAME_OVERHEAD bl .do_IRQ b .ret_from_except_lite diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 57703994a063..c225cf154bfe 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -888,3 +888,35 @@ void dump_stack(void) show_stack(current, NULL); } EXPORT_SYMBOL(dump_stack); + +#ifdef CONFIG_PPC64 +void ppc64_runlatch_on(void) +{ + unsigned long ctrl; + + if (cpu_has_feature(CPU_FTR_CTRL) && !test_thread_flag(TIF_RUNLATCH)) { + HMT_medium(); + + ctrl = mfspr(SPRN_CTRLF); + ctrl |= CTRL_RUNLATCH; + mtspr(SPRN_CTRLT, ctrl); + + set_thread_flag(TIF_RUNLATCH); + } +} + +void ppc64_runlatch_off(void) +{ + unsigned long ctrl; + + if (cpu_has_feature(CPU_FTR_CTRL) && test_thread_flag(TIF_RUNLATCH)) { + HMT_medium(); + + clear_thread_flag(TIF_RUNLATCH); + + ctrl = mfspr(SPRN_CTRLF); + ctrl &= ~CTRL_RUNLATCH; + mtspr(SPRN_CTRLT, ctrl); + } +} +#endif diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index 3f8790146b00..3ecc4a652d82 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -648,6 +648,7 @@ static void yield_shared_processor(void) * here and let the timer_interrupt code sort out the actual time. */ get_lppaca()->int_dword.fields.decr_int = 1; + ppc64_runlatch_on(); process_iSeries_events(); } diff --git a/include/asm-powerpc/reg.h b/include/asm-powerpc/reg.h index 12ecc9b9f285..72bfe3af0460 100644 --- a/include/asm-powerpc/reg.h +++ b/include/asm-powerpc/reg.h @@ -615,27 +615,9 @@ #define proc_trap() asm volatile("trap") #ifdef CONFIG_PPC64 -static inline void ppc64_runlatch_on(void) -{ - unsigned long ctrl; - - if (cpu_has_feature(CPU_FTR_CTRL)) { - ctrl = mfspr(SPRN_CTRLF); - ctrl |= CTRL_RUNLATCH; - mtspr(SPRN_CTRLT, ctrl); - } -} - -static inline void ppc64_runlatch_off(void) -{ - unsigned long ctrl; - - if (cpu_has_feature(CPU_FTR_CTRL)) { - ctrl = mfspr(SPRN_CTRLF); - ctrl &= ~CTRL_RUNLATCH; - mtspr(SPRN_CTRLT, ctrl); - } -} + +extern void ppc64_runlatch_on(void); +extern void ppc64_runlatch_off(void); extern unsigned long scom970_read(unsigned int address); extern void scom970_write(unsigned int address, unsigned long value); @@ -645,15 +627,6 @@ extern void scom970_write(unsigned int address, unsigned long value); #define __get_SP() ({unsigned long sp; \ asm volatile("mr %0,1": "=r" (sp)); sp;}) -#else /* __ASSEMBLY__ */ - -#define RUNLATCH_ON(REG) \ -BEGIN_FTR_SECTION \ - mfspr (REG),SPRN_CTRLF; \ - ori (REG),(REG),CTRL_RUNLATCH; \ - mtspr SPRN_CTRLT,(REG); \ -END_FTR_SECTION_IFSET(CPU_FTR_CTRL) - #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_REG_H */ diff --git a/include/asm-powerpc/thread_info.h b/include/asm-powerpc/thread_info.h index c044ec16a879..237fc2b72974 100644 --- a/include/asm-powerpc/thread_info.h +++ b/include/asm-powerpc/thread_info.h @@ -113,7 +113,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_POLLING_NRFLAG 4 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_32BIT 5 /* 32 bit binary */ -/* #define SPARE 6 */ +#define TIF_RUNLATCH 6 /* Is the runlatch enabled? */ #define TIF_ABI_PENDING 7 /* 32/64 bit switch needed */ #define TIF_SYSCALL_AUDIT 8 /* syscall auditing active */ #define TIF_SINGLESTEP 9 /* singlestepping active */ @@ -131,7 +131,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_NEED_RESCHED (1< Date: Fri, 24 Feb 2006 13:03:51 -0800 Subject: [PATCH] m32r: fix and update for gcc-4.0 Fix and update for gcc-4.0. - arch/m32r/kernel/signal.c: Change type of the 8th parameter of sys_rt_sigsuspend() from 'struct pt_regs' to 'struct pt_regs *'. This functions make use of the 'regs' parameter to return status value, but gcc-4.0 optimizes and removes it as a dead code. Functions, sys_sigaltstack() and sys_rt_sigreturn(), have also modified. - arch/m32r/lib/usercopy.c, include/asm-m32r/uaccess.h: Add early-clobber constraints('&') to output values of asm statements; these constraints seems to be required for gcc-4.0 register assignment. Signed-off-by: Hayato Fujiwara Signed-off-by: Hirokazu Takata Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/m32r/kernel/signal.c | 24 ++++++++++-------------- arch/m32r/lib/usercopy.c | 4 ++-- include/asm-m32r/uaccess.h | 8 ++++---- 3 files changed, 16 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c index 71763f7a1d19..cb33097fefc4 100644 --- a/arch/m32r/kernel/signal.c +++ b/arch/m32r/kernel/signal.c @@ -36,7 +36,7 @@ int do_signal(struct pt_regs *, sigset_t *); asmlinkage int sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, unsigned long r2, unsigned long r3, unsigned long r4, - unsigned long r5, unsigned long r6, struct pt_regs regs) + unsigned long r5, unsigned long r6, struct pt_regs *regs) { sigset_t saveset, newset; @@ -54,21 +54,21 @@ sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - regs.r0 = -EINTR; + regs->r0 = -EINTR; while (1) { current->state = TASK_INTERRUPTIBLE; schedule(); - if (do_signal(®s, &saveset)) - return regs.r0; + if (do_signal(regs, &saveset)) + return regs->r0; } } asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, unsigned long r2, unsigned long r3, unsigned long r4, - unsigned long r5, unsigned long r6, struct pt_regs regs) + unsigned long r5, unsigned long r6, struct pt_regs *regs) { - return do_sigaltstack(uss, uoss, regs.spu); + return do_sigaltstack(uss, uoss, regs->spu); } @@ -140,11 +140,10 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, asmlinkage int sys_rt_sigreturn(unsigned long r0, unsigned long r1, unsigned long r2, unsigned long r3, unsigned long r4, - unsigned long r5, unsigned long r6, struct pt_regs regs) + unsigned long r5, unsigned long r6, struct pt_regs *regs) { - struct rt_sigframe __user *frame = (struct rt_sigframe __user *)regs.spu; + struct rt_sigframe __user *frame = (struct rt_sigframe __user *)regs->spu; sigset_t set; - stack_t st; int result; if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) @@ -158,14 +157,11 @@ sys_rt_sigreturn(unsigned long r0, unsigned long r1, recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - if (restore_sigcontext(®s, &frame->uc.uc_mcontext, &result)) + if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &result)) goto badframe; - if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st))) + if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->spu) == -EFAULT) goto badframe; - /* It is more difficult to avoid calling this function than to - call it and ignore errors. */ - do_sigaltstack(&st, NULL, regs.spu); return result; diff --git a/arch/m32r/lib/usercopy.c b/arch/m32r/lib/usercopy.c index ce16bbe26a52..2d1dd2106c4d 100644 --- a/arch/m32r/lib/usercopy.c +++ b/arch/m32r/lib/usercopy.c @@ -64,7 +64,7 @@ do { \ " .balign 4\n" \ " .long 0b,3b\n" \ ".previous" \ - : "=r"(res), "=r"(count), "=&r" (__d0), "=&r" (__d1), \ + : "=&r"(res), "=&r"(count), "=&r" (__d0), "=&r" (__d1), \ "=&r" (__d2) \ : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), \ "4"(dst) \ @@ -101,7 +101,7 @@ do { \ " .balign 4\n" \ " .long 0b,3b\n" \ ".previous" \ - : "=r"(res), "=r"(count), "=&r" (__d0), "=&r" (__d1), \ + : "=&r"(res), "=&r"(count), "=&r" (__d0), "=&r" (__d1), \ "=&r" (__d2) \ : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), \ "4"(dst) \ diff --git a/include/asm-m32r/uaccess.h b/include/asm-m32r/uaccess.h index 0da7c47d2f01..e8ae61956a51 100644 --- a/include/asm-m32r/uaccess.h +++ b/include/asm-m32r/uaccess.h @@ -328,7 +328,7 @@ extern void __put_user_bad(void); " .long 1b,4b\n" \ " .long 2b,4b\n" \ ".previous" \ - : "=r"(err) \ + : "=&r"(err) \ : "r"(x), "r"(addr), "i"(-EFAULT), "0"(err) \ : "r14", "memory") @@ -353,7 +353,7 @@ extern void __put_user_bad(void); " .long 1b,4b\n" \ " .long 2b,4b\n" \ ".previous" \ - : "=r"(err) \ + : "=&r"(err) \ : "r"(x), "r"(addr), "i"(-EFAULT), "0"(err) \ : "r14", "memory") #else @@ -398,7 +398,7 @@ struct __large_struct { unsigned long buf[100]; }; " .balign 4\n" \ " .long 1b,3b\n" \ ".previous" \ - : "=r"(err) \ + : "=&r"(err) \ : "r"(x), "r"(addr), "i"(-EFAULT), "0"(err) \ : "r14", "memory") @@ -442,7 +442,7 @@ do { \ " .balign 4\n" \ " .long 1b,3b\n" \ ".previous" \ - : "=r"(err), "=&r"(x) \ + : "=&r"(err), "=&r"(x) \ : "r"(addr), "i"(-EFAULT), "0"(err) \ : "r14", "memory") -- cgit v1.2.3 From 124d90be62343f71bbb7a6b4a907b5584181e6d5 Mon Sep 17 00:00:00 2001 From: Prasanna S Panchamukhi Date: Fri, 24 Feb 2006 13:04:08 -0800 Subject: [PATCH] Kprobes causes NX protection fault on i686 SMP Fix a problem seen on i686 machine with NX support where the instruction could not be single stepped because of NX bit set on the memory pages allocated by kprobes module. This patch provides allocation of instruction solt so that the processor can execute the instruction from that location similar to x86_64 architecture. Thanks to Bibo and Masami for testing this patch. Signed-off-by: Prasanna S Panchamukhi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/kprobes.c | 16 ++++++++++++++-- include/asm-i386/kprobes.h | 7 +++++-- 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 6483eeb1a4e8..694a13997637 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -58,6 +58,11 @@ static inline int is_IF_modifier(kprobe_opcode_t opcode) int __kprobes arch_prepare_kprobe(struct kprobe *p) { + /* insn: must be on special executable page on i386. */ + p->ainsn.insn = get_insn_slot(); + if (!p->ainsn.insn) + return -ENOMEM; + memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); p->opcode = *p->addr; return 0; @@ -77,6 +82,13 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + down(&kprobe_mutex); + free_insn_slot(p->ainsn.insn); + up(&kprobe_mutex); +} + static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) { kcb->prev_kprobe.kp = kprobe_running(); @@ -111,7 +123,7 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) if (p->opcode == BREAKPOINT_INSTRUCTION) regs->eip = (unsigned long)p->addr; else - regs->eip = (unsigned long)&p->ainsn.insn; + regs->eip = (unsigned long)p->ainsn.insn; } /* Called with kretprobe_lock held */ @@ -351,7 +363,7 @@ static void __kprobes resume_execution(struct kprobe *p, { unsigned long *tos = (unsigned long *)®s->esp; unsigned long next_eip = 0; - unsigned long copy_eip = (unsigned long)&p->ainsn.insn; + unsigned long copy_eip = (unsigned long)p->ainsn.insn; unsigned long orig_eip = (unsigned long)p->addr; switch (p->ainsn.insn[0]) { diff --git a/include/asm-i386/kprobes.h b/include/asm-i386/kprobes.h index 27cac050a60e..a0d2d74a7dda 100644 --- a/include/asm-i386/kprobes.h +++ b/include/asm-i386/kprobes.h @@ -27,6 +27,9 @@ #include #include +#define __ARCH_WANT_KPROBES_INSN_SLOT + +struct kprobe; struct pt_regs; typedef u8 kprobe_opcode_t; @@ -40,14 +43,14 @@ typedef u8 kprobe_opcode_t; #define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)pentry #define ARCH_SUPPORTS_KRETPROBES -#define arch_remove_kprobe(p) do {} while (0) +void arch_remove_kprobe(struct kprobe *p); void kretprobe_trampoline(void); /* Architecture specific copy of original instruction*/ struct arch_specific_insn { /* copy of the original instruction */ - kprobe_opcode_t insn[MAX_INSN_SIZE]; + kprobe_opcode_t *insn; }; struct prev_kprobe { -- cgit v1.2.3 From 2b932f6cf052920fb3a6281499e08209b08f5086 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 24 Feb 2006 13:04:14 -0800 Subject: [PATCH] x86: fix broken SMP boot sequence Recent GDT changes broke the SMP boot sequence if the booting CPU is numbered anything other than zero. There's also a subtle source of error in that the boot time CPU now uses cpu_gdt_table (which is actually the GDT for booting CPUs in head.S). This patch fixes both problems by making GDT descriptors themselves allocated from a per_cpu area and switching to them in cpu_init(), which now means that cpu_gdt_table is exclusively used for booting CPUs again. Signed-off-by: James Bottomley Cc: Zachary Amsden Cc: Matt Tolentino Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/common.c | 32 ++++++++++++++++++++++++++++---- arch/i386/kernel/efi.c | 12 +++++++----- arch/i386/kernel/head.S | 2 -- arch/i386/kernel/i386_ksyms.c | 2 -- arch/i386/kernel/smpboot.c | 6 ------ include/asm-i386/desc.h | 6 ++++-- 6 files changed, 39 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 7eb9213734a3..4ecd4b326ded 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -18,6 +19,9 @@ #include "cpu.h" +DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); +EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); + DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); @@ -571,8 +575,9 @@ void __devinit cpu_init(void) int cpu = smp_processor_id(); struct tss_struct * t = &per_cpu(init_tss, cpu); struct thread_struct *thread = ¤t->thread; - struct desc_struct *gdt = get_cpu_gdt_table(cpu); + struct desc_struct *gdt; __u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu); + struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); if (cpu_test_and_set(cpu, cpu_initialized)) { printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); @@ -589,6 +594,25 @@ void __devinit cpu_init(void) set_in_cr4(X86_CR4_TSD); } + /* + * This is a horrible hack to allocate the GDT. The problem + * is that cpu_init() is called really early for the boot CPU + * (and hence needs bootmem) but much later for the secondary + * CPUs, when bootmem will have gone away + */ + if (NODE_DATA(0)->bdata->node_bootmem_map) { + gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE); + /* alloc_bootmem_pages panics on failure, so no check */ + memset(gdt, 0, PAGE_SIZE); + } else { + gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); + if (unlikely(!gdt)) { + printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); + for (;;) + local_irq_enable(); + } + } + /* * Initialize the per-CPU GDT with the boot GDT, * and set up the GDT descriptor: @@ -601,10 +625,10 @@ void __devinit cpu_init(void) ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) | (CPU_16BIT_STACK_SIZE - 1); - cpu_gdt_descr[cpu].size = GDT_SIZE - 1; - cpu_gdt_descr[cpu].address = (unsigned long)gdt; + cpu_gdt_descr->size = GDT_SIZE - 1; + cpu_gdt_descr->address = (unsigned long)gdt; - load_gdt(&cpu_gdt_descr[cpu]); + load_gdt(cpu_gdt_descr); load_idt(&idt_descr); /* diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index ecad519fd395..e3e42fd62401 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -103,17 +103,19 @@ static void efi_call_phys_prelog(void) */ local_flush_tlb(); - cpu_gdt_descr[0].address = __pa(cpu_gdt_descr[0].address); - load_gdt((struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0])); + per_cpu(cpu_gdt_descr, 0).address = + __pa(per_cpu(cpu_gdt_descr, 0).address); + load_gdt((struct Xgt_desc_struct *)__pa(&per_cpu(cpu_gdt_descr, 0))); } static void efi_call_phys_epilog(void) { unsigned long cr4; - cpu_gdt_descr[0].address = - (unsigned long) __va(cpu_gdt_descr[0].address); - load_gdt(&cpu_gdt_descr[0]); + per_cpu(cpu_gdt_descr, 0).address = + (unsigned long)__va(per_cpu(cpu_gdt_descr, 0).address); + load_gdt((struct Xgt_desc_struct *)__va(&per_cpu(cpu_gdt_descr, 0))); + cr4 = read_cr4(); if (cr4 & X86_CR4_PSE) { diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index 2bee6499edd9..e0b7c632efbc 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -534,5 +534,3 @@ ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* 0xf0 - unused */ .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ - /* Be sure this is zeroed to avoid false validations in Xen */ - .fill PAGE_SIZE_asm / 8 - GDT_ENTRIES,8,0 diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c index 3999bec50c33..055325056a74 100644 --- a/arch/i386/kernel/i386_ksyms.c +++ b/arch/i386/kernel/i386_ksyms.c @@ -3,8 +3,6 @@ #include #include -EXPORT_SYMBOL_GPL(cpu_gdt_descr); - EXPORT_SYMBOL(__down_failed); EXPORT_SYMBOL(__down_failed_interruptible); EXPORT_SYMBOL(__down_failed_trylock); diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index fb00ab7b7612..eba7f53f8b4a 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -898,12 +898,6 @@ static int __devinit do_boot_cpu(int apicid, int cpu) unsigned long start_eip; unsigned short nmi_high = 0, nmi_low = 0; - if (!cpu_gdt_descr[cpu].address && - !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { - printk("Failed to allocate GDT for CPU %d\n", cpu); - return 1; - } - ++cpucount; /* diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h index 494e73bca095..89b8b82c82b3 100644 --- a/include/asm-i386/desc.h +++ b/include/asm-i386/desc.h @@ -24,11 +24,13 @@ struct Xgt_desc_struct { unsigned short pad; } __attribute__ ((packed)); -extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS]; +extern struct Xgt_desc_struct idt_descr; +DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); + static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) { - return ((struct desc_struct *)cpu_gdt_descr[cpu].address); + return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address; } #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) -- cgit v1.2.3 From c04030e16dbea2f7581f82cc6688695927f6ac5b Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Fri, 24 Feb 2006 13:04:21 -0800 Subject: [PATCH] flags parameter for linkat I'm currently at the POSIX meeting and one thing covered was the incompatibility of Linux's link() with the POSIX definition. The name. Linux does not follow symlinks, POSIX requires it does. Even if somebody thinks this is a good default behavior we cannot change this because it would break the ABI. But the fact remains that some application might want this behavior. We have one chance to help implementing this without breaking the behavior. For this we could use the new linkat interface which would need a new flags parameter. If the new parameter is AT_SYMLINK_FOLLOW the new behavior could be invoked. I do not want to introduce such a patch now. But we could add the parameter now, just don't use it. The patch below would do this. Can we get this late patch applied before the release more or less fixes the syscall API? Signed-off-by: Ulrich Drepper Signed-off-by: Ralf Baechle Cc: Heiko Carstens Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/kernel/scall32-o32.S | 2 +- arch/s390/kernel/compat_wrapper.S | 1 + fs/namei.c | 8 ++++++-- include/linux/syscalls.h | 2 +- 4 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index d83e033dbc87..2f2dc54b2e26 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -626,7 +626,7 @@ einval: li v0, -EINVAL sys sys_fstatat64 4 sys sys_unlinkat 3 sys sys_renameat 4 /* 4295 */ - sys sys_linkat 4 + sys sys_linkat 5 sys sys_symlinkat 3 sys sys_readlinkat 4 sys sys_fchmodat 3 diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 615964cca15f..50e80138e7ad 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1552,6 +1552,7 @@ sys_linkat_wrapper: llgtr %r3,%r3 # const char * lgfr %r4,%r4 # int llgtr %r5,%r5 # const char * + lgfr %r6,%r6 # int jg sys_linkat .globl sys_symlinkat_wrapper diff --git a/fs/namei.c b/fs/namei.c index e28de846c591..557dcf395ca1 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2224,13 +2224,17 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de * and other special files. --ADM */ asmlinkage long sys_linkat(int olddfd, const char __user *oldname, - int newdfd, const char __user *newname) + int newdfd, const char __user *newname, + int flags) { struct dentry *new_dentry; struct nameidata nd, old_nd; int error; char * to; + if (flags != 0) + return -EINVAL; + to = getname(newname); if (IS_ERR(to)) return PTR_ERR(to); @@ -2263,7 +2267,7 @@ exit: asmlinkage long sys_link(const char __user *oldname, const char __user *newname) { - return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname); + return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0); } /* diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index d73501ba7e44..b9ea44ac0ddb 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -543,7 +543,7 @@ asmlinkage long sys_unlinkat(int dfd, const char __user * pathname, int flag); asmlinkage long sys_symlinkat(const char __user * oldname, int newdfd, const char __user * newname); asmlinkage long sys_linkat(int olddfd, const char __user *oldname, - int newdfd, const char __user *newname); + int newdfd, const char __user *newname, int flags); asmlinkage long sys_renameat(int olddfd, const char __user * oldname, int newdfd, const char __user * newname); asmlinkage long sys_futimesat(int dfd, char __user *filename, -- cgit v1.2.3 From 60b08c67220cf6faef7410ac6adba23a8a743bf7 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Sun, 26 Feb 2006 04:18:22 +0100 Subject: [PATCH] x86_64: no_iommu removal in pci-gart.c In previous versions of pci-gart.c, no_iommu was used to determine if IOMMU was disabled in the GART DMA mapping functions. This changed in 2.6.16 and now gart_xxx() functions are only called if gart is enabled. Therefore, uses of no_iommu in the GART code are no longer necessary and can be removed. Also, it removes double deceleration of no_iommu and force_iommu in pci.h and proto.h, by removing the deceleration in pci.h. Lastly, end_pfn off by one error. Tested (along with patch 1/2) on dual opteron with gart enabled, iommu=soft, and iommu=off. Signed-off-by: Jon Mason Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/aperture.c | 2 +- arch/x86_64/kernel/pci-gart.c | 22 +++++----------------- include/asm-x86_64/pci.h | 2 -- 3 files changed, 6 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c index e4e2b7d01f89..a0f955b9995f 100644 --- a/arch/x86_64/kernel/aperture.c +++ b/arch/x86_64/kernel/aperture.c @@ -248,7 +248,7 @@ void __init iommu_hole_init(void) /* Got the aperture from the AGP bridge */ } else if (swiotlb && !valid_agp) { /* Do nothing */ - } else if ((!no_iommu && end_pfn >= MAX_DMA32_PFN) || + } else if ((!no_iommu && end_pfn > MAX_DMA32_PFN) || force_iommu || valid_agp || fallback_aper_force) { diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index dd0718dc178b..0c3f052ba6ce 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c @@ -228,11 +228,6 @@ static inline int need_iommu(struct device *dev, unsigned long addr, size_t size int mmu = high; if (force_iommu) mmu = 1; - if (no_iommu) { - if (high) - panic("PCI-DMA: high address but no IOMMU.\n"); - mmu = 0; - } return mmu; } @@ -241,11 +236,6 @@ static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t u64 mask = *dev->dma_mask; int high = addr + size >= mask; int mmu = high; - if (no_iommu) { - if (high) - panic("PCI-DMA: high address but no IOMMU.\n"); - mmu = 0; - } return mmu; } @@ -310,7 +300,7 @@ void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int di for (i = 0; i < nents; i++) { struct scatterlist *s = &sg[i]; - if (!s->dma_length) + if (!s->dma_length || !s->length) break; dma_unmap_single(dev, s->dma_address, s->dma_length, dir); } @@ -364,6 +354,7 @@ static int __dma_map_cont(struct scatterlist *sg, int start, int stopat, BUG_ON(i > start && s->offset); if (i == start) { + *sout = *s; sout->dma_address = iommu_bus_base; sout->dma_address += iommu_page*PAGE_SIZE + s->offset; sout->dma_length = s->length; @@ -390,6 +381,7 @@ static inline int dma_map_cont(struct scatterlist *sg, int start, int stopat, { if (!need) { BUG_ON(stopat - start != 1); + *sout = sg[start]; sout->dma_length = sg[start].length; return 0; } @@ -632,17 +624,13 @@ static int __init pci_iommu_init(void) (agp_copy_info(agp_bridge, &info) < 0); #endif - if (swiotlb) { - no_iommu = 1; + if (swiotlb) return -1; - } - + if (no_iommu || (!force_iommu && end_pfn <= MAX_DMA32_PFN) || !iommu_aperture || (no_agp && init_k8_gatt(&info) < 0)) { - no_iommu = 1; - no_iommu_init(); printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n"); if (end_pfn > MAX_DMA32_PFN) { printk(KERN_ERR "WARNING more than 4GB of memory " diff --git a/include/asm-x86_64/pci.h b/include/asm-x86_64/pci.h index fd03e15d7ea6..8a05af264d18 100644 --- a/include/asm-x86_64/pci.h +++ b/include/asm-x86_64/pci.h @@ -19,8 +19,6 @@ extern unsigned int pcibios_assign_all_busses(void); #endif #define pcibios_scan_all_fns(a, b) 0 -extern int no_iommu, force_iommu; - extern unsigned long pci_mem_start; #define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_MEM (pci_mem_start) -- cgit v1.2.3 From f83f2b5fbab4585f4de4523c7879d60e3f85a248 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Sun, 26 Feb 2006 04:18:25 +0100 Subject: [PATCH] x86_64: fix USER_PTRS_PER_PGD The value, while currently unused in the native kernel, was off by one. Signed-Off-By: Jan Beulich Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/asm-x86_64/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index 8fbf4dd72115..715fd94cf577 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h @@ -131,7 +131,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) +#define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1) #define FIRST_USER_ADDRESS 0 #ifndef __ASSEMBLY__ -- cgit v1.2.3 From 13a229abc25640813f1480c0478dfc6bdbc1c19e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 26 Feb 2006 04:18:31 +0100 Subject: [PATCH] x86_64: Only do the clustered systems have unsynchronized TSC assumption on IBM systems Big Unisys systems have multiple clusters too, but they have an synchronized TSC. I'm using the SMBIOS to check for vendor == IBM. Cc: Chris McDermott Cc: "Protasevich, Natalie" Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/acpi/boot.c | 3 --- arch/x86_64/kernel/apic.c | 9 ++++++++- include/asm-x86_64/acpi.h | 14 ++++++++++++++ 3 files changed, 22 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 79577f0ace98..8309a7b2cd63 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -44,9 +44,6 @@ extern void __init clustered_apic_check(void); extern int gsi_irq_sharing(int gsi); #include -static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } - - #else /* X86 */ #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index e5b14c57eaa0..d70605eda333 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -962,12 +962,14 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) irq_exit(); } +int __initdata unsync_tsc_on_multicluster; + /* * oem_force_hpet_timer -- force HPET mode for some boxes. * * Thus far, the major user of this is IBM's Summit2 series: * - * Clustered boxes may have unsynced TSC problems if they are + * Some clustered boxes may have unsynced TSC problems if they are * multi-chassis. Use available data to take a good guess. * If in doubt, go HPET. */ @@ -977,6 +979,11 @@ __cpuinit int oem_force_hpet_timer(void) unsigned id; DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); + /* Only do this check on IBM machines - big Unisys systems + use multiple clusters too, but have synchronized TSC */ + if (!unsync_tsc_on_multicluster) + return 0; + bitmap_zero(clustermap, NUM_APIC_CLUSTERS); for (i = 0; i < NR_CPUS; i++) { diff --git a/include/asm-x86_64/acpi.h b/include/asm-x86_64/acpi.h index aa1c7b2e438c..e2b9923189a0 100644 --- a/include/asm-x86_64/acpi.h +++ b/include/asm-x86_64/acpi.h @@ -164,6 +164,20 @@ extern u8 x86_acpiid_to_apicid[]; extern int acpi_skip_timer_override; +extern int unsync_tsc_on_multicluster; + +static inline int acpi_madt_oem_check(char *oem, char *productid) +{ + /* Copied from i386. Probably has too many entries. */ + if (!strncmp(oem, "IBM ENSW", 8) && + (!strncmp(productid, "VIGIL SMP", 9) + || !strncmp(productid, "EXA", 3) + || !strncmp(productid, "RUTHLESS SMP", 12))) { + unsync_tsc_on_multicluster = 1; + } + return 0; +} + #endif /*__KERNEL__*/ #endif /*_ASM_ACPI_H*/ -- cgit v1.2.3 From e2c0388866dc12bef56b178b958f9b778fe6c687 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 26 Feb 2006 04:18:46 +0100 Subject: [PATCH] x86_64: Fix the additional_cpus=.. option It didn't set up the CPU possible map early enough, so the option didn't actually work. Noticed by Heiko Carstens Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/setup.c | 6 ++++++ arch/x86_64/kernel/smpboot.c | 2 +- include/asm-x86_64/proto.h | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 9435ab7d6fb8..5de7eaf5d97c 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -423,6 +423,12 @@ static __init void parse_cmdline_early (char ** cmdline_p) else if(!memcmp(from, "elfcorehdr=", 11)) elfcorehdr_addr = memparse(from+11, &from); #endif + +#ifdef CONFIG_SMP + else if (!memcmp(from, "additional_cpus=", 16)) + setup_additional_cpus(from+16); +#endif + next_char: c = *(from++); if (!c) diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 67e4e28f4df8..b82eb86e4f5d 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -1244,7 +1244,7 @@ void __cpu_die(unsigned int cpu) printk(KERN_ERR "CPU %u didn't die...\n", cpu); } -static __init int setup_additional_cpus(char *s) +__init int setup_additional_cpus(char *s) { return get_option(&s, &additional_cpus); } diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index eca3f2d633db..8bdcbd0aa03f 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -134,6 +134,7 @@ extern int force_iommu; extern int reboot_force; extern int notsc_setup(char *); +extern int setup_additional_cpus(char *); extern void smp_local_timer_interrupt(struct pt_regs * regs); -- cgit v1.2.3 From e8b917775b572bc27de105f1317c2de4335db5b3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 26 Feb 2006 04:18:49 +0100 Subject: [PATCH] x86_64: Move the SMP time selection earlier SMP time selection originally ran after all CPUs were brought up because it needed to know the number of CPUs to decide if it needs an MP safe timer or not. This is not needed anymore because we know present CPUs early. This fixes a couple of problems: - apicmaintimer didn't always work because it relied on state that was set up time_init_gtod too late. - The output for the used timer in early kernel log was misleading because time_init_gtod could actually change it later. Now always print the final timer choice Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/smpboot.c | 2 -- arch/x86_64/kernel/time.c | 22 +++++++++++----------- include/asm-x86_64/proto.h | 1 - 3 files changed, 11 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index b82eb86e4f5d..66e98659d077 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -1152,8 +1152,6 @@ void __init smp_cpus_done(unsigned int max_cpus) setup_ioapic_dest(); #endif - time_init_gtod(); - check_nmi_watchdog(); } diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 67841d11ed1f..3080f84bf7b7 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -48,6 +48,8 @@ static void cpufreq_delayed_get(void); extern void i8254_timer_resume(void); extern int using_apic_timer; +static char *time_init_gtod(void); + DEFINE_SPINLOCK(rtc_lock); DEFINE_SPINLOCK(i8253_lock); @@ -901,6 +903,7 @@ static struct irqaction irq0 = { void __init time_init(void) { char *timename; + char *gtod; #ifdef HPET_HACK_ENABLE_DANGEROUS if (!vxtime.hpet_address) { @@ -945,21 +948,19 @@ void __init time_init(void) timename = "PIT"; } - printk(KERN_INFO "time.c: Using %ld.%06ld MHz %s timer.\n", - vxtime_hz / 1000000, vxtime_hz % 1000000, timename); + vxtime.mode = VXTIME_TSC; + gtod = time_init_gtod(); + + printk(KERN_INFO "time.c: Using %ld.%06ld MHz WALL %s GTOD %s timer.\n", + vxtime_hz / 1000000, vxtime_hz % 1000000, timename, gtod); printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); - vxtime.mode = VXTIME_TSC; vxtime.quot = (1000000L << 32) / vxtime_hz; vxtime.tsc_quot = (1000L << 32) / cpu_khz; vxtime.last_tsc = get_cycles_sync(); setup_irq(0, &irq0); set_cyc2ns_scale(cpu_khz); - -#ifndef CONFIG_SMP - time_init_gtod(); -#endif } /* @@ -981,9 +982,9 @@ __cpuinit int unsynchronized_tsc(void) } /* - * Decide after all CPUs are booted what mode gettimeofday should use. + * Decide what mode gettimeofday should use. */ -void __init time_init_gtod(void) +__init static char *time_init_gtod(void) { char *timetype; @@ -1011,8 +1012,7 @@ void __init time_init_gtod(void) timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC"; vxtime.mode = VXTIME_TSC; } - - printk(KERN_INFO "time.c: Using %s based timekeeping.\n", timetype); + return timetype; } __setup("report_lost_ticks", time_setup); diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index 8bdcbd0aa03f..3ba8fd45fcb3 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -39,7 +39,6 @@ extern void config_acpi_tables(void); extern void ia32_syscall(void); extern void iommu_hole_init(void); -extern void time_init_gtod(void); extern int pmtimer_mark_offset(void); extern void pmtimer_resume(void); extern void pmtimer_wait(unsigned); -- cgit v1.2.3 From 043df59eb3798c094e6ba47136f3d3b34a6791a7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 25 Feb 2006 12:15:31 -0800 Subject: [SPARC64]: Implement futex_atomic_op_inuser(). Signed-off-by: David S. Miller --- include/asm-sparc64/futex.h | 88 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 84 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-sparc64/futex.h b/include/asm-sparc64/futex.h index 6a332a9f099c..0caf60147e97 100644 --- a/include/asm-sparc64/futex.h +++ b/include/asm-sparc64/futex.h @@ -1,6 +1,86 @@ -#ifndef _ASM_FUTEX_H -#define _ASM_FUTEX_H +#ifndef _SPARC64_FUTEX_H +#define _SPARC64_FUTEX_H -#include +#include +#include +#include +#include -#endif +#define __futex_cas_op(insn, ret, oldval, uaddr, oparg) \ + __asm__ __volatile__( \ + "\n1: lduwa [%3] %%asi, %2\n" \ + " " insn "\n" \ + "2: casa [%3] %%asi, %2, %1\n" \ + " cmp %2, %1\n" \ + " bne,pn %%icc, 1b\n" \ + " mov 0, %0\n" \ + "3:\n" \ + " .section .fixup,#alloc,#execinstr\n" \ + " .align 4\n" \ + "4: ba 3b\n" \ + " mov %5, %0\n" \ + " .previous\n" \ + " .section __ex_table,#alloc\n" \ + " .align 4\n" \ + " .word 1b, 4b\n" \ + " .word 2b, 4b\n" \ + " .previous\n" \ + : "=&r" (ret), "=&r" (oldval), "=&r" (tem) \ + : "r" (uaddr), "r" (oparg), "i" (-EFAULT) \ + : "memory") + +static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + + if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))) + return -EFAULT; + if (unlikely((((unsigned long) uaddr) & 0x3UL))) + return -EINVAL; + + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + __futex_cas_op("mov\t%4, %1", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ADD: + __futex_cas_op("add\t%2, %4, %1", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_cas_op("or\t%2, %4, %1", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_cas_op("and\t%2, %4, %1", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_XOR: + __futex_cas_op("xor\t%2, %4, %1", ret, oldval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif /* !(_SPARC64_FUTEX_H) */ -- cgit v1.2.3 From 7abea9214585823f7f19d91872d7c6f8874bef9a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 25 Feb 2006 13:39:56 -0800 Subject: [SPARC64]: Make cpu_present_map available earlier. The change to kernel/sched.c's init code to use for_each_cpu() requires that the cpu_possible_map be setup much earlier. Set it up via setup_arch(), constrained to NR_CPUS, and later constrain it to max_cpus in smp_prepare_cpus(). This fixes SMP booting on sparc64. Signed-off-by: David S. Miller --- arch/sparc64/kernel/setup.c | 2 ++ arch/sparc64/kernel/smp.c | 28 +++++++++++++++++++--------- include/asm-sparc64/smp.h | 6 ++++++ 3 files changed, 27 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c index 054461e6946d..158bd31e15b7 100644 --- a/arch/sparc64/kernel/setup.c +++ b/arch/sparc64/kernel/setup.c @@ -542,6 +542,8 @@ void __init setup_arch(char **cmdline_p) } #endif + smp_setup_cpu_possible_map(); + paging_init(); } diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 1fb6323e65a4..1f7ad8a69052 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c @@ -1079,18 +1079,12 @@ int setup_profiling_timer(unsigned int multiplier) return 0; } +/* Constrain the number of cpus to max_cpus. */ void __init smp_prepare_cpus(unsigned int max_cpus) { - int instance, mid; - - instance = 0; - while (!cpu_find_by_instance(instance, NULL, &mid)) { - if (mid < max_cpus) - cpu_set(mid, phys_cpu_present_map); - instance++; - } - if (num_possible_cpus() > max_cpus) { + int instance, mid; + instance = 0; while (!cpu_find_by_instance(instance, NULL, &mid)) { if (mid != boot_cpu_id) { @@ -1105,6 +1099,22 @@ void __init smp_prepare_cpus(unsigned int max_cpus) smp_store_cpu_info(boot_cpu_id); } +/* Set this up early so that things like the scheduler can init + * properly. We use the same cpu mask for both the present and + * possible cpu map. + */ +void __init smp_setup_cpu_possible_map(void) +{ + int instance, mid; + + instance = 0; + while (!cpu_find_by_instance(instance, NULL, &mid)) { + if (mid < NR_CPUS) + cpu_set(mid, phys_cpu_present_map); + instance++; + } +} + void __devinit smp_prepare_boot_cpu(void) { if (hard_smp_processor_id() >= NR_CPUS) { diff --git a/include/asm-sparc64/smp.h b/include/asm-sparc64/smp.h index 110a2de89123..473edb2603ec 100644 --- a/include/asm-sparc64/smp.h +++ b/include/asm-sparc64/smp.h @@ -66,8 +66,14 @@ static __inline__ int hard_smp_processor_id(void) #define raw_smp_processor_id() (current_thread_info()->cpu) +extern void smp_setup_cpu_possible_map(void); + #endif /* !(__ASSEMBLY__) */ +#else + +#define smp_setup_cpu_possible_map() do { } while (0) + #endif /* !(CONFIG_SMP) */ #define NO_PROC_ID 0xFF -- cgit v1.2.3 From 3e6cb2d38a9c9758170813497a860c64543643d5 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 21 Feb 2006 18:32:14 +0000 Subject: [MIPS] Use "=R" constraint to avoid compiler errors in cmpxchg(). Signed-off-by: Ralf Baechle --- include/asm-mips/system.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h index e8e5d4143377..ddae9bae31af 100644 --- a/include/asm-mips/system.h +++ b/include/asm-mips/system.h @@ -322,7 +322,7 @@ static inline unsigned long __cmpxchg_u32(volatile int * m, unsigned long old, #endif "2: \n" " .set pop \n" - : "=&r" (retval), "=m" (*m) + : "=&r" (retval), "=R" (*m) : "R" (*m), "Jr" (old), "Jr" (new) : "memory"); } else if (cpu_has_llsc) { @@ -342,7 +342,7 @@ static inline unsigned long __cmpxchg_u32(volatile int * m, unsigned long old, #endif "2: \n" " .set pop \n" - : "=&r" (retval), "=m" (*m) + : "=&r" (retval), "=R" (*m) : "R" (*m), "Jr" (old), "Jr" (new) : "memory"); } else { @@ -379,7 +379,7 @@ static inline unsigned long __cmpxchg_u64(volatile int * m, unsigned long old, #endif "2: \n" " .set pop \n" - : "=&r" (retval), "=m" (*m) + : "=&r" (retval), "=R" (*m) : "R" (*m), "Jr" (old), "Jr" (new) : "memory"); } else if (cpu_has_llsc) { @@ -397,7 +397,7 @@ static inline unsigned long __cmpxchg_u64(volatile int * m, unsigned long old, #endif "2: \n" " .set pop \n" - : "=&r" (retval), "=m" (*m) + : "=&r" (retval), "=R" (*m) : "R" (*m), "Jr" (old), "Jr" (new) : "memory"); } else { -- cgit v1.2.3 From 9b6695a8adfe0916e81ddd810a5b9db3eb8b0e46 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 23 Feb 2006 12:23:27 +0000 Subject: [MIPS] SMP: Fix initialization order bug. A recent change requires cpu_possible_map to be initialized before smp_sched_init() but most MIPS platforms were initializing their processors in the prom_prepare_cpus callback of smp_prepare_cpus. The simple fix of calling prom_prepare_cpus from one of the earlier SMP initialization hooks doesn't work well either since IPIs may require init_IRQ() to have completed, so bit the bullet and split prom_prepare_cpus into two initialization functions, plat_smp_setup which is called early from setup_arch and plat_prepare_cpus called where prom_prepare_cpus used to be called. Signed-off-by: Ralf Baechle --- arch/mips/kernel/setup.c | 3 +++ arch/mips/kernel/smp.c | 2 +- arch/mips/kernel/smp_mt.c | 13 +++++++------ arch/mips/pmc-sierra/yosemite/smp.c | 24 ++++++------------------ arch/mips/sgi-ip27/ip27-smp.c | 7 ++++++- arch/mips/sibyte/cfe/smp.c | 10 +++++++--- include/asm-mips/smp.h | 11 +++++++++-- 7 files changed, 39 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index d86affa21278..d9293c558e41 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -540,6 +540,9 @@ void __init setup_arch(char **cmdline_p) sparse_init(); paging_init(); resource_init(); +#ifdef CONFIG_SMP + plat_smp_setup(); +#endif } int __init fpu_disable(char *s) diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 5e189862e523..06ed90752424 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -236,7 +236,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) init_new_context(current, &init_mm); current_thread_info()->cpu = 0; smp_tune_scheduling(); - prom_prepare_cpus(max_cpus); + plat_prepare_cpus(max_cpus); } /* preload SMP state for boot cpu */ diff --git a/arch/mips/kernel/smp_mt.c b/arch/mips/kernel/smp_mt.c index c930364830d0..993b8bf56aaf 100644 --- a/arch/mips/kernel/smp_mt.c +++ b/arch/mips/kernel/smp_mt.c @@ -143,7 +143,7 @@ static struct irqaction irq_call = { * Make sure all CPU's are in a sensible state before we boot any of the * secondarys */ -void prom_prepare_cpus(unsigned int max_cpus) +void plat_smp_setup(void) { unsigned long val; int i, num; @@ -179,11 +179,9 @@ void prom_prepare_cpus(unsigned int max_cpus) write_vpe_c0_vpeconf0(tmp); /* Record this as available CPU */ - if (i < max_cpus) { - cpu_set(i, phys_cpu_present_map); - __cpu_number_map[i] = ++num; - __cpu_logical_map[num] = i; - } + cpu_set(i, phys_cpu_present_map); + __cpu_number_map[i] = ++num; + __cpu_logical_map[num] = i; } /* disable multi-threading with TC's */ @@ -241,7 +239,10 @@ void prom_prepare_cpus(unsigned int max_cpus) set_vi_handler (MIPS_CPU_IPI_RESCHED_IRQ, ipi_resched_dispatch); set_vi_handler (MIPS_CPU_IPI_CALL_IRQ, ipi_call_dispatch); } +} +void __init plat_prepare_cpus(unsigned int max_cpus) +{ cpu_ipi_resched_irq = MIPSCPU_INT_BASE + MIPS_CPU_IPI_RESCHED_IRQ; cpu_ipi_call_irq = MIPSCPU_INT_BASE + MIPS_CPU_IPI_CALL_IRQ; diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c index 7f8fda962190..c197311e15d3 100644 --- a/arch/mips/pmc-sierra/yosemite/smp.c +++ b/arch/mips/pmc-sierra/yosemite/smp.c @@ -50,37 +50,25 @@ void __init prom_grab_secondary(void) * We don't want to start the secondary CPU yet nor do we have a nice probing * feature in PMON so we just assume presence of the secondary core. */ -static char maxcpus_string[] __initdata = - KERN_WARNING "max_cpus set to 0; using 1 instead\n"; - -void __init prom_prepare_cpus(unsigned int max_cpus) +void __init plat_smp_setup(void) { - int enabled = 0, i; - - if (max_cpus == 0) { - printk(maxcpus_string); - max_cpus = 1; - } + int i; cpus_clear(phys_cpu_present_map); for (i = 0; i < 2; i++) { - if (i == max_cpus) - break; - - /* - * The boot CPU - */ cpu_set(i, phys_cpu_present_map); __cpu_number_map[i] = i; __cpu_logical_map[i] = i; - enabled++; } +} +void __init plat_prepare_cpus(unsigned int max_cpus) +{ /* * Be paranoid. Enable the IPI only if we're really about to go SMP. */ - if (enabled > 1) + if (cpus_weight(cpu_possible_map)) set_c0_status(STATUSF_IP5); } diff --git a/arch/mips/sgi-ip27/ip27-smp.c b/arch/mips/sgi-ip27/ip27-smp.c index dbef3f6b5650..09fa7f5216f0 100644 --- a/arch/mips/sgi-ip27/ip27-smp.c +++ b/arch/mips/sgi-ip27/ip27-smp.c @@ -140,7 +140,7 @@ static __init void intr_clear_all(nasid_t nasid) REMOTE_HUB_CLR_INTR(nasid, i); } -void __init prom_prepare_cpus(unsigned int max_cpus) +void __init plat_smp_setup(void) { cnodeid_t cnode; @@ -161,6 +161,11 @@ void __init prom_prepare_cpus(unsigned int max_cpus) alloc_cpupda(0, 0); } +void __init plat_prepare_cpus(unsigned int max_cpus) +{ + /* We already did everything necessary earlier */ +} + /* * Launch a slave into smp_bootstrap(). It doesn't take an argument, and we * set sp to the kernel stack of the newly created idle process, gp to the proc diff --git a/arch/mips/sibyte/cfe/smp.c b/arch/mips/sibyte/cfe/smp.c index 4477af3d8074..eab20e2db323 100644 --- a/arch/mips/sibyte/cfe/smp.c +++ b/arch/mips/sibyte/cfe/smp.c @@ -31,7 +31,7 @@ * * Common setup before any secondaries are started */ -void __init prom_prepare_cpus(unsigned int max_cpus) +void __init plat_smp_setup(void) { int i, num; @@ -40,14 +40,18 @@ void __init prom_prepare_cpus(unsigned int max_cpus) __cpu_number_map[0] = 0; __cpu_logical_map[0] = 0; - for (i=1, num=0; i Date: Thu, 23 Feb 2006 14:10:53 +0000 Subject: [MIPS] Fix atomic*_sub_if_positive return value. Reported and initial fix by Thomas Koeller , rewritten by me. Signed-off-by: Ralf Baechle --- include/asm-mips/atomic.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/asm-mips/atomic.h b/include/asm-mips/atomic.h index 654b97d3e13a..2c8b853376c9 100644 --- a/include/asm-mips/atomic.h +++ b/include/asm-mips/atomic.h @@ -250,7 +250,10 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) " subu %0, %1, %3 \n" " bltz %0, 1f \n" " sc %0, %2 \n" + " .set noreorder \n" " beqzl %0, 1b \n" + " subu %0, %1, %3 \n" + " .set reorder \n" " sync \n" "1: \n" " .set mips0 \n" @@ -266,7 +269,10 @@ static __inline__ int atomic_sub_if_positive(int i, atomic_t * v) " subu %0, %1, %3 \n" " bltz %0, 1f \n" " sc %0, %2 \n" + " .set noreorder \n" " beqz %0, 1b \n" + " subu %0, %1, %3 \n" + " .set reorder \n" " sync \n" "1: \n" " .set mips0 \n" @@ -598,7 +604,10 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v) " dsubu %0, %1, %3 \n" " bltz %0, 1f \n" " scd %0, %2 \n" + " .set noreorder \n" " beqzl %0, 1b \n" + " dsubu %0, %1, %3 \n" + " .set reorder \n" " sync \n" "1: \n" " .set mips0 \n" @@ -614,7 +623,10 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v) " dsubu %0, %1, %3 \n" " bltz %0, 1f \n" " scd %0, %2 \n" + " .set noreorder \n" " beqz %0, 1b \n" + " dsubu %0, %1, %3 \n" + " .set reorder \n" " sync \n" "1: \n" " .set mips0 \n" -- cgit v1.2.3 From 752c1f4c78fe86d0fd6497387f763306b0d8fc53 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 27 Feb 2006 13:00:40 -0800 Subject: [IPSEC]: Kill post_input hook and do NAT-T in esp_input directly The only reason post_input exists at all is that it gives us the potential to adjust the checksums incrementally in future which we ought to do. However, after thinking about it for a bit we can adjust the checksums without using this post_input stuff at all. The crucial point is that only the inner-most NAT-T SA needs to be considered when adjusting checksums. What's more, the checksum adjustment comes down to a single u32 due to the linearity of IP checksums. We just happen to have a spare u32 lying around in our skb structure :) When ip_summed is set to CHECKSUM_NONE on input, the value of skb->csum is currently unused. All we have to do is to make that the checksum adjustment and voila, there goes all the post_input and decap structures! I've left in the decap data structures for now since it's intricately woven into the sec_path stuff. We can kill them later too. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 - net/ipv4/esp4.c | 128 +++++++++++++++---------------------------------- net/xfrm/xfrm_policy.c | 7 --- 3 files changed, 38 insertions(+), 98 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 004e645f3e18..8d362c49b8a9 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -233,7 +233,6 @@ struct xfrm_type int (*init_state)(struct xfrm_state *x); void (*destructor)(struct xfrm_state *); int (*input)(struct xfrm_state *, struct xfrm_decap_state *, struct sk_buff *skb); - int (*post_input)(struct xfrm_state *, struct xfrm_decap_state *, struct sk_buff *skb); int (*output)(struct xfrm_state *, struct sk_buff *pskb); /* Estimate maximal size of result of transformation of a dgram */ u32 (*get_max_size)(struct xfrm_state *, int size); diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 3f47419cb9c5..09590f356086 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -12,13 +12,6 @@ #include #include -/* decapsulation data for use when post-processing */ -struct esp_decap_data { - xfrm_address_t saddr; - __u16 sport; - __u8 proto; -}; - static int esp_output(struct xfrm_state *x, struct sk_buff *skb) { int err; @@ -210,25 +203,47 @@ static int esp_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struc /* ... check padding bits here. Silly. :-) */ - if (x->encap && decap && decap->decap_type) { - struct esp_decap_data *encap_data; - struct udphdr *uh = (struct udphdr *) (iph+1); - - encap_data = (struct esp_decap_data *) (decap->decap_data); - encap_data->proto = 0; - - switch (decap->decap_type) { - case UDP_ENCAP_ESPINUDP: - case UDP_ENCAP_ESPINUDP_NON_IKE: - encap_data->proto = AF_INET; - encap_data->saddr.a4 = iph->saddr; - encap_data->sport = uh->source; - encap_len = (void*)esph - (void*)uh; - break; + if (x->encap) { + struct xfrm_encap_tmpl *encap = x->encap; + struct udphdr *uh; - default: + if (encap->encap_type != decap->decap_type) goto out; + + uh = (struct udphdr *)(iph + 1); + encap_len = (void*)esph - (void*)uh; + + /* + * 1) if the NAT-T peer's IP or port changed then + * advertize the change to the keying daemon. + * This is an inbound SA, so just compare + * SRC ports. + */ + if (iph->saddr != x->props.saddr.a4 || + uh->source != encap->encap_sport) { + xfrm_address_t ipaddr; + + ipaddr.a4 = iph->saddr; + km_new_mapping(x, &ipaddr, uh->source); + + /* XXX: perhaps add an extra + * policy check here, to see + * if we should allow or + * reject a packet from a + * different source + * address/port. + */ } + + /* + * 2) ignore UDP/TCP checksums in case + * of NAT-T in Transport Mode, or + * perform other post-processing fixes + * as per draft-ietf-ipsec-udp-encaps-06, + * section 3.1.2 + */ + if (!x->props.mode) + skb->ip_summed = CHECKSUM_UNNECESSARY; } iph->protocol = nexthdr[1]; @@ -245,63 +260,6 @@ out: return -EINVAL; } -static int esp_post_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb) -{ - - if (x->encap) { - struct xfrm_encap_tmpl *encap; - struct esp_decap_data *decap_data; - - encap = x->encap; - decap_data = (struct esp_decap_data *)(decap->decap_data); - - /* first, make sure that the decap type == the encap type */ - if (encap->encap_type != decap->decap_type) - return -EINVAL; - - switch (encap->encap_type) { - default: - case UDP_ENCAP_ESPINUDP: - case UDP_ENCAP_ESPINUDP_NON_IKE: - /* - * 1) if the NAT-T peer's IP or port changed then - * advertize the change to the keying daemon. - * This is an inbound SA, so just compare - * SRC ports. - */ - if (decap_data->proto == AF_INET && - (decap_data->saddr.a4 != x->props.saddr.a4 || - decap_data->sport != encap->encap_sport)) { - xfrm_address_t ipaddr; - - ipaddr.a4 = decap_data->saddr.a4; - km_new_mapping(x, &ipaddr, decap_data->sport); - - /* XXX: perhaps add an extra - * policy check here, to see - * if we should allow or - * reject a packet from a - * different source - * address/port. - */ - } - - /* - * 2) ignore UDP/TCP checksums in case - * of NAT-T in Transport Mode, or - * perform other post-processing fixes - * as per * draft-ietf-ipsec-udp-encaps-06, - * section 3.1.2 - */ - if (!x->props.mode) - skb->ip_summed = CHECKSUM_UNNECESSARY; - - break; - } - } - return 0; -} - static u32 esp4_get_max_size(struct xfrm_state *x, int mtu) { struct esp_data *esp = x->data; @@ -457,7 +415,6 @@ static struct xfrm_type esp_type = .destructor = esp_destroy, .get_max_size = esp4_get_max_size, .input = esp_input, - .post_input = esp_post_input, .output = esp_output }; @@ -469,15 +426,6 @@ static struct net_protocol esp4_protocol = { static int __init esp4_init(void) { - struct xfrm_decap_state decap; - - if (sizeof(struct esp_decap_data) > - sizeof(decap.decap_data)) { - extern void decap_data_too_small(void); - - decap_data_too_small(); - } - if (xfrm_register_type(&esp_type, AF_INET) < 0) { printk(KERN_INFO "ip esp init: can't add xfrm type\n"); return -EAGAIN; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8206025d8e46..ae62054a9fc4 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -996,13 +996,6 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, struct sec_decap_state *xvec = &(skb->sp->x[i]); if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family)) return 0; - - /* If there is a post_input processor, try running it */ - if (xvec->xvec->type->post_input && - (xvec->xvec->type->post_input)(xvec->xvec, - &(xvec->decap), - skb) != 0) - return 0; } } -- cgit v1.2.3 From bafac2a512bf4fd2ce7520f3976ce8aab4435f74 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 27 Feb 2006 13:04:17 -0800 Subject: [NETFILTER]: Restore {ipt,ip6t,ebt}_LOG compatibility The nfnetlink_log infrastructure changes broke compatiblity of the LOG targets. They currently use whatever log backend was registered first, which means that if ipt_ULOG was loaded first, no messages will be printed to the ring buffer anymore. Restore compatiblity by using the old log functions by default and only use the nf_log backend if the user explicitly said so. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_bridge/ebt_log.h | 1 + include/linux/netfilter_ipv4/ipt_LOG.h | 3 ++- include/linux/netfilter_ipv6/ip6t_LOG.h | 3 ++- net/bridge/netfilter/ebt_log.c | 7 ++++++- net/ipv4/netfilter/ipt_LOG.c | 7 ++++++- net/ipv6/netfilter/ip6t_LOG.c | 7 ++++++- 6 files changed, 23 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_bridge/ebt_log.h b/include/linux/netfilter_bridge/ebt_log.h index 358fbc84fb59..96e231ae7554 100644 --- a/include/linux/netfilter_bridge/ebt_log.h +++ b/include/linux/netfilter_bridge/ebt_log.h @@ -3,6 +3,7 @@ #define EBT_LOG_IP 0x01 /* if the frame is made by ip, log the ip information */ #define EBT_LOG_ARP 0x02 +#define EBT_LOG_NFLOG 0x04 #define EBT_LOG_MASK (EBT_LOG_IP | EBT_LOG_ARP) #define EBT_LOG_PREFIX_SIZE 30 #define EBT_LOG_WATCHER "log" diff --git a/include/linux/netfilter_ipv4/ipt_LOG.h b/include/linux/netfilter_ipv4/ipt_LOG.h index 22d16177319b..892f9a33fea8 100644 --- a/include/linux/netfilter_ipv4/ipt_LOG.h +++ b/include/linux/netfilter_ipv4/ipt_LOG.h @@ -6,7 +6,8 @@ #define IPT_LOG_TCPOPT 0x02 /* Log TCP options */ #define IPT_LOG_IPOPT 0x04 /* Log IP options */ #define IPT_LOG_UID 0x08 /* Log UID owning local socket */ -#define IPT_LOG_MASK 0x0f +#define IPT_LOG_NFLOG 0x10 /* Log using nf_log backend */ +#define IPT_LOG_MASK 0x1f struct ipt_log_info { unsigned char level; diff --git a/include/linux/netfilter_ipv6/ip6t_LOG.h b/include/linux/netfilter_ipv6/ip6t_LOG.h index 9008ff5c40ae..060c1a1c6c60 100644 --- a/include/linux/netfilter_ipv6/ip6t_LOG.h +++ b/include/linux/netfilter_ipv6/ip6t_LOG.h @@ -6,7 +6,8 @@ #define IP6T_LOG_TCPOPT 0x02 /* Log TCP options */ #define IP6T_LOG_IPOPT 0x04 /* Log IP options */ #define IP6T_LOG_UID 0x08 /* Log UID owning local socket */ -#define IP6T_LOG_MASK 0x0f +#define IP6T_LOG_NFLOG 0x10 /* Log using nf_log backend */ +#define IP6T_LOG_MASK 0x1f struct ip6t_log_info { unsigned char level; diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 0128fbbe2328..288ff1d4ccc4 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -166,7 +166,12 @@ static void ebt_log(const struct sk_buff *skb, unsigned int hooknr, li.u.log.level = info->loglevel; li.u.log.logflags = info->bitmask; - nf_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li, info->prefix); + if (info->bitmask & EBT_LOG_NFLOG) + nf_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li, + info->prefix); + else + ebt_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li, + info->prefix); } static struct ebt_watcher log = diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 6606ddb66a29..cc27545ff97f 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -425,7 +425,12 @@ ipt_log_target(struct sk_buff **pskb, li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - nf_log_packet(PF_INET, hooknum, *pskb, in, out, &li, loginfo->prefix); + if (loginfo->logflags & IPT_LOG_NFLOG) + nf_log_packet(PF_INET, hooknum, *pskb, in, out, &li, + loginfo->prefix); + else + ipt_log_packet(PF_INET, hooknum, *pskb, in, out, &li, + loginfo->prefix); return IPT_CONTINUE; } diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 77c725832dec..6b930efa9fb9 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -436,7 +436,12 @@ ip6t_log_target(struct sk_buff **pskb, li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - nf_log_packet(PF_INET6, hooknum, *pskb, in, out, &li, loginfo->prefix); + if (loginfo->logflags & IP6T_LOG_NFLOG) + nf_log_packet(PF_INET6, hooknum, *pskb, in, out, &li, + loginfo->prefix); + else + ip6t_log_packet(PF_INET6, hooknum, *pskb, in, out, &li, + loginfo->prefix); return IP6T_CONTINUE; } -- cgit v1.2.3 From 18810d1ebac89232d8f218a318ed9ff7ef198e96 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 23 Feb 2006 13:16:44 -0600 Subject: [IA64-SGI] Make number of TIO nodes configurable Make the limit for the number of TIO nodes a function of the number of C/M nodes in the system instead of a hardcoded constant. The number of TIO nodes should be the same as the number of C/M nodes. Signed-off-by: Jack Steiner Signed-off-by: Tony Luck --- include/asm-ia64/sn/arch.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-ia64/sn/arch.h b/include/asm-ia64/sn/arch.h index 91c31be87b13..16adc93d7a72 100644 --- a/include/asm-ia64/sn/arch.h +++ b/include/asm-ia64/sn/arch.h @@ -31,7 +31,8 @@ * to ACPI3.0, this limit will be removed. The notion of "compact nodes" * should be deleted and TIOs should be included in MAX_NUMNODES. */ -#define MAX_COMPACT_NODES 512 +#define MAX_TIO_NODES MAX_NUMNODES +#define MAX_COMPACT_NODES (MAX_NUMNODES + MAX_TIO_NODES) /* * Maximum number of nodes in all partitions and in all coherency domains. -- cgit v1.2.3 From 637029c6cb5efcbaa3d5831af4c1972bdd629779 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 27 Feb 2006 20:41:56 -0800 Subject: Revert "[PATCH] x86_64: Only do the clustered systems have unsynchronized TSC assumption on IBM systems" This reverts commit 13a229abc25640813f1480c0478dfc6bdbc1c19e. Quoth Andi: "After some consideration and feedback from various people it turns out this wasn't that good an idea. It has some problems and needs more work. Since it was only an optimization anyways it's best to just back it out again for now." Signed-off-by: Linus Torvalds --- arch/i386/kernel/acpi/boot.c | 3 +++ arch/x86_64/kernel/apic.c | 9 +-------- include/asm-x86_64/acpi.h | 14 -------------- 3 files changed, 4 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 8309a7b2cd63..79577f0ace98 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -44,6 +44,9 @@ extern void __init clustered_apic_check(void); extern int gsi_irq_sharing(int gsi); #include +static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } + + #else /* X86 */ #ifdef CONFIG_X86_LOCAL_APIC diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index d70605eda333..e5b14c57eaa0 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -962,14 +962,12 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) irq_exit(); } -int __initdata unsync_tsc_on_multicluster; - /* * oem_force_hpet_timer -- force HPET mode for some boxes. * * Thus far, the major user of this is IBM's Summit2 series: * - * Some clustered boxes may have unsynced TSC problems if they are + * Clustered boxes may have unsynced TSC problems if they are * multi-chassis. Use available data to take a good guess. * If in doubt, go HPET. */ @@ -979,11 +977,6 @@ __cpuinit int oem_force_hpet_timer(void) unsigned id; DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); - /* Only do this check on IBM machines - big Unisys systems - use multiple clusters too, but have synchronized TSC */ - if (!unsync_tsc_on_multicluster) - return 0; - bitmap_zero(clustermap, NUM_APIC_CLUSTERS); for (i = 0; i < NR_CPUS; i++) { diff --git a/include/asm-x86_64/acpi.h b/include/asm-x86_64/acpi.h index e2b9923189a0..aa1c7b2e438c 100644 --- a/include/asm-x86_64/acpi.h +++ b/include/asm-x86_64/acpi.h @@ -164,20 +164,6 @@ extern u8 x86_acpiid_to_apicid[]; extern int acpi_skip_timer_override; -extern int unsync_tsc_on_multicluster; - -static inline int acpi_madt_oem_check(char *oem, char *productid) -{ - /* Copied from i386. Probably has too many entries. */ - if (!strncmp(oem, "IBM ENSW", 8) && - (!strncmp(productid, "VIGIL SMP", 9) - || !strncmp(productid, "EXA", 3) - || !strncmp(productid, "RUTHLESS SMP", 12))) { - unsync_tsc_on_multicluster = 1; - } - return 0; -} - #endif /*__KERNEL__*/ #endif /*_ASM_ACPI_H*/ -- cgit v1.2.3 From 827c1a6c1a5dcb2902fecfb648f9af6a532934eb Mon Sep 17 00:00:00 2001 From: John Rose Date: Fri, 24 Feb 2006 11:34:23 -0600 Subject: [PATCH] powerpc: fix dynamic PCI probe regression Some hotplug driver functions were migrated to the kernel for use by EEH in commit 2bf6a8fa21570f37fd1789610da30f70a05ac5e3. Previously, the PCI Hotplug module had been changed to use the new OFDT-based PCI probe when appropriate: 5fa80fcdca9d20d30c9ecec30d4dbff4ed93a5c6 When rpaphp_pci_config_slot() was moved from the rpaphp driver to the new kernel function pcibios_add_pci_devices(), the OFDT-based probe stuff was dropped. This patch restores it. Signed-off-by: John Rose Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/pseries/eeh.c | 14 ++++++++++++ arch/powerpc/platforms/pseries/pci_dlpar.c | 36 +++++++++++++++++------------- include/asm-powerpc/eeh.h | 7 +++--- 3 files changed, 38 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 83578313ee7e..2ab9dcdfb415 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -893,6 +893,20 @@ void eeh_add_device_tree_early(struct device_node *dn) } EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); +void eeh_add_device_tree_late(struct pci_bus *bus) +{ + struct pci_dev *dev; + + list_for_each_entry(dev, &bus->devices, bus_list) { + eeh_add_device_late(dev); + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { + struct pci_bus *subbus = dev->subordinate; + if (subbus) + eeh_add_device_tree_late(subbus); + } + } +} + /** * eeh_add_device_late - perform EEH initialization for the indicated pci device * @dev: pci device for which to set up EEH diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index bdaa8aabdaa6..f3bad900bbcf 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -106,6 +106,8 @@ pcibios_fixup_new_pci_devices(struct pci_bus *bus, int fix_bus) } } } + + eeh_add_device_tree_late(bus); } EXPORT_SYMBOL_GPL(pcibios_fixup_new_pci_devices); @@ -114,7 +116,6 @@ pcibios_pci_config_bridge(struct pci_dev *dev) { u8 sec_busno; struct pci_bus *child_bus; - struct pci_dev *child_dev; /* Get busno of downstream bus */ pci_read_config_byte(dev, PCI_SECONDARY_BUS, &sec_busno); @@ -129,10 +130,6 @@ pcibios_pci_config_bridge(struct pci_dev *dev) pci_scan_child_bus(child_bus); - list_for_each_entry(child_dev, &child_bus->devices, bus_list) { - eeh_add_device_late(child_dev); - } - /* Fixup new pci devices without touching bus struct */ pcibios_fixup_new_pci_devices(child_bus, 0); @@ -160,18 +157,25 @@ pcibios_add_pci_devices(struct pci_bus * bus) eeh_add_device_tree_early(dn); - /* pci_scan_slot should find all children */ - slotno = PCI_SLOT(PCI_DN(dn->child)->devfn); - num = pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); - if (num) { - pcibios_fixup_new_pci_devices(bus, 1); - pci_bus_add_devices(bus); - } + if (_machine == PLATFORM_PSERIES_LPAR) { + /* use ofdt-based probe */ + of_scan_bus(dn, bus); + if (!list_empty(&bus->devices)) { + pcibios_fixup_new_pci_devices(bus, 0); + pci_bus_add_devices(bus); + } + } else { + /* use legacy probe */ + slotno = PCI_SLOT(PCI_DN(dn->child)->devfn); + num = pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); + if (num) { + pcibios_fixup_new_pci_devices(bus, 1); + pci_bus_add_devices(bus); + } - list_for_each_entry(dev, &bus->devices, bus_list) { - eeh_add_device_late (dev); - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) - pcibios_pci_config_bridge(dev); + list_for_each_entry(dev, &bus->devices, bus_list) + if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) + pcibios_pci_config_bridge(dev); } } EXPORT_SYMBOL_GPL(pcibios_add_pci_devices); diff --git a/include/asm-powerpc/eeh.h b/include/asm-powerpc/eeh.h index b263fb2fa6e4..7dfb408fe2ca 100644 --- a/include/asm-powerpc/eeh.h +++ b/include/asm-powerpc/eeh.h @@ -27,6 +27,7 @@ #include struct pci_dev; +struct pci_bus; struct device_node; #ifdef CONFIG_EEH @@ -61,7 +62,7 @@ void __init pci_addr_cache_build(void); */ void eeh_add_device_early(struct device_node *); void eeh_add_device_tree_early(struct device_node *); -void eeh_add_device_late(struct pci_dev *); +void eeh_add_device_tree_late(struct pci_bus *); /** * eeh_remove_device - undo EEH setup for the indicated pci device @@ -116,12 +117,12 @@ static inline void pci_addr_cache_build(void) { } static inline void eeh_add_device_early(struct device_node *dn) { } -static inline void eeh_add_device_late(struct pci_dev *dev) { } - static inline void eeh_remove_device(struct pci_dev *dev) { } static inline void eeh_add_device_tree_early(struct device_node *dn) { } +static inline void eeh_add_device_tree_late(struct pci_bus *bus) { } + static inline void eeh_remove_bus_device(struct pci_dev *dev) { } #define EEH_POSSIBLE_ERROR(val, type) (0) #define EEH_IO_ERROR_VALUE(size) (-1UL) -- cgit v1.2.3 From 778e2ac5970e445f8c6b7d8aa597ac162afe270a Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 28 Feb 2006 17:04:20 +0000 Subject: [MIPS] Fix build error on processors that don's support copy-on-write. Signed-off-by: Ralf Baechle --- arch/mips/lib/iomap.c | 2 +- include/asm-mips/io.h | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/mips/lib/iomap.c b/arch/mips/lib/iomap.c index 7e2ced715cfb..f4ac5bbcd81f 100644 --- a/arch/mips/lib/iomap.c +++ b/arch/mips/lib/iomap.c @@ -63,7 +63,7 @@ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) return ioport_map(start, len); if (flags & IORESOURCE_MEM) { if (flags & IORESOURCE_CACHEABLE) - return ioremap_cacheable_cow(start, len); + return ioremap_cachable(start, len); return ioremap_nocache(start, len); } diff --git a/include/asm-mips/io.h b/include/asm-mips/io.h index 5a4c8a54b8f4..8c011aa61afa 100644 --- a/include/asm-mips/io.h +++ b/include/asm-mips/io.h @@ -282,6 +282,24 @@ static inline void __iomem * __ioremap_mode(phys_t offset, unsigned long size, #define ioremap_nocache(offset, size) \ __ioremap_mode((offset), (size), _CACHE_UNCACHED) +/* + * ioremap_cachable - map bus memory into CPU space + * @offset: bus address of the memory + * @size: size of the resource to map + * + * ioremap_nocache performs a platform specific sequence of operations to + * make bus memory CPU accessible via the readb/readw/readl/writeb/ + * writew/writel functions and the other mmio helpers. The returned + * address is not guaranteed to be usable directly as a virtual + * address. + * + * This version of ioremap ensures that the memory is marked cachable by + * the CPU. Also enables full write-combining. Useful for some + * memory-like regions on I/O busses. + */ +#define ioremap_cachable(offset, size) \ + __ioremap_mode((offset), (size), PAGE_CACHABLE_DEFAULT) + /* * These two are MIPS specific ioremap variant. ioremap_cacheable_cow * requests a cachable mapping, ioremap_uncached_accelerated requests a -- cgit v1.2.3 From d2b176ed878d4d5fcc0bd35656dfd373f3702af9 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Tue, 28 Feb 2006 09:42:23 -0800 Subject: [IA64] sysctl option to silence unaligned trap warnings Allow sysadmin to disable all warnings about userland apps making unaligned accesses by using: # echo 1 > /proc/sys/kernel/ignore-unaligned-usertrap Rather than having to use prctl on a process by process basis. Default behaivour leaves the warnings enabled. Signed-off-by: Jes Sorensen Signed-off-by: Tony Luck --- arch/ia64/kernel/unaligned.c | 31 ++++++++++++++++++++++++++++--- include/linux/sysctl.h | 1 + kernel/sysctl.c | 14 ++++++++++++++ 3 files changed, 43 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index 112913896844..1e357550c776 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -52,6 +52,15 @@ dump (const char *str, void *vp, size_t len) #define IA64_FIRST_ROTATING_FR 32 #define SIGN_EXT9 0xffffffffffffff00ul +/* + * sysctl settable hook which tells the kernel whether to honor the + * IA64_THREAD_UAC_NOPRINT prctl. Because this is user settable, we want + * to allow the super user to enable/disable this for security reasons + * (i.e. don't allow attacker to fill up logs with unaligned accesses). + */ +int no_unaligned_warning; +static int noprint_warning; + /* * For M-unit: * @@ -1324,8 +1333,9 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0) goto force_sigbus; - if (!(current->thread.flags & IA64_THREAD_UAC_NOPRINT) - && within_logging_rate_limit()) + if (!no_unaligned_warning && + !(current->thread.flags & IA64_THREAD_UAC_NOPRINT) && + within_logging_rate_limit()) { char buf[200]; /* comm[] is at most 16 bytes... */ size_t len; @@ -1340,7 +1350,22 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) if (user_mode(regs)) tty_write_message(current->signal->tty, buf); buf[len-1] = '\0'; /* drop '\r' */ - printk(KERN_WARNING "%s", buf); /* watch for command names containing %s */ + /* watch for command names containing %s */ + printk(KERN_WARNING "%s", buf); + } else { + if (no_unaligned_warning && !noprint_warning) { + noprint_warning = 1; + printk(KERN_WARNING "%s(%d) encountered an " + "unaligned exception which required\n" + "kernel assistance, which degrades " + "the performance of the application.\n" + "Unaligned exception warnings have " + "been disabled by the system " + "administrator\n" + "echo 0 > /proc/sys/kernel/ignore-" + "unaligned-usertrap to re-enable\n", + current->comm, current->pid); + } } } else { if (within_logging_rate_limit()) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 0e92bf7ec28e..bac61db26456 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -147,6 +147,7 @@ enum KERN_SETUID_DUMPABLE=69, /* int: behaviour of dumps for setuid core */ KERN_SPIN_RETRY=70, /* int: number of spinlock retries */ KERN_ACPI_VIDEO_FLAGS=71, /* int: flags for setting up video after ACPI sleep */ + KERN_IA64_UNALIGNED=72, /* int: ia64 unaligned userland trap enable */ }; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c05a2b7125e1..acf6c1550f27 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -124,6 +124,10 @@ extern int sysctl_hz_timer; extern int acct_parm[]; #endif +#ifdef CONFIG_IA64 +extern int no_unaligned_warning; +#endif + static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t, ctl_table *, void **); static int proc_doutsstring(ctl_table *table, int write, struct file *filp, @@ -665,6 +669,16 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#endif +#ifdef CONFIG_IA64 + { + .ctl_name = KERN_IA64_UNALIGNED, + .procname = "ignore-unaligned-usertrap", + .data = &no_unaligned_warning, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif { .ctl_name = 0 } }; -- cgit v1.2.3 From 0551fbd29e16fccd46e41b7d01bf0f8f39b14212 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 28 Feb 2006 16:59:19 -0800 Subject: [PATCH] Add mm->task_size and fix powerpc vdso This patch adds mm->task_size to keep track of the task size of a given mm and uses that to fix the powerpc vdso so that it uses the mm task size to decide what pages to fault in instead of the current thread flags (which broke when ptracing). (akpm: I expect that mm_struct.task_size will become the way in which we finally sort out the confusion between 32-bit processes and 32-bit mm's. It may need tweaks, but at this stage this patch is powerpc-only.) Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/vdso.c | 4 ++-- fs/exec.c | 6 ++++++ include/linux/sched.h | 5 +++-- 3 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index f0c47dab0903..04f7df39ffbb 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -182,8 +182,8 @@ static struct page * vdso_vma_nopage(struct vm_area_struct * vma, unsigned long offset = address - vma->vm_start; struct page *pg; #ifdef CONFIG_PPC64 - void *vbase = test_thread_flag(TIF_32BIT) ? - vdso32_kbase : vdso64_kbase; + void *vbase = (vma->vm_mm->task_size > TASK_SIZE_USER32) ? + vdso64_kbase : vdso32_kbase; #else void *vbase = vdso32_kbase; #endif diff --git a/fs/exec.c b/fs/exec.c index 0e1c95074d42..0b515ac53134 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -885,6 +885,12 @@ int flush_old_exec(struct linux_binprm * bprm) current->flags &= ~PF_RANDOMIZE; flush_thread(); + /* Set the new mm task size. We have to do that late because it may + * depend on TIF_32BIT which is only updated in flush_thread() on + * some architectures like powerpc + */ + current->mm->task_size = TASK_SIZE; + if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || file_permission(bprm->file, MAY_READ) || (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) { diff --git a/include/linux/sched.h b/include/linux/sched.h index b6f51e3a38ec..ff2e09c953b9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -298,8 +298,9 @@ struct mm_struct { unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); void (*unmap_area) (struct mm_struct *mm, unsigned long addr); - unsigned long mmap_base; /* base of mmap area */ - unsigned long cached_hole_size; /* if non-zero, the largest hole below free_area_cache */ + unsigned long mmap_base; /* base of mmap area */ + unsigned long task_size; /* size of task vm space */ + unsigned long cached_hole_size; /* if non-zero, the largest hole below free_area_cache */ unsigned long free_area_cache; /* first hole of size cached_hole_size or larger */ pgd_t * pgd; atomic_t mm_users; /* How many users with user space? */ -- cgit v1.2.3 From f0892b89e3c19c7d805825ca12511d26dcdf6415 Mon Sep 17 00:00:00 2001 From: Pavel Roskin Date: Tue, 28 Feb 2006 01:18:29 -0500 Subject: [PATCH] pcmcia: Add macro to match PCMCIA cards by numeric ID and first vendor string This is needed to distinguish Intersil and non-Intersil cards with numeric ID 0x0156, 0x0002. Signed-off-by: Pavel Roskin Signed-off-by: Dominik Brodowski --- include/pcmcia/device_id.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/pcmcia/device_id.h b/include/pcmcia/device_id.h index 346d81ece287..e04e0b0d9a25 100644 --- a/include/pcmcia/device_id.h +++ b/include/pcmcia/device_id.h @@ -72,6 +72,15 @@ .prod_id = { (v1), (v2), (v3), (v4) }, \ .prod_id_hash = { (vh1), (vh2), (vh3), (vh4) }, } +#define PCMCIA_DEVICE_MANF_CARD_PROD_ID1(manf, card, v1, vh1) { \ + .match_flags = PCMCIA_DEV_ID_MATCH_MANF_ID| \ + PCMCIA_DEV_ID_MATCH_CARD_ID| \ + PCMCIA_DEV_ID_MATCH_PROD_ID1, \ + .manf_id = (manf), \ + .card_id = (card), \ + .prod_id = { (v1), NULL, NULL, NULL }, \ + .prod_id_hash = { (vh1), 0, 0, 0 }, } + /* multi-function devices */ -- cgit v1.2.3 From e5cef95d58d1e711b0bd6b00018278a06defb274 Mon Sep 17 00:00:00 2001 From: Greg KH Date: Wed, 1 Mar 2006 13:46:00 -0800 Subject: [PATCH] fix build breakage in eeh.c in 2.6.16-rc5-git5 This patch should fixe a problem with eeh_add_device_late() not being defined in the ppc64 build process, causing the build to break. Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- include/asm-powerpc/eeh.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-powerpc/eeh.h b/include/asm-powerpc/eeh.h index 7dfb408fe2ca..eb392032e19b 100644 --- a/include/asm-powerpc/eeh.h +++ b/include/asm-powerpc/eeh.h @@ -61,6 +61,7 @@ void __init pci_addr_cache_build(void); * to finish the eeh setup for this device. */ void eeh_add_device_early(struct device_node *); +void eeh_add_device_late(struct pci_dev *dev); void eeh_add_device_tree_early(struct device_node *); void eeh_add_device_tree_late(struct pci_bus *); -- cgit v1.2.3 From 3af1efe8a301f5b1c813f5f761cb1e10d6175605 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 2 Mar 2006 13:25:26 -0500 Subject: [PATCH] reiserfs: fix unaligned bitmap usage The bitmaps associated with generation numbers for directory entries are declared as an array of ints. On some platforms, this causes alignment exceptions. The following patch uses the standard bitmap declaration macros to declare the bitmaps, fixing the problem. Originally from Takashi Iwai. Signed-off-by: Takashi Iwai Acked-by: Jeff Mahoney Signed-off-by: Linus Torvalds --- fs/reiserfs/namei.c | 8 ++++---- include/linux/reiserfs_fs.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index c8123308e060..284f7852de8b 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -247,7 +247,7 @@ static int linear_search_in_dir_item(struct cpu_key *key, /* mark, that this generation number is used */ if (de->de_gen_number_bit_string) set_bit(GET_GENERATION_NUMBER(deh_offset(deh)), - (unsigned long *)de->de_gen_number_bit_string); + de->de_gen_number_bit_string); // calculate pointer to name and namelen de->de_entry_num = i; @@ -431,7 +431,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th, struct reiserfs_de_head *deh; INITIALIZE_PATH(path); struct reiserfs_dir_entry de; - int bit_string[MAX_GENERATION_NUMBER / (sizeof(int) * 8) + 1]; + DECLARE_BITMAP(bit_string, MAX_GENERATION_NUMBER + 1); int gen_number; char small_buf[32 + DEH_SIZE]; /* 48 bytes now and we avoid kmalloc if we create file with short name */ @@ -486,7 +486,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th, /* find the proper place for the new entry */ memset(bit_string, 0, sizeof(bit_string)); - de.de_gen_number_bit_string = (char *)bit_string; + de.de_gen_number_bit_string = bit_string; retval = reiserfs_find_entry(dir, name, namelen, &path, &de); if (retval != NAME_NOT_FOUND) { if (buffer != small_buf) @@ -508,7 +508,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th, } gen_number = - find_first_zero_bit((unsigned long *)bit_string, + find_first_zero_bit(bit_string, MAX_GENERATION_NUMBER + 1); if (gen_number > MAX_GENERATION_NUMBER) { /* there is no free generation number */ diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 7d51149bd793..dad78cecfd20 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -1052,7 +1052,7 @@ struct reiserfs_dir_entry { int de_entrylen; int de_namelen; char *de_name; - char *de_gen_number_bit_string; + unsigned long *de_gen_number_bit_string; __u32 de_dir_id; __u32 de_objectid; -- cgit v1.2.3 From aa5cb02143123289bd37c30c0ad60339f8da0bad Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 1 Mar 2006 15:07:07 +1100 Subject: [PATCH] powerpc: Expose SMT and L1 icache snoop userland features This patch makes userland aware of the icache snoop capability of the POWER5 (and possibly others in the future) and of SMT capabilities. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/cputable.c | 9 ++++++--- include/asm-powerpc/cputable.h | 2 ++ 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 10696456a4c6..e4e81374cb9a 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -53,8 +53,10 @@ extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec); PPC_FEATURE_HAS_MMU) #define COMMON_USER_PPC64 (COMMON_USER | PPC_FEATURE_64) #define COMMON_USER_POWER4 (COMMON_USER_PPC64 | PPC_FEATURE_POWER4) -#define COMMON_USER_POWER5 (COMMON_USER_PPC64 | PPC_FEATURE_POWER5) -#define COMMON_USER_POWER5_PLUS (COMMON_USER_PPC64 | PPC_FEATURE_POWER5_PLUS) +#define COMMON_USER_POWER5 (COMMON_USER_PPC64 | PPC_FEATURE_POWER5 |\ + PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP) +#define COMMON_USER_POWER5_PLUS (COMMON_USER_PPC64 | PPC_FEATURE_POWER5_PLUS|\ + PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP) #define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \ PPC_FEATURE_BOOKE) @@ -267,7 +269,8 @@ struct cpu_spec cpu_specs[] = { .cpu_name = "Cell Broadband Engine", .cpu_features = CPU_FTRS_CELL, .cpu_user_features = COMMON_USER_PPC64 | - PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP, + PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP | + PPC_FEATURE_SMT, .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_be, diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h index 90d005bb4d1c..5638518968c3 100644 --- a/include/asm-powerpc/cputable.h +++ b/include/asm-powerpc/cputable.h @@ -20,6 +20,8 @@ #define PPC_FEATURE_POWER5_PLUS 0x00020000 #define PPC_FEATURE_CELL 0x00010000 #define PPC_FEATURE_BOOKE 0x00008000 +#define PPC_FEATURE_SMT 0x00004000 +#define PPC_FEATURE_ICACHE_SNOOP 0x00002000 #ifdef __KERNEL__ #ifndef __ASSEMBLY__ -- cgit v1.2.3 From 141aa59b5347a4a021e37cfbc2258df9af9392f8 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 3 Mar 2006 16:24:06 +1100 Subject: [PATCH] powerpc: Fix incorrect pud_ERROR() message The powerpc pud_ERROR() function misleadingly prints a message indicating a pmd error. This patch fixes it. Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- include/asm-powerpc/pgtable-4k.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-powerpc/pgtable-4k.h b/include/asm-powerpc/pgtable-4k.h index e9590c06ad92..80a7832d2721 100644 --- a/include/asm-powerpc/pgtable-4k.h +++ b/include/asm-powerpc/pgtable-4k.h @@ -88,4 +88,4 @@ (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) #define pud_ERROR(e) \ - printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pud_val(e)) + printk("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) -- cgit v1.2.3 From 4d000d5b9689734006d89fe9b7597c758b74a9fb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 4 Mar 2006 23:23:56 -0800 Subject: [SPARC64]: Mark __ex_table section correctly. We must use the "a" (allocate) attribute every time we emit an entry into the __ex_table section. For consistency, use "a" instead of #alloc which is some Solaris compat cruft GNU as provides on Sparc. Signed-off-by: David S. Miller --- arch/sparc64/kernel/sys32.S | 2 +- arch/sparc64/kernel/una_asm.S | 4 ++-- arch/sparc64/lib/U1copy_from_user.S | 2 +- arch/sparc64/lib/U1copy_to_user.S | 2 +- arch/sparc64/lib/U3copy_from_user.S | 2 +- arch/sparc64/lib/U3copy_to_user.S | 2 +- arch/sparc64/lib/bzero.S | 2 +- arch/sparc64/lib/copy_in_user.S | 2 +- arch/sparc64/lib/csum_copy_from_user.S | 2 +- arch/sparc64/lib/csum_copy_to_user.S | 2 +- arch/sparc64/lib/strlen_user.S | 2 +- arch/sparc64/lib/strncpy_from_user.S | 2 +- arch/sparc64/solaris/entry64.S | 2 +- include/asm-sparc64/futex.h | 2 +- include/asm-sparc64/uaccess.h | 12 ++++++------ 15 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/sys32.S b/arch/sparc64/kernel/sys32.S index 60b59375aa78..c4a1cef4b1e5 100644 --- a/arch/sparc64/kernel/sys32.S +++ b/arch/sparc64/kernel/sys32.S @@ -318,7 +318,7 @@ do_sys_recvmsg: /* compat_sys_recvmsg(int, struct compat_msghdr *, unsigned int) nop nop - .section __ex_table + .section __ex_table,"a" .align 4 .word 1b, __retl_efault, 2b, __retl_efault .word 3b, __retl_efault, 4b, __retl_efault diff --git a/arch/sparc64/kernel/una_asm.S b/arch/sparc64/kernel/una_asm.S index 1f5b5b708ce7..be183fe41443 100644 --- a/arch/sparc64/kernel/una_asm.S +++ b/arch/sparc64/kernel/una_asm.S @@ -47,7 +47,7 @@ __do_int_store: mov 0, %o0 .size __do_int_store, .-__do_int_store - .section __ex_table + .section __ex_table,"a" .word 4b, __retl_efault .word 5b, __retl_efault .word 6b, __retl_efault @@ -129,7 +129,7 @@ do_int_load: mov 0, %o0 .size __do_int_load, .-__do_int_load - .section __ex_table + .section __ex_table,"a" .word 4b, __retl_efault .word 5b, __retl_efault .word 6b, __retl_efault diff --git a/arch/sparc64/lib/U1copy_from_user.S b/arch/sparc64/lib/U1copy_from_user.S index 93146a81e2d3..3192b0bf4fab 100644 --- a/arch/sparc64/lib/U1copy_from_user.S +++ b/arch/sparc64/lib/U1copy_from_user.S @@ -9,7 +9,7 @@ .align 4; \ 99: retl; \ mov 1, %o0; \ - .section __ex_table; \ + .section __ex_table,"a";\ .align 4; \ .word 98b, 99b; \ .text; \ diff --git a/arch/sparc64/lib/U1copy_to_user.S b/arch/sparc64/lib/U1copy_to_user.S index 1fccc521e2bd..d1210ffb0b82 100644 --- a/arch/sparc64/lib/U1copy_to_user.S +++ b/arch/sparc64/lib/U1copy_to_user.S @@ -9,7 +9,7 @@ .align 4; \ 99: retl; \ mov 1, %o0; \ - .section __ex_table; \ + .section __ex_table,"a";\ .align 4; \ .word 98b, 99b; \ .text; \ diff --git a/arch/sparc64/lib/U3copy_from_user.S b/arch/sparc64/lib/U3copy_from_user.S index df600b667e48..f5bfc8d9d216 100644 --- a/arch/sparc64/lib/U3copy_from_user.S +++ b/arch/sparc64/lib/U3copy_from_user.S @@ -9,7 +9,7 @@ .align 4; \ 99: retl; \ mov 1, %o0; \ - .section __ex_table; \ + .section __ex_table,"a";\ .align 4; \ .word 98b, 99b; \ .text; \ diff --git a/arch/sparc64/lib/U3copy_to_user.S b/arch/sparc64/lib/U3copy_to_user.S index f337f22ed82e..2334f111bb0c 100644 --- a/arch/sparc64/lib/U3copy_to_user.S +++ b/arch/sparc64/lib/U3copy_to_user.S @@ -9,7 +9,7 @@ .align 4; \ 99: retl; \ mov 1, %o0; \ - .section __ex_table; \ + .section __ex_table,"a";\ .align 4; \ .word 98b, 99b; \ .text; \ diff --git a/arch/sparc64/lib/bzero.S b/arch/sparc64/lib/bzero.S index 21a933ffb7c2..1d2abcfa4e52 100644 --- a/arch/sparc64/lib/bzero.S +++ b/arch/sparc64/lib/bzero.S @@ -92,7 +92,7 @@ __bzero_done: .align 4; \ 99: retl; \ mov %o1, %o0; \ - .section __ex_table; \ + .section __ex_table,"a";\ .align 4; \ .word 98b, 99b; \ .text; \ diff --git a/arch/sparc64/lib/copy_in_user.S b/arch/sparc64/lib/copy_in_user.S index 816076c0bc06..650af3f21f78 100644 --- a/arch/sparc64/lib/copy_in_user.S +++ b/arch/sparc64/lib/copy_in_user.S @@ -13,7 +13,7 @@ .align 4; \ 99: retl; \ mov 1, %o0; \ - .section __ex_table; \ + .section __ex_table,"a";\ .align 4; \ .word 98b, 99b; \ .text; \ diff --git a/arch/sparc64/lib/csum_copy_from_user.S b/arch/sparc64/lib/csum_copy_from_user.S index 817ebdae39f8..a22eddbe5dba 100644 --- a/arch/sparc64/lib/csum_copy_from_user.S +++ b/arch/sparc64/lib/csum_copy_from_user.S @@ -9,7 +9,7 @@ .align 4; \ 99: retl; \ mov -1, %o0; \ - .section __ex_table; \ + .section __ex_table,"a";\ .align 4; \ .word 98b, 99b; \ .text; \ diff --git a/arch/sparc64/lib/csum_copy_to_user.S b/arch/sparc64/lib/csum_copy_to_user.S index c2f9463ea1e2..d5b12f441f02 100644 --- a/arch/sparc64/lib/csum_copy_to_user.S +++ b/arch/sparc64/lib/csum_copy_to_user.S @@ -9,7 +9,7 @@ .align 4; \ 99: retl; \ mov -1, %o0; \ - .section __ex_table; \ + .section __ex_table,"a";\ .align 4; \ .word 98b, 99b; \ .text; \ diff --git a/arch/sparc64/lib/strlen_user.S b/arch/sparc64/lib/strlen_user.S index 9ed54ba14fc6..114ed111e251 100644 --- a/arch/sparc64/lib/strlen_user.S +++ b/arch/sparc64/lib/strlen_user.S @@ -85,7 +85,7 @@ __strnlen_user: retl clr %o0 - .section __ex_table,#alloc + .section __ex_table,"a" .align 4 .word 10b, 30b diff --git a/arch/sparc64/lib/strncpy_from_user.S b/arch/sparc64/lib/strncpy_from_user.S index e1264650ca7a..b2f499f79427 100644 --- a/arch/sparc64/lib/strncpy_from_user.S +++ b/arch/sparc64/lib/strncpy_from_user.S @@ -125,7 +125,7 @@ __strncpy_from_user: add %o2, %o3, %o0 .size __strncpy_from_user, .-__strncpy_from_user - .section __ex_table,#alloc + .section __ex_table,"a" .align 4 .word 60b, __retl_efault .word 61b, __retl_efault diff --git a/arch/sparc64/solaris/entry64.S b/arch/sparc64/solaris/entry64.S index eb314ed23cdb..f170324e8bf2 100644 --- a/arch/sparc64/solaris/entry64.S +++ b/arch/sparc64/solaris/entry64.S @@ -217,7 +217,7 @@ solaris_unimplemented: ba,pt %xcc, ret_from_solaris nop - .section __ex_table,#alloc + .section __ex_table,"a" .align 4 .word exen, exenf diff --git a/include/asm-sparc64/futex.h b/include/asm-sparc64/futex.h index 0caf60147e97..34c4b43d3f98 100644 --- a/include/asm-sparc64/futex.h +++ b/include/asm-sparc64/futex.h @@ -20,7 +20,7 @@ "4: ba 3b\n" \ " mov %5, %0\n" \ " .previous\n" \ - " .section __ex_table,#alloc\n" \ + " .section __ex_table,\"a\"\n" \ " .align 4\n" \ " .word 1b, 4b\n" \ " .word 2b, 4b\n" \ diff --git a/include/asm-sparc64/uaccess.h b/include/asm-sparc64/uaccess.h index 203e8eee6351..c91d1e38eac6 100644 --- a/include/asm-sparc64/uaccess.h +++ b/include/asm-sparc64/uaccess.h @@ -136,7 +136,7 @@ __asm__ __volatile__( \ "b 2b\n\t" \ " mov %3, %0\n\n\t" \ ".previous\n\t" \ - ".section __ex_table,#alloc\n\t" \ + ".section __ex_table,\"a\"\n\t" \ ".align 4\n\t" \ ".word 1b, 3b\n\t" \ ".previous\n\n\t" \ @@ -148,7 +148,7 @@ if (__builtin_constant_p(ret) && ret == -EFAULT) \ __asm__ __volatile__( \ "/* Put user asm ret, inline. */\n" \ "1:\t" "st"#size "a %1, [%2] %%asi\n\n\t" \ - ".section __ex_table,#alloc\n\t" \ + ".section __ex_table,\"a\"\n\t" \ ".align 4\n\t" \ ".word 1b, __ret_efault\n\n\t" \ ".previous\n\n\t" \ @@ -163,7 +163,7 @@ __asm__ __volatile__( \ "ret\n\t" \ " restore %%g0, %3, %%o0\n\n\t" \ ".previous\n\t" \ - ".section __ex_table,#alloc\n\t" \ + ".section __ex_table,\"a\"\n\t" \ ".align 4\n\t" \ ".word 1b, 3b\n\n\t" \ ".previous\n\n\t" \ @@ -206,7 +206,7 @@ __asm__ __volatile__( \ "b 2b\n\t" \ " mov %3, %0\n\n\t" \ ".previous\n\t" \ - ".section __ex_table,#alloc\n\t" \ + ".section __ex_table,\"a\"\n\t" \ ".align 4\n\t" \ ".word 1b, 3b\n\n\t" \ ".previous\n\t" \ @@ -218,7 +218,7 @@ if (__builtin_constant_p(retval) && retval == -EFAULT) \ __asm__ __volatile__( \ "/* Get user asm ret, inline. */\n" \ "1:\t" "ld"#size "a [%1] %%asi, %0\n\n\t" \ - ".section __ex_table,#alloc\n\t" \ + ".section __ex_table,\"a\"\n\t" \ ".align 4\n\t" \ ".word 1b,__ret_efault\n\n\t" \ ".previous\n\t" \ @@ -233,7 +233,7 @@ __asm__ __volatile__( \ "ret\n\t" \ " restore %%g0, %2, %%o0\n\n\t" \ ".previous\n\t" \ - ".section __ex_table,#alloc\n\t" \ + ".section __ex_table,\"a\"\n\t" \ ".align 4\n\t" \ ".word 1b, 3b\n\n\t" \ ".previous\n\t" \ -- cgit v1.2.3 From 1e4b27df55166ce3b276f55bab223fa4ae8c5525 Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Mon, 6 Mar 2006 15:42:37 -0800 Subject: [PATCH] i4l: add new PCI IDs for HFC-S PCI Add new PCI IDs for HFC-S PCI based ISDN TA 'Primux II S0' and 'Primux II S0' from Gerdes AG Signed-off-by: Martin Bachem Signed-off-by: Karsten Keil Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/isdn/hisax/config.c | 2 ++ drivers/isdn/hisax/hfc_pci.c | 2 ++ include/linux/pci_ids.h | 2 ++ 3 files changed, 6 insertions(+) (limited to 'include') diff --git a/drivers/isdn/hisax/config.c b/drivers/isdn/hisax/config.c index 8159bcecd0c2..df9d65201819 100644 --- a/drivers/isdn/hisax/config.c +++ b/drivers/isdn/hisax/config.c @@ -1929,6 +1929,8 @@ static struct pci_device_id hisax_pci_tbl[] __initdata = { {PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B00B, PCI_ANY_ID, PCI_ANY_ID}, {PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B00C, PCI_ANY_ID, PCI_ANY_ID}, {PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B100, PCI_ANY_ID, PCI_ANY_ID}, + {PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B700, PCI_ANY_ID, PCI_ANY_ID}, + {PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B701, PCI_ANY_ID, PCI_ANY_ID}, {PCI_VENDOR_ID_ABOCOM, PCI_DEVICE_ID_ABOCOM_2BD1, PCI_ANY_ID, PCI_ANY_ID}, {PCI_VENDOR_ID_ASUSTEK, PCI_DEVICE_ID_ASUSTEK_0675, PCI_ANY_ID, PCI_ANY_ID}, {PCI_VENDOR_ID_BERKOM, PCI_DEVICE_ID_BERKOM_T_CONCEPT, PCI_ANY_ID, PCI_ANY_ID}, diff --git a/drivers/isdn/hisax/hfc_pci.c b/drivers/isdn/hisax/hfc_pci.c index 4866fc32d8d9..91d25acb5ede 100644 --- a/drivers/isdn/hisax/hfc_pci.c +++ b/drivers/isdn/hisax/hfc_pci.c @@ -51,6 +51,8 @@ static const PCI_ENTRY id_list[] = {PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B00B, "Billion", "B00B"}, {PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B00C, "Billion", "B00C"}, {PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B100, "Seyeon", "B100"}, + {PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B700, "Primux II S0", "B700"}, + {PCI_VENDOR_ID_CCD, PCI_DEVICE_ID_CCD_B701, "Primux II S0 NT", "B701"}, {PCI_VENDOR_ID_ABOCOM, PCI_DEVICE_ID_ABOCOM_2BD1, "Abocom/Magitek", "2BD1"}, {PCI_VENDOR_ID_ASUSTEK, PCI_DEVICE_ID_ASUSTEK_0675, "Asuscom/Askey", "675"}, {PCI_VENDOR_ID_BERKOM, PCI_DEVICE_ID_BERKOM_T_CONCEPT, "German telekom", "T-Concept"}, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 82b83da25d77..1709b5009d2e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1752,6 +1752,8 @@ #define PCI_DEVICE_ID_CCD_B00B 0xb00b #define PCI_DEVICE_ID_CCD_B00C 0xb00c #define PCI_DEVICE_ID_CCD_B100 0xb100 +#define PCI_DEVICE_ID_CCD_B700 0xb700 +#define PCI_DEVICE_ID_CCD_B701 0xb701 #define PCI_VENDOR_ID_EXAR 0x13a8 #define PCI_DEVICE_ID_EXAR_XR17C152 0x0152 -- cgit v1.2.3 From 69239749e1ac4f3496906aa4267cb9f61ce52c9c Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 6 Mar 2006 15:42:45 -0800 Subject: [PATCH] fix next_timer_interrupt() for hrtimer Also from Thomas Gleixner Function next_timer_interrupt() got broken with a recent patch 6ba1b91213e81aa92b5cf7539f7d2a94ff54947c as sys_nanosleep() was moved to hrtimer. This broke things as next_timer_interrupt() did not check hrtimer tree for next event. Function next_timer_interrupt() is needed with dyntick (CONFIG_NO_IDLE_HZ, VST) implementations, as the system can be in idle when next hrtimer event was supposed to happen. At least ARM and S390 currently use next_timer_interrupt(). Signed-off-by: Thomas Gleixner Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/time.c | 10 ++++++---- include/linux/hrtimer.h | 4 ++++ kernel/hrtimer.c | 35 +++++++++++++++++++++++++++++++++++ kernel/timer.c | 16 ++++++++++++++++ 4 files changed, 61 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index d7d932c02866..d6bd435a6857 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -422,12 +422,14 @@ static int timer_dyn_tick_disable(void) void timer_dyn_reprogram(void) { struct dyn_tick_timer *dyn_tick = system_timer->dyn_tick; + unsigned long next, seq; - if (dyn_tick) { - write_seqlock(&xtime_lock); - if (dyn_tick->state & DYN_TICK_ENABLED) + if (dyn_tick && (dyn_tick->state & DYN_TICK_ENABLED)) { + next = next_timer_interrupt(); + do { + seq = read_seqbegin(&xtime_lock); dyn_tick->reprogram(next_timer_interrupt() - jiffies); - write_sequnlock(&xtime_lock); + } while (read_seqretry(&xtime_lock, seq)); } } diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 6361544bb6ae..6401c31d6add 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -116,6 +116,10 @@ extern int hrtimer_try_to_cancel(struct hrtimer *timer); extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer); extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp); +#ifdef CONFIG_NO_IDLE_HZ +extern ktime_t hrtimer_get_next_event(void); +#endif + static inline int hrtimer_active(const struct hrtimer *timer) { return timer->state == HRTIMER_PENDING; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 5ae51f1bc7c8..14bc9cfa6399 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -505,6 +505,41 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer) return rem; } +#ifdef CONFIG_NO_IDLE_HZ +/** + * hrtimer_get_next_event - get the time until next expiry event + * + * Returns the delta to the next expiry event or KTIME_MAX if no timer + * is pending. + */ +ktime_t hrtimer_get_next_event(void) +{ + struct hrtimer_base *base = __get_cpu_var(hrtimer_bases); + ktime_t delta, mindelta = { .tv64 = KTIME_MAX }; + unsigned long flags; + int i; + + for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) { + struct hrtimer *timer; + + spin_lock_irqsave(&base->lock, flags); + if (!base->first) { + spin_unlock_irqrestore(&base->lock, flags); + continue; + } + timer = rb_entry(base->first, struct hrtimer, node); + delta.tv64 = timer->expires.tv64; + spin_unlock_irqrestore(&base->lock, flags); + delta = ktime_sub(delta, base->get_time()); + if (delta.tv64 < mindelta.tv64) + mindelta.tv64 = delta.tv64; + } + if (mindelta.tv64 < 0) + mindelta.tv64 = 0; + return mindelta; +} +#endif + /** * hrtimer_init - initialize a timer to the given clock * diff --git a/kernel/timer.c b/kernel/timer.c index fc6646fd5aab..8256f3f5ec0d 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -489,9 +489,21 @@ unsigned long next_timer_interrupt(void) struct list_head *list; struct timer_list *nte; unsigned long expires; + unsigned long hr_expires = MAX_JIFFY_OFFSET; + ktime_t hr_delta; tvec_t *varray[4]; int i, j; + hr_delta = hrtimer_get_next_event(); + if (hr_delta.tv64 != KTIME_MAX) { + struct timespec tsdelta; + tsdelta = ktime_to_timespec(hr_delta); + hr_expires = timespec_to_jiffies(&tsdelta); + if (hr_expires < 3) + return hr_expires + jiffies; + } + hr_expires += jiffies; + base = &__get_cpu_var(tvec_bases); spin_lock(&base->t_base.lock); expires = base->timer_jiffies + (LONG_MAX >> 1); @@ -542,6 +554,10 @@ found: } } spin_unlock(&base->t_base.lock); + + if (time_before(hr_expires, expires)) + return hr_expires; + return expires; } #endif -- cgit v1.2.3 From 5c8338904653365bfb92385b38915becb903d8bb Mon Sep 17 00:00:00 2001 From: Jan Blunck Date: Mon, 6 Mar 2006 15:42:46 -0800 Subject: [PATCH] s390: fix compile with VIRT_CPU_ACCOUNTING=n When CONFIG_VIRT_CPU_ACCOUNTING is not defined compiling fails with an undefined reference to account_vtime(). Signed-off-by: Jan Blunck Signed-off-by: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-s390/system.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h index b2e65e8bf812..6a89dbb03c1e 100644 --- a/include/asm-s390/system.h +++ b/include/asm-s390/system.h @@ -118,6 +118,8 @@ static inline void sched_cacheflush(void) extern void account_vtime(struct task_struct *); extern void account_tick_vtime(struct task_struct *); extern void account_system_vtime(struct task_struct *); +#else +#define account_vtime(x) do { /* empty */ } while (0) #endif #define finish_arch_switch(prev) do { \ -- cgit v1.2.3 From 78679302fe428f4f3dc853a51ee24f306010d874 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 6 Mar 2006 15:42:49 -0800 Subject: [PATCH] memory-hotplug compile fix include/linux/memory_hotplug.h:53: warning: 'struct page' declared inside parameter list (akpm: I tossed in a couple more possibly-needed-sometime struct decls too) Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 01f03bc06eff..968b1aa3732c 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -6,6 +6,10 @@ #include #include +struct page; +struct zone; +struct pglist_data; + #ifdef CONFIG_MEMORY_HOTPLUG /* * pgdat resizing functions -- cgit v1.2.3 From a615fa83959896f8eac76c235953fb164cd1a9b9 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Mon, 6 Mar 2006 15:42:50 -0800 Subject: [PATCH] Increase max kmalloc size for very large systems Systems with extemely large numbers of nodes or cpus need to kmalloc structures larger than is currently supported. This patch increases the maximum supported size for very large systems. This patch should have no effect on current systems. (akpm: why not just use alloc_pages() for sysfs_cpus?) Signed-off-by: Jack Steiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmalloc_sizes.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kmalloc_sizes.h b/include/linux/kmalloc_sizes.h index d82d4c05c12d..bda23e00ed71 100644 --- a/include/linux/kmalloc_sizes.h +++ b/include/linux/kmalloc_sizes.h @@ -19,8 +19,10 @@ CACHE(32768) CACHE(65536) CACHE(131072) -#ifndef CONFIG_MMU +#if (NR_CPUS > 512) || (MAX_NUMNODES > 256) || !defined(CONFIG_MMU) CACHE(262144) +#endif +#ifndef CONFIG_MMU CACHE(524288) CACHE(1048576) #ifdef CONFIG_LARGE_ALLOCS -- cgit v1.2.3 From 6a0e243069b09a323255f6e847c87d531961cd96 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 7 Mar 2006 14:42:27 +0000 Subject: [ARM] 3352/1: DSB required for the completion of a TLB maintenance operation Patch from Catalin Marinas Chapter B2.7.3 in the latest ARM ARM (with v6 information) states that the completion of a TLB maintenance operation is only guaranteed by the execution of a DSB (Data Syncronization Barrier, formerly Data Write Barrier or Drain Write Buffer). Note that a DSB is only needed in the flush_tlb_kernel_* functions since the completion is guaranteed by a mode change (i.e. switching back to user mode) for the flush_tlb_user_* functions. Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/Kconfig | 2 +- arch/arm/mm/tlb-v6.S | 1 + include/asm-arm/tlbflush.h | 6 ++++++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 15dc1a0dffbb..9f80fa502f8f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -78,7 +78,7 @@ menu "System Type" choice prompt "ARM system type" - default ARCH_RPC + default ARCH_VERSATILE config ARCH_CLPS7500 bool "Cirrus-CL-PS7500FE" diff --git a/arch/arm/mm/tlb-v6.S b/arch/arm/mm/tlb-v6.S index 6f76b89ef46e..fd6adde39091 100644 --- a/arch/arm/mm/tlb-v6.S +++ b/arch/arm/mm/tlb-v6.S @@ -80,6 +80,7 @@ ENTRY(v6wbi_flush_kern_tlb_range) add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b + mcr p15, 0, r2, c7, c10, 4 @ data synchronization barrier mov pc, lr .section ".text.init", #alloc, #execinstr diff --git a/include/asm-arm/tlbflush.h b/include/asm-arm/tlbflush.h index 9387a5e1ffe0..0c2acc944a0a 100644 --- a/include/asm-arm/tlbflush.h +++ b/include/asm-arm/tlbflush.h @@ -340,6 +340,12 @@ static inline void local_flush_tlb_kernel_page(unsigned long kaddr) asm("mcr%? p15, 0, %0, c8, c6, 1" : : "r" (kaddr)); if (tlb_flag(TLB_V6_I_PAGE)) asm("mcr%? p15, 0, %0, c8, c5, 1" : : "r" (kaddr)); + + /* The ARM ARM states that the completion of a TLB maintenance + * operation is only guaranteed by a DSB instruction + */ + if (tlb_flag(TLB_V6_U_PAGE | TLB_V6_D_PAGE | TLB_V6_I_PAGE)) + asm("mcr%? p15, 0, %0, c7, c10, 4" : : "r" (zero)); } /* -- cgit v1.2.3 From 1bd79336a426c5e4f3bab142407059ceb12cadf9 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 8 Mar 2006 13:24:22 +1100 Subject: powerpc: Fix various syscall/signal/swapcontext bugs A careful reading of the recent changes to the system call entry/exit paths revealed several problems, plus some things that could be simplified and improved: * 32-bit wasn't testing the _TIF_NOERROR bit in the syscall fast exit path, so it was only doing anything with it once it saw some other bit being set. In other words, the noerror behaviour would apply to the next system call where we had to reschedule or deliver a signal, which is not necessarily the current system call. * 32-bit wasn't doing the call to ptrace_notify in the syscall exit path when the _TIF_SINGLESTEP bit was set. * _TIF_RESTOREALL was in both _TIF_USER_WORK_MASK and _TIF_PERSYSCALL_MASK, which is odd since _TIF_RESTOREALL is only set by system calls. I took it out of _TIF_USER_WORK_MASK. * On 64-bit, _TIF_RESTOREALL wasn't causing the non-volatile registers to be restored (unless perhaps a signal was delivered or the syscall was traced or single-stepped). Thus the non-volatile registers weren't restored on exit from a signal handler. We probably got away with it mostly because signal handlers written in C wouldn't alter the non-volatile registers. * On 32-bit I simplified the code and made it more like 64-bit by making the syscall exit path jump to ret_from_except to handle preemption and signal delivery. * 32-bit was calling do_signal unnecessarily when _TIF_RESTOREALL was set - but I think because of that 32-bit was actually restoring the non-volatile registers on exit from a signal handler. * I changed the order of enabling interrupts and saving the non-volatile registers before calling do_syscall_trace_leave; now we enable interrupts first. Signed-off-by: Paul Mackerras --- arch/powerpc/kernel/asm-offsets.c | 1 - arch/powerpc/kernel/entry_32.S | 95 ++++++++++----------------------------- arch/powerpc/kernel/entry_64.S | 94 +++++++++----------------------------- arch/powerpc/kernel/ptrace.c | 5 +-- arch/powerpc/kernel/signal_32.c | 19 ++------ arch/powerpc/kernel/signal_64.c | 9 +--- arch/powerpc/kernel/systbl.S | 2 +- arch/ppc/kernel/asm-offsets.c | 1 - arch/ppc/kernel/entry.S | 95 ++++++++++----------------------------- include/asm-powerpc/thread_info.h | 8 +--- 10 files changed, 78 insertions(+), 251 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 840aad43a98b..c9a660e4c2db 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -92,7 +92,6 @@ int main(void) DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); - DEFINE(TI_SIGFRAME, offsetof(struct thread_info, nvgprs_frame)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); #ifdef CONFIG_PPC32 DEFINE(TI_EXECDOMAIN, offsetof(struct thread_info, exec_domain)); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index f20a67261ec7..4827ca1ec89b 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -227,7 +227,7 @@ ret_from_syscall: MTMSRD(r10) lwz r9,TI_FLAGS(r12) li r8,-_LAST_ERRNO - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_RESTOREALL|_TIF_RESTORE_SIGMASK) + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) bne- syscall_exit_work cmplw 0,r3,r8 blt+ syscall_exit_cont @@ -287,8 +287,10 @@ syscall_dotrace: syscall_exit_work: andi. r0,r9,_TIF_RESTOREALL - bne- 2f - cmplw 0,r3,r8 + beq+ 0f + REST_NVGPRS(r1) + b 2f +0: cmplw 0,r3,r8 blt+ 1f andi. r0,r9,_TIF_NOERROR bne- 1f @@ -302,9 +304,7 @@ syscall_exit_work: 2: andi. r0,r9,(_TIF_PERSYSCALL_MASK) beq 4f - /* Clear per-syscall TIF flags if any are set, but _leave_ - _TIF_SAVE_NVGPRS set in r9 since we haven't dealt with that - yet. */ + /* Clear per-syscall TIF flags if any are set. */ li r11,_TIF_PERSYSCALL_MASK addi r12,r12,TI_FLAGS @@ -318,8 +318,13 @@ syscall_exit_work: subi r12,r12,TI_FLAGS 4: /* Anything which requires enabling interrupts? */ - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_SAVE_NVGPRS) - beq 7f + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) + beq ret_from_except + + /* Re-enable interrupts */ + ori r10,r10,MSR_EE + SYNC + MTMSRD(r10) /* Save NVGPRS if they're not saved already */ lwz r4,_TRAP(r1) @@ -328,71 +333,11 @@ syscall_exit_work: SAVE_NVGPRS(r1) li r4,0xc00 stw r4,_TRAP(r1) - - /* Re-enable interrupts */ -5: ori r10,r10,MSR_EE - SYNC - MTMSRD(r10) - - andi. r0,r9,_TIF_SAVE_NVGPRS - bne save_user_nvgprs - -save_user_nvgprs_cont: - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) - beq 7f - +5: addi r3,r1,STACK_FRAME_OVERHEAD bl do_syscall_trace_leave - REST_NVGPRS(r1) - -6: lwz r3,GPR3(r1) - LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */ - SYNC - MTMSRD(r10) /* disable interrupts again */ - rlwinm r12,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ - lwz r9,TI_FLAGS(r12) -7: - andi. r0,r9,_TIF_NEED_RESCHED - bne 8f - lwz r5,_MSR(r1) - andi. r5,r5,MSR_PR - beq ret_from_except - andi. r0,r9,_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK - beq ret_from_except - b do_user_signal -8: - ori r10,r10,MSR_EE - SYNC - MTMSRD(r10) /* re-enable interrupts */ - bl schedule - b 6b - -save_user_nvgprs: - lwz r8,TI_SIGFRAME(r12) - -.macro savewords start, end - 1: stw \start,4*(\start)(r8) - .section __ex_table,"a" - .align 2 - .long 1b,save_user_nvgprs_fault - .previous - .if \end - \start - savewords "(\start+1)",\end - .endif -.endm - savewords 14,31 - b save_user_nvgprs_cont - - -save_user_nvgprs_fault: - li r3,11 /* SIGSEGV */ - lwz r4,TI_TASK(r12) - bl force_sigsegv + b ret_from_except_full - rlwinm r12,r1,0,0,(31-THREAD_SHIFT) /* current_thread_info() */ - lwz r9,TI_FLAGS(r12) - b save_user_nvgprs_cont - #ifdef SHOW_SYSCALLS do_show_syscall: #ifdef SHOW_SYSCALLS_TASK @@ -490,6 +435,14 @@ ppc_clone: stw r0,_TRAP(r1) /* register set saved */ b sys_clone + .globl ppc_swapcontext +ppc_swapcontext: + SAVE_NVGPRS(r1) + lwz r0,_TRAP(r1) + rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ + stw r0,_TRAP(r1) /* register set saved */ + b sys_swapcontext + /* * Top-level page fault handling. * This is in assembler because if do_page_fault tells us that @@ -683,7 +636,7 @@ user_exc_return: /* r10 contains MSR_KERNEL here */ /* Check current_thread_info()->flags */ rlwinm r9,r1,0,0,(31-THREAD_SHIFT) lwz r9,TI_FLAGS(r9) - andi. r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_RESTOREALL|_TIF_RESTORE_SIGMASK) + andi. r0,r9,(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NEED_RESCHED) bne do_work restore_user: diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 388f861b8ed1..24be0cf86d7f 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -160,7 +160,7 @@ syscall_exit: mtmsrd r10,1 ld r9,TI_FLAGS(r12) li r11,-_LAST_ERRNO - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_RESTOREALL|_TIF_SAVE_NVGPRS|_TIF_NOERROR|_TIF_RESTORE_SIGMASK) + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) bne- syscall_exit_work cmpld r3,r11 ld r5,_CCR(r1) @@ -216,8 +216,10 @@ syscall_exit_work: If TIF_NOERROR is set, just save r3 as it is. */ andi. r0,r9,_TIF_RESTOREALL - bne- 2f - cmpld r3,r11 /* r10 is -LAST_ERRNO */ + beq+ 0f + REST_NVGPRS(r1) + b 2f +0: cmpld r3,r11 /* r10 is -LAST_ERRNO */ blt+ 1f andi. r0,r9,_TIF_NOERROR bne- 1f @@ -229,9 +231,7 @@ syscall_exit_work: 2: andi. r0,r9,(_TIF_PERSYSCALL_MASK) beq 4f - /* Clear per-syscall TIF flags if any are set, but _leave_ - _TIF_SAVE_NVGPRS set in r9 since we haven't dealt with that - yet. */ + /* Clear per-syscall TIF flags if any are set. */ li r11,_TIF_PERSYSCALL_MASK addi r12,r12,TI_FLAGS @@ -240,10 +240,9 @@ syscall_exit_work: stdcx. r10,0,r12 bne- 3b subi r12,r12,TI_FLAGS - -4: bl .save_nvgprs - /* Anything else left to do? */ - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_SAVE_NVGPRS) + +4: /* Anything else left to do? */ + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) beq .ret_from_except_lite /* Re-enable interrupts */ @@ -251,26 +250,10 @@ syscall_exit_work: ori r10,r10,MSR_EE mtmsrd r10,1 - andi. r0,r9,_TIF_SAVE_NVGPRS - bne save_user_nvgprs - - /* If tracing, re-enable interrupts and do it */ -save_user_nvgprs_cont: - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) - beq 5f - + bl .save_nvgprs addi r3,r1,STACK_FRAME_OVERHEAD bl .do_syscall_trace_leave - REST_NVGPRS(r1) - clrrdi r12,r1,THREAD_SHIFT - - /* Disable interrupts again and handle other work if any */ -5: mfmsr r10 - rldicl r10,r10,48,1 - rotldi r10,r10,16 - mtmsrd r10,1 - - b .ret_from_except_lite + b .ret_from_except /* Save non-volatile GPRs, if not already saved. */ _GLOBAL(save_nvgprs) @@ -282,51 +265,6 @@ _GLOBAL(save_nvgprs) std r0,_TRAP(r1) blr - -save_user_nvgprs: - ld r10,TI_SIGFRAME(r12) - andi. r0,r9,_TIF_32BIT - beq- save_user_nvgprs_64 - - /* 32-bit save to userspace */ - -.macro savewords start, end - 1: stw \start,4*(\start)(r10) - .section __ex_table,"a" - .align 3 - .llong 1b,save_user_nvgprs_fault - .previous - .if \end - \start - savewords "(\start+1)",\end - .endif -.endm - savewords 14,31 - b save_user_nvgprs_cont - -save_user_nvgprs_64: - /* 64-bit save to userspace */ - -.macro savelongs start, end - 1: std \start,8*(\start)(r10) - .section __ex_table,"a" - .align 3 - .llong 1b,save_user_nvgprs_fault - .previous - .if \end - \start - savelongs "(\start+1)",\end - .endif -.endm - savelongs 14,31 - b save_user_nvgprs_cont - -save_user_nvgprs_fault: - li r3,11 /* SIGSEGV */ - ld r4,TI_TASK(r12) - bl .force_sigsegv - - clrrdi r12,r1,THREAD_SHIFT - ld r9,TI_FLAGS(r12) - b save_user_nvgprs_cont /* * The sigsuspend and rt_sigsuspend system calls can call do_signal @@ -352,6 +290,16 @@ _GLOBAL(ppc_clone) bl .sys_clone b syscall_exit +_GLOBAL(ppc32_swapcontext) + bl .save_nvgprs + bl .compat_sys_swapcontext + b syscall_exit + +_GLOBAL(ppc64_swapcontext) + bl .save_nvgprs + bl .sys_swapcontext + b syscall_exit + _GLOBAL(ret_from_fork) bl .schedule_tail REST_NVGPRS(r1) diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 400793c71304..bcb83574335b 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -561,10 +561,7 @@ void do_syscall_trace_leave(struct pt_regs *regs) regs->result); if ((test_thread_flag(TIF_SYSCALL_TRACE) -#ifdef CONFIG_PPC64 - || test_thread_flag(TIF_SINGLESTEP) -#endif - ) + || test_thread_flag(TIF_SINGLESTEP)) && (current->ptrace & PT_PTRACED)) do_syscall_trace(); } diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index bd837b5dbf06..d7a4e814974d 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -151,10 +151,7 @@ static inline int save_general_regs(struct pt_regs *regs, elf_greg_t64 *gregs = (elf_greg_t64 *)regs; int i; - if (!FULL_REGS(regs)) { - set_thread_flag(TIF_SAVE_NVGPRS); - current_thread_info()->nvgprs_frame = frame->mc_gregs; - } + WARN_ON(!FULL_REGS(regs)); for (i = 0; i <= PT_RESULT; i ++) { if (i == 14 && !FULL_REGS(regs)) @@ -215,15 +212,7 @@ static inline int get_old_sigaction(struct k_sigaction *new_ka, static inline int save_general_regs(struct pt_regs *regs, struct mcontext __user *frame) { - if (!FULL_REGS(regs)) { - /* Zero out the unsaved GPRs to avoid information - leak, and set TIF_SAVE_NVGPRS to ensure that the - registers do actually get saved later. */ - memset(®s->gpr[14], 0, 18 * sizeof(unsigned long)); - current_thread_info()->nvgprs_frame = &frame->mc_gregs; - set_thread_flag(TIF_SAVE_NVGPRS); - } - + WARN_ON(!FULL_REGS(regs)); return __copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE); } @@ -826,8 +815,8 @@ static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int } long sys_swapcontext(struct ucontext __user *old_ctx, - struct ucontext __user *new_ctx, - int ctx_size, int r6, int r7, int r8, struct pt_regs *regs) + struct ucontext __user *new_ctx, + int ctx_size, int r6, int r7, int r8, struct pt_regs *regs) { unsigned char tmp; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 497a5d3df359..4324f8a8ba24 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -118,14 +118,7 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, err |= __put_user(0, &sc->v_regs); #endif /* CONFIG_ALTIVEC */ err |= __put_user(&sc->gp_regs, &sc->regs); - if (!FULL_REGS(regs)) { - /* Zero out the unsaved GPRs to avoid information - leak, and set TIF_SAVE_NVGPRS to ensure that the - registers do actually get saved later. */ - memset(®s->gpr[14], 0, 18 * sizeof(unsigned long)); - set_thread_flag(TIF_SAVE_NVGPRS); - current_thread_info()->nvgprs_frame = &sc->gp_regs; - } + WARN_ON(!FULL_REGS(regs)); err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE); err |= __copy_to_user(&sc->fp_regs, ¤t->thread.fpr, FP_REGS_SIZE); err |= __put_user(signr, &sc->signal); diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S index 8a9f994ed917..1ad55f0466fd 100644 --- a/arch/powerpc/kernel/systbl.S +++ b/arch/powerpc/kernel/systbl.S @@ -288,7 +288,7 @@ COMPAT_SYS(clock_settime) COMPAT_SYS(clock_gettime) COMPAT_SYS(clock_getres) COMPAT_SYS(clock_nanosleep) -COMPAT_SYS(swapcontext) +SYSX(ppc64_swapcontext,ppc32_swapcontext,ppc_swapcontext) COMPAT_SYS(tgkill) COMPAT_SYS(utimes) COMPAT_SYS(statfs64) diff --git a/arch/ppc/kernel/asm-offsets.c b/arch/ppc/kernel/asm-offsets.c index 7964bf660e92..77e4dc780f8c 100644 --- a/arch/ppc/kernel/asm-offsets.c +++ b/arch/ppc/kernel/asm-offsets.c @@ -131,7 +131,6 @@ main(void) DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features)); DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup)); - DEFINE(TI_SIGFRAME, offsetof(struct thread_info, nvgprs_frame)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); DEFINE(TI_EXECDOMAIN, offsetof(struct thread_info, exec_domain)); DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); diff --git a/arch/ppc/kernel/entry.S b/arch/ppc/kernel/entry.S index a48b950722a1..3a2815978488 100644 --- a/arch/ppc/kernel/entry.S +++ b/arch/ppc/kernel/entry.S @@ -227,7 +227,7 @@ ret_from_syscall: MTMSRD(r10) lwz r9,TI_FLAGS(r12) li r8,-_LAST_ERRNO - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_RESTOREALL) + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK) bne- syscall_exit_work cmplw 0,r3,r8 blt+ syscall_exit_cont @@ -287,8 +287,10 @@ syscall_dotrace: syscall_exit_work: andi. r0,r9,_TIF_RESTOREALL - bne- 2f - cmplw 0,r3,r8 + beq+ 0f + REST_NVGPRS(r1) + b 2f +0: cmplw 0,r3,r8 blt+ 1f andi. r0,r9,_TIF_NOERROR bne- 1f @@ -302,9 +304,7 @@ syscall_exit_work: 2: andi. r0,r9,(_TIF_PERSYSCALL_MASK) beq 4f - /* Clear per-syscall TIF flags if any are set, but _leave_ - _TIF_SAVE_NVGPRS set in r9 since we haven't dealt with that - yet. */ + /* Clear per-syscall TIF flags if any are set. */ li r11,_TIF_PERSYSCALL_MASK addi r12,r12,TI_FLAGS @@ -318,8 +318,13 @@ syscall_exit_work: subi r12,r12,TI_FLAGS 4: /* Anything which requires enabling interrupts? */ - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_SAVE_NVGPRS) - beq 7f + andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) + beq ret_from_except + + /* Re-enable interrupts */ + ori r10,r10,MSR_EE + SYNC + MTMSRD(r10) /* Save NVGPRS if they're not saved already */ lwz r4,TRAP(r1) @@ -328,71 +333,11 @@ syscall_exit_work: SAVE_NVGPRS(r1) li r4,0xc00 stw r4,TRAP(r1) - - /* Re-enable interrupts */ -5: ori r10,r10,MSR_EE - SYNC - MTMSRD(r10) - - andi. r0,r9,_TIF_SAVE_NVGPRS - bne save_user_nvgprs - -save_user_nvgprs_cont: - andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP) - beq 7f - +5: addi r3,r1,STACK_FRAME_OVERHEAD bl do_syscall_trace_leave - REST_NVGPRS(r1) - -6: lwz r3,GPR3(r1) - LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */ - SYNC - MTMSRD(r10) /* disable interrupts again */ - rlwinm r12,r1,0,0,18 /* current_thread_info() */ - lwz r9,TI_FLAGS(r12) -7: - andi. r0,r9,_TIF_NEED_RESCHED - bne 8f - lwz r5,_MSR(r1) - andi. r5,r5,MSR_PR - beq ret_from_except - andi. r0,r9,_TIF_SIGPENDING - beq ret_from_except - b do_user_signal -8: - ori r10,r10,MSR_EE - SYNC - MTMSRD(r10) /* re-enable interrupts */ - bl schedule - b 6b - -save_user_nvgprs: - lwz r8,TI_SIGFRAME(r12) - -.macro savewords start, end - 1: stw \start,4*(\start)(r8) - .section __ex_table,"a" - .align 2 - .long 1b,save_user_nvgprs_fault - .previous - .if \end - \start - savewords "(\start+1)",\end - .endif -.endm - savewords 14,31 - b save_user_nvgprs_cont - - -save_user_nvgprs_fault: - li r3,11 /* SIGSEGV */ - lwz r4,TI_TASK(r12) - bl force_sigsegv + b ret_from_except_full - rlwinm r12,r1,0,0,18 /* current_thread_info() */ - lwz r9,TI_FLAGS(r12) - b save_user_nvgprs_cont - #ifdef SHOW_SYSCALLS do_show_syscall: #ifdef SHOW_SYSCALLS_TASK @@ -490,6 +435,14 @@ ppc_clone: stw r0,TRAP(r1) /* register set saved */ b sys_clone + .globl ppc_swapcontext +ppc_swapcontext: + SAVE_NVGPRS(r1) + lwz r0,TRAP(r1) + rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ + stw r0,TRAP(r1) /* register set saved */ + b sys_swapcontext + /* * Top-level page fault handling. * This is in assembler because if do_page_fault tells us that @@ -683,7 +636,7 @@ user_exc_return: /* r10 contains MSR_KERNEL here */ /* Check current_thread_info()->flags */ rlwinm r9,r1,0,0,18 lwz r9,TI_FLAGS(r9) - andi. r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_RESTOREALL) + andi. r0,r9,(_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NEED_RESCHED) bne do_work restore_user: diff --git a/include/asm-powerpc/thread_info.h b/include/asm-powerpc/thread_info.h index 237fc2b72974..ffc7462d77ba 100644 --- a/include/asm-powerpc/thread_info.h +++ b/include/asm-powerpc/thread_info.h @@ -37,7 +37,6 @@ struct thread_info { int preempt_count; /* 0 => preemptable, <0 => BUG */ struct restart_block restart_block; - void __user *nvgprs_frame; /* low level flags - has atomic operations done on it */ unsigned long flags ____cacheline_aligned_in_smp; }; @@ -120,7 +119,6 @@ static inline struct thread_info *current_thread_info(void) #define TIF_MEMDIE 10 #define TIF_SECCOMP 11 /* secure computing */ #define TIF_RESTOREALL 12 /* Restore all regs (implies NOERROR) */ -#define TIF_SAVE_NVGPRS 13 /* Save r14-r31 in signal frame */ #define TIF_NOERROR 14 /* Force successful syscall return */ #define TIF_RESTORE_SIGMASK 15 /* Restore signal mask in do_signal */ @@ -137,15 +135,13 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SINGLESTEP (1< Date: Wed, 8 Mar 2006 14:03:09 -0800 Subject: Mark the pipe file operations static They aren't used (nor even really usable) outside of pipe.c anyway Signed-off-by: Linus Torvalds --- fs/pipe.c | 6 +++--- include/linux/fs.h | 3 --- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/pipe.c b/fs/pipe.c index d722579df79a..8aada8e426f4 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -605,7 +605,7 @@ struct file_operations rdwr_fifo_fops = { .fasync = pipe_rdwr_fasync, }; -struct file_operations read_pipe_fops = { +static struct file_operations read_pipe_fops = { .llseek = no_llseek, .read = pipe_read, .readv = pipe_readv, @@ -617,7 +617,7 @@ struct file_operations read_pipe_fops = { .fasync = pipe_read_fasync, }; -struct file_operations write_pipe_fops = { +static struct file_operations write_pipe_fops = { .llseek = no_llseek, .read = bad_pipe_r, .write = pipe_write, @@ -629,7 +629,7 @@ struct file_operations write_pipe_fops = { .fasync = pipe_write_fasync, }; -struct file_operations rdwr_pipe_fops = { +static struct file_operations rdwr_pipe_fops = { .llseek = no_llseek, .read = pipe_read, .readv = pipe_readv, diff --git a/include/linux/fs.h b/include/linux/fs.h index e059da947007..0cc34b1c42c9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1418,9 +1418,6 @@ extern int is_bad_inode(struct inode *); extern struct file_operations read_fifo_fops; extern struct file_operations write_fifo_fops; extern struct file_operations rdwr_fifo_fops; -extern struct file_operations read_pipe_fops; -extern struct file_operations write_pipe_fops; -extern struct file_operations rdwr_pipe_fops; extern int fs_may_remount_ro(struct super_block *); -- cgit v1.2.3 From 1c6cc5fd32978ffdc4d0acf8592d3901adefbdad Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 7 Mar 2006 21:55:20 -0800 Subject: [PATCH] powerpc: restore eeh_add_device_late() prototype stub We fixed this: arch/powerpc/platforms/pseries/eeh.c: In function `eeh_add_device_tree_late': arch/powerpc/platforms/pseries/eeh.c:901: warning: implicit declaration of function `eeh_add_device_late' arch/powerpc/platforms/pseries/eeh.c: At top level: arch/powerpc/platforms/pseries/eeh.c:918: error: conflicting types for 'eeh_add_device_late' arch/powerpc/platforms/pseries/eeh.c:901: error: previous implicit declaration of 'eeh_add_device_late' was here make[2]: *** [arch/powerpc/platforms/pseries/eeh.o] Error 1 But we forgot the !CONFIG_EEH stub. Signed-off-by: Mark Fasheh Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-powerpc/eeh.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-powerpc/eeh.h b/include/asm-powerpc/eeh.h index eb392032e19b..5207758a6dd9 100644 --- a/include/asm-powerpc/eeh.h +++ b/include/asm-powerpc/eeh.h @@ -118,6 +118,8 @@ static inline void pci_addr_cache_build(void) { } static inline void eeh_add_device_early(struct device_node *dn) { } +static inline void eeh_add_device_late(struct pci_dev *dev) { } + static inline void eeh_remove_device(struct pci_dev *dev) { } static inline void eeh_add_device_tree_early(struct device_node *dn) { } -- cgit v1.2.3 From 707ced0d718e89b52b13aa55a64653083e792cca Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Tue, 7 Mar 2006 21:55:28 -0800 Subject: [PATCH] __get_unaligned() gcc-4 fix If the 'ptr' is a const, this code cause "assignment of read-only variable" error on gcc 4.x. Use __u64 instead of __typeof__(*(ptr)) for temporary variable to get rid of errors on gcc 4.x. Signed-off-by: Atsushi Nemoto Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/unaligned.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/unaligned.h b/include/asm-generic/unaligned.h index 4dc8ddb401c1..09ec447fe2af 100644 --- a/include/asm-generic/unaligned.h +++ b/include/asm-generic/unaligned.h @@ -78,7 +78,7 @@ static inline void __ustw(__u16 val, __u16 *addr) #define __get_unaligned(ptr, size) ({ \ const void *__gu_p = ptr; \ - __typeof__(*(ptr)) val; \ + __u64 val; \ switch (size) { \ case 1: \ val = *(const __u8 *)__gu_p; \ @@ -95,7 +95,7 @@ static inline void __ustw(__u16 val, __u16 *addr) default: \ bad_unaligned_access_length(); \ }; \ - val; \ + (__typeof__(*(ptr)))val; \ }) #define __put_unaligned(val, ptr, size) \ -- cgit v1.2.3 From e2bab3d92486fb781f4d06f56339264ed1492392 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 7 Mar 2006 21:55:31 -0800 Subject: [PATCH] percpu_counter_sum() Implement percpu_counter_sum(). This is a more accurate but slower version of percpu_counter_read_positive(). We need this for Alex's speedup-ext3_statfs patch and for the nr_file accounting fix. Otherwise these things would be too inaccurate on large CPU counts. Cc: Ravikiran G Thirumalai Cc: Alex Tomas Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu_counter.h | 6 ++++++ mm/swap.c | 25 +++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index bd6708e2c027..682525511c9e 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -39,6 +39,7 @@ static inline void percpu_counter_destroy(struct percpu_counter *fbc) } void percpu_counter_mod(struct percpu_counter *fbc, long amount); +long percpu_counter_sum(struct percpu_counter *fbc); static inline long percpu_counter_read(struct percpu_counter *fbc) { @@ -92,6 +93,11 @@ static inline long percpu_counter_read_positive(struct percpu_counter *fbc) return fbc->count; } +static inline long percpu_counter_sum(struct percpu_counter *fbc) +{ + return percpu_counter_read_positive(fbc); +} + #endif /* CONFIG_SMP */ static inline void percpu_counter_inc(struct percpu_counter *fbc) diff --git a/mm/swap.c b/mm/swap.c index cce3dda59c59..e9ec06d845e8 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -489,13 +489,34 @@ void percpu_counter_mod(struct percpu_counter *fbc, long amount) if (count >= FBC_BATCH || count <= -FBC_BATCH) { spin_lock(&fbc->lock); fbc->count += count; + *pcount = 0; spin_unlock(&fbc->lock); - count = 0; + } else { + *pcount = count; } - *pcount = count; put_cpu(); } EXPORT_SYMBOL(percpu_counter_mod); + +/* + * Add up all the per-cpu counts, return the result. This is a more accurate + * but much slower version of percpu_counter_read_positive() + */ +long percpu_counter_sum(struct percpu_counter *fbc) +{ + long ret; + int cpu; + + spin_lock(&fbc->lock); + ret = fbc->count; + for_each_cpu(cpu) { + long *pcount = per_cpu_ptr(fbc->counters, cpu); + ret += *pcount; + } + spin_unlock(&fbc->lock); + return ret < 0 ? 0 : ret; +} +EXPORT_SYMBOL(percpu_counter_sum); #endif /* -- cgit v1.2.3 From 21a1ea9eb40411d4ee29448c53b9e4c0654d6ceb Mon Sep 17 00:00:00 2001 From: Dipankar Sarma Date: Tue, 7 Mar 2006 21:55:33 -0800 Subject: [PATCH] rcu batch tuning This patch adds new tunables for RCU queue and finished batches. There are two types of controls - number of completed RCU updates invoked in a batch (blimit) and monitoring for high rate of incoming RCUs on a cpu (qhimark, qlowmark). By default, the per-cpu batch limit is set to a small value. If the input RCU rate exceeds the high watermark, we do two things - force quiescent state on all cpus and set the batch limit of the CPU to INTMAX. Setting batch limit to INTMAX forces all finished RCUs to be processed in one shot. If we have more than INTMAX RCUs queued up, then we have bigger problems anyway. Once the incoming queued RCUs fall below the low watermark, the batch limit is set to the default. Signed-off-by: Dipankar Sarma Cc: "Paul E. McKenney" Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 13 +++++++ include/linux/rcupdate.h | 6 ++- kernel/rcupdate.c | 76 ++++++++++++++++++++++++++++--------- 3 files changed, 76 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 75205391b335..bad5987c4727 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1284,6 +1284,19 @@ running once the system is up. New name for the ramdisk parameter. See Documentation/ramdisk.txt. + rcu.blimit= [KNL,BOOT] Set maximum number of finished + RCU callbacks to process in one batch. + + rcu.qhimark= [KNL,BOOT] Set threshold of queued + RCU callbacks over which batch limiting is disabled. + + rcu.qlowmark= [KNL,BOOT] Set threshold of queued + RCU callbacks below which batch limiting is re-enabled. + + rcu.rsinterval= [KNL,BOOT,SMP] Set the number of additional + RCU callbacks to queued before forcing reschedule + on all cpus. + rdinit= [KNL] Format: Run specified binary instead of /init from the ramdisk, diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b87aefa082e2..c2ec6c77874e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -98,13 +98,17 @@ struct rcu_data { long batch; /* Batch # for current RCU batch */ struct rcu_head *nxtlist; struct rcu_head **nxttail; - long count; /* # of queued items */ + long qlen; /* # of queued callbacks */ struct rcu_head *curlist; struct rcu_head **curtail; struct rcu_head *donelist; struct rcu_head **donetail; + long blimit; /* Upper limit on a processed batch */ int cpu; struct rcu_head barrier; +#ifdef CONFIG_SMP + long last_rs_qlen; /* qlen during the last resched */ +#endif }; DECLARE_PER_CPU(struct rcu_data, rcu_data); diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 0cf8146bd585..8cf15a569fcd 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -67,7 +67,43 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L }; /* Fake initialization required by compiler */ static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; -static int maxbatch = 10000; +static int blimit = 10; +static int qhimark = 10000; +static int qlowmark = 100; +#ifdef CONFIG_SMP +static int rsinterval = 1000; +#endif + +static atomic_t rcu_barrier_cpu_count; +static struct semaphore rcu_barrier_sema; +static struct completion rcu_barrier_completion; + +#ifdef CONFIG_SMP +static void force_quiescent_state(struct rcu_data *rdp, + struct rcu_ctrlblk *rcp) +{ + int cpu; + cpumask_t cpumask; + set_need_resched(); + if (unlikely(rdp->qlen - rdp->last_rs_qlen > rsinterval)) { + rdp->last_rs_qlen = rdp->qlen; + /* + * Don't send IPI to itself. With irqs disabled, + * rdp->cpu is the current cpu. + */ + cpumask = rcp->cpumask; + cpu_clear(rdp->cpu, cpumask); + for_each_cpu_mask(cpu, cpumask) + smp_send_reschedule(cpu); + } +} +#else +static inline void force_quiescent_state(struct rcu_data *rdp, + struct rcu_ctrlblk *rcp) +{ + set_need_resched(); +} +#endif /** * call_rcu - Queue an RCU callback for invocation after a grace period. @@ -92,17 +128,13 @@ void fastcall call_rcu(struct rcu_head *head, rdp = &__get_cpu_var(rcu_data); *rdp->nxttail = head; rdp->nxttail = &head->next; - - if (unlikely(++rdp->count > 10000)) - set_need_resched(); - + if (unlikely(++rdp->qlen > qhimark)) { + rdp->blimit = INT_MAX; + force_quiescent_state(rdp, &rcu_ctrlblk); + } local_irq_restore(flags); } -static atomic_t rcu_barrier_cpu_count; -static struct semaphore rcu_barrier_sema; -static struct completion rcu_barrier_completion; - /** * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. * @head: structure to be used for queueing the RCU updates. @@ -131,12 +163,12 @@ void fastcall call_rcu_bh(struct rcu_head *head, rdp = &__get_cpu_var(rcu_bh_data); *rdp->nxttail = head; rdp->nxttail = &head->next; - rdp->count++; -/* - * Should we directly call rcu_do_batch() here ? - * if (unlikely(rdp->count > 10000)) - * rcu_do_batch(rdp); - */ + + if (unlikely(++rdp->qlen > qhimark)) { + rdp->blimit = INT_MAX; + force_quiescent_state(rdp, &rcu_bh_ctrlblk); + } + local_irq_restore(flags); } @@ -199,10 +231,12 @@ static void rcu_do_batch(struct rcu_data *rdp) next = rdp->donelist = list->next; list->func(list); list = next; - rdp->count--; - if (++count >= maxbatch) + rdp->qlen--; + if (++count >= rdp->blimit) break; } + if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark) + rdp->blimit = blimit; if (!rdp->donelist) rdp->donetail = &rdp->donelist; else @@ -473,6 +507,7 @@ static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, rdp->quiescbatch = rcp->completed; rdp->qs_pending = 0; rdp->cpu = cpu; + rdp->blimit = blimit; } static void __devinit rcu_online_cpu(int cpu) @@ -567,7 +602,12 @@ void synchronize_kernel(void) synchronize_rcu(); } -module_param(maxbatch, int, 0); +module_param(blimit, int, 0); +module_param(qhimark, int, 0); +module_param(qlowmark, int, 0); +#ifdef CONFIG_SMP +module_param(rsinterval, int, 0); +#endif EXPORT_SYMBOL_GPL(rcu_batches_completed); EXPORT_SYMBOL(call_rcu); /* WARNING: GPL-only in April 2006. */ EXPORT_SYMBOL(call_rcu_bh); /* WARNING: GPL-only in April 2006. */ -- cgit v1.2.3 From 529bf6be5c04f2e869d07bfdb122e9fd98ade714 Mon Sep 17 00:00:00 2001 From: Dipankar Sarma Date: Tue, 7 Mar 2006 21:55:35 -0800 Subject: [PATCH] fix file counting I have benchmarked this on an x86_64 NUMA system and see no significant performance difference on kernbench. Tested on both x86_64 and powerpc. The way we do file struct accounting is not very suitable for batched freeing. For scalability reasons, file accounting was constructor/destructor based. This meant that nr_files was decremented only when the object was removed from the slab cache. This is susceptible to slab fragmentation. With RCU based file structure, consequent batched freeing and a test program like Serge's, we just speed this up and end up with a very fragmented slab - llm22:~ # cat /proc/sys/fs/file-nr 587730 0 758844 At the same time, I see only a 2000+ objects in filp cache. The following patch I fixes this problem. This patch changes the file counting by removing the filp_count_lock. Instead we use a separate percpu counter, nr_files, for now and all accesses to it are through get_nr_files() api. In the sysctl handler for nr_files, we populate files_stat.nr_files before returning to user. Counting files as an when they are created and destroyed (as opposed to inside slab) allows us to correctly count open files with RCU. Signed-off-by: Dipankar Sarma Cc: "Paul E. McKenney" Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dcache.c | 2 +- fs/file_table.c | 87 +++++++++++++++++++++++++++++++++------------------- include/linux/file.h | 2 -- include/linux/fs.h | 1 + kernel/sysctl.c | 5 ++- net/unix/af_unix.c | 2 +- 6 files changed, 62 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/fs/dcache.c b/fs/dcache.c index a173bba32666..11dc83092d4a 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1736,7 +1736,7 @@ void __init vfs_caches_init(unsigned long mempages) SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, filp_ctor, filp_dtor); + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); dcache_init(mempages); inode_init(mempages); diff --git a/fs/file_table.c b/fs/file_table.c index 768b58167543..44fabeaa9415 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -5,6 +5,7 @@ * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) */ +#include #include #include #include @@ -19,52 +20,67 @@ #include #include #include +#include +#include + +#include /* sysctl tunables... */ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -EXPORT_SYMBOL(files_stat); /* Needed by unix.o */ - /* public. Not pretty! */ - __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); +__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); -static DEFINE_SPINLOCK(filp_count_lock); +static struct percpu_counter nr_files __cacheline_aligned_in_smp; -/* slab constructors and destructors are called from arbitrary - * context and must be fully threaded - use a local spinlock - * to protect files_stat.nr_files - */ -void filp_ctor(void *objp, struct kmem_cache *cachep, unsigned long cflags) +static inline void file_free_rcu(struct rcu_head *head) { - if ((cflags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) { - unsigned long flags; - spin_lock_irqsave(&filp_count_lock, flags); - files_stat.nr_files++; - spin_unlock_irqrestore(&filp_count_lock, flags); - } + struct file *f = container_of(head, struct file, f_u.fu_rcuhead); + kmem_cache_free(filp_cachep, f); } -void filp_dtor(void *objp, struct kmem_cache *cachep, unsigned long dflags) +static inline void file_free(struct file *f) { - unsigned long flags; - spin_lock_irqsave(&filp_count_lock, flags); - files_stat.nr_files--; - spin_unlock_irqrestore(&filp_count_lock, flags); + percpu_counter_dec(&nr_files); + call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); } -static inline void file_free_rcu(struct rcu_head *head) +/* + * Return the total number of open files in the system + */ +static int get_nr_files(void) { - struct file *f = container_of(head, struct file, f_u.fu_rcuhead); - kmem_cache_free(filp_cachep, f); + return percpu_counter_read_positive(&nr_files); } -static inline void file_free(struct file *f) +/* + * Return the maximum number of open files in the system + */ +int get_max_files(void) { - call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); + return files_stat.max_files; } +EXPORT_SYMBOL_GPL(get_max_files); + +/* + * Handle nr_files sysctl + */ +#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) +int proc_nr_files(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + files_stat.nr_files = get_nr_files(); + return proc_dointvec(table, write, filp, buffer, lenp, ppos); +} +#else +int proc_nr_files(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return -ENOSYS; +} +#endif /* Find an unused file structure and return a pointer to it. * Returns NULL, if there are no more free file structures or @@ -78,14 +94,20 @@ struct file *get_empty_filp(void) /* * Privileged users can go above max_files */ - if (files_stat.nr_files >= files_stat.max_files && - !capable(CAP_SYS_ADMIN)) - goto over; + if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) { + /* + * percpu_counters are inaccurate. Do an expensive check before + * we go and fail. + */ + if (percpu_counter_sum(&nr_files) >= files_stat.max_files) + goto over; + } f = kmem_cache_alloc(filp_cachep, GFP_KERNEL); if (f == NULL) goto fail; + percpu_counter_inc(&nr_files); memset(f, 0, sizeof(*f)); if (security_file_alloc(f)) goto fail_sec; @@ -101,10 +123,10 @@ struct file *get_empty_filp(void) over: /* Ran out of filps - report that */ - if (files_stat.nr_files > old_max) { + if (get_nr_files() > old_max) { printk(KERN_INFO "VFS: file-max limit %d reached\n", - files_stat.max_files); - old_max = files_stat.nr_files; + get_max_files()); + old_max = get_nr_files(); } goto fail; @@ -276,4 +298,5 @@ void __init files_init(unsigned long mempages) if (files_stat.max_files < NR_FILE) files_stat.max_files = NR_FILE; files_defer_init(); + percpu_counter_init(&nr_files); } diff --git a/include/linux/file.h b/include/linux/file.h index 418b6101b59a..9901b850f2e4 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -60,8 +60,6 @@ extern void put_filp(struct file *); extern int get_unused_fd(void); extern void FASTCALL(put_unused_fd(unsigned int fd)); struct kmem_cache; -extern void filp_ctor(void * objp, struct kmem_cache *cachep, unsigned long cflags); -extern void filp_dtor(void * objp, struct kmem_cache *cachep, unsigned long dflags); extern struct file ** alloc_fd_array(int); extern void free_fd_array(struct file **, int); diff --git a/include/linux/fs.h b/include/linux/fs.h index 0cc34b1c42c9..51c0c93bdf93 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -35,6 +35,7 @@ struct files_stat_struct { int max_files; /* tunable */ }; extern struct files_stat_struct files_stat; +extern int get_max_files(void); struct inodes_stat_t { int nr_inodes; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index de2d9109194e..32b48e8ee36e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -50,6 +50,9 @@ #include #include +extern int proc_nr_files(ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); + #if defined(CONFIG_SYSCTL) /* External variables not in a header file. */ @@ -943,7 +946,7 @@ static ctl_table fs_table[] = { .data = &files_stat, .maxlen = 3*sizeof(int), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_nr_files, }, { .ctl_name = FS_MAXFILE, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 1b5989b1b670..c323cc6a28b0 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -547,7 +547,7 @@ static struct sock * unix_create1(struct socket *sock) struct sock *sk = NULL; struct unix_sock *u; - if (atomic_read(&unix_nr_socks) >= 2*files_stat.max_files) + if (atomic_read(&unix_nr_socks) >= 2*get_max_files()) goto out; sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1); -- cgit v1.2.3 From 2ec5e3a867d63d04932e11c6097f63760d9be3fe Mon Sep 17 00:00:00 2001 From: Michael Matz Date: Tue, 7 Mar 2006 21:55:48 -0800 Subject: [PATCH] fix kexec asm While testing kexec and kdump we hit problems where the new kernel would freeze or instantly reboot. The easiest way to trigger it was to kexec a kernel compiled for CONFIG_M586 on an athlon cpu. Compiling for CONFIG_MK7 instead would work fine. The patch fixes a few problems with the kexec inline asm. Signed-off-by: Chris Mason Acked-by: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/machine_kexec.c | 14 +++++++------- arch/x86_64/kernel/machine_kexec.c | 2 +- include/asm-powerpc/kexec.h | 3 ++- 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c index a912fed48482..f73d7374a2ba 100644 --- a/arch/i386/kernel/machine_kexec.c +++ b/arch/i386/kernel/machine_kexec.c @@ -116,13 +116,13 @@ static void load_segments(void) __asm__ __volatile__ ( "\tljmp $"STR(__KERNEL_CS)",$1f\n" "\t1:\n" - "\tmovl $"STR(__KERNEL_DS)",%eax\n" - "\tmovl %eax,%ds\n" - "\tmovl %eax,%es\n" - "\tmovl %eax,%fs\n" - "\tmovl %eax,%gs\n" - "\tmovl %eax,%ss\n" - ); + "\tmovl $"STR(__KERNEL_DS)",%%eax\n" + "\tmovl %%eax,%%ds\n" + "\tmovl %%eax,%%es\n" + "\tmovl %%eax,%%fs\n" + "\tmovl %%eax,%%gs\n" + "\tmovl %%eax,%%ss\n" + ::: "eax", "memory"); #undef STR #undef __STR } diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c index 89fab51e20f4..25ac8a3faae6 100644 --- a/arch/x86_64/kernel/machine_kexec.c +++ b/arch/x86_64/kernel/machine_kexec.c @@ -140,7 +140,7 @@ static void load_segments(void) "\tmovl %0,%%ss\n" "\tmovl %0,%%fs\n" "\tmovl %0,%%gs\n" - : : "a" (__KERNEL_DS) + : : "a" (__KERNEL_DS) : "memory" ); } diff --git a/include/asm-powerpc/kexec.h b/include/asm-powerpc/kexec.h index bda2f217e6fe..6a2af2f6853b 100644 --- a/include/asm-powerpc/kexec.h +++ b/include/asm-powerpc/kexec.h @@ -93,7 +93,8 @@ static inline void crash_setup_regs(struct pt_regs *newregs, "mfxer %0\n" "std %0, 296(%2)\n" : "=&r" (tmp1), "=&r" (tmp2) - : "b" (newregs)); + : "b" (newregs) + : "memory"); } } #else -- cgit v1.2.3 From f9262c12c0084ddba445a9a42e98994018e51400 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 8 Mar 2006 17:57:25 -0800 Subject: [PATCH] i386: port ATI timer fix from x86_64 to i386 II ATI chipsets tend to generate double timer interrupts for the local APIC timer when both the 8254 and the IO-APIC timer pins are enabled. This is because they route it to both and the result is anded together and the CPU ends up processing it twice. This patch changes check_timer to disable the 8254 routing for interrupt 0. I think it would be safe on all chipsets actually (i tested it on a couple and it worked everywhere) and Windows seems to do it in a similar way, but to be conservative this patch only enables this mode on ATI (and adds options to enable/disable too) Ported over from a similar x86-64 change. I reused the ACPI earlyquirk infrastructure for the ATI bridge check, but tweaked it a bit to work even without ACPI. Inspired by a patch from Chuck Ebbert, but redone. Cc: Chuck Ebbert <76306.1226@compuserve.com> Cc: "Brown, Len" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 6 ++++++ arch/i386/kernel/Makefile | 2 +- arch/i386/kernel/acpi/Makefile | 2 +- arch/i386/kernel/acpi/boot.c | 3 --- arch/i386/kernel/acpi/earlyquirk.c | 8 ++++++++ arch/i386/kernel/io_apic.c | 19 ++++++++++++++++++- arch/i386/kernel/setup.c | 4 ++++ include/asm-i386/apic.h | 2 ++ 8 files changed, 40 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index bad5987c4727..fc99075e0af4 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -335,6 +335,12 @@ running once the system is up. timesource is not avalible, it defaults to PIT. Format: { pit | tsc | cyclone | pmtmr } + disable_8254_timer + enable_8254_timer + [IA32/X86_64] Disable/Enable interrupt 0 timer routing + over the 8254 in addition to over the IO-APIC. The + kernel tries to set a sensible default. + hpet= [IA-32,HPET] option to disable HPET and use PIT. Format: disable diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 53bb9a79e274..65656c033d70 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -11,7 +11,7 @@ obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ obj-y += cpu/ obj-y += timers/ -obj-$(CONFIG_ACPI) += acpi/ +obj-y += acpi/ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca.o obj-$(CONFIG_X86_MSR) += msr.o diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile index d51c7313cae8..7e9ac99354f4 100644 --- a/arch/i386/kernel/acpi/Makefile +++ b/arch/i386/kernel/acpi/Makefile @@ -1,4 +1,4 @@ -obj-y := boot.o +obj-$(CONFIG_ACPI) += boot.o obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 79577f0ace98..f1a21945963d 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -1111,9 +1111,6 @@ int __init acpi_boot_table_init(void) disable_acpi(); return error; } -#ifdef __i386__ - check_acpi_pci(); -#endif acpi_table_parse(ACPI_BOOT, acpi_parse_sbf); diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c index f1b9d2a46dab..2e3b643a4dc4 100644 --- a/arch/i386/kernel/acpi/earlyquirk.c +++ b/arch/i386/kernel/acpi/earlyquirk.c @@ -7,14 +7,22 @@ #include #include #include +#include static int __init check_bridge(int vendor, int device) { +#ifdef CONFIG_ACPI /* According to Nvidia all timer overrides are bogus. Just ignore them all. */ if (vendor == PCI_VENDOR_ID_NVIDIA) { acpi_skip_timer_override = 1; } +#endif + if (vendor == PCI_VENDOR_ID_ATI && timer_over_8254 == 1) { + timer_over_8254 = 0; + printk(KERN_INFO "ATI board detected. Disabling timer routing " + "over 8254.\n"); + } return 0; } diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 235822b3f41b..39d9a5fa907e 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -51,6 +51,8 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; static DEFINE_SPINLOCK(ioapic_lock); +int timer_over_8254 __initdata = 1; + /* * Is the SiS APIC rmw bug present ? * -1 = don't know, 0 = no, 1 = yes @@ -2267,7 +2269,8 @@ static inline void check_timer(void) apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); timer_ack = 1; - enable_8259A_irq(0); + if (timer_over_8254 > 0) + enable_8259A_irq(0); pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); @@ -2392,6 +2395,20 @@ void __init setup_IO_APIC(void) print_IO_APIC(); } +static int __init setup_disable_8254_timer(char *s) +{ + timer_over_8254 = -1; + return 1; +} +static int __init setup_enable_8254_timer(char *s) +{ + timer_over_8254 = 2; + return 1; +} + +__setup("disable_8254_timer", setup_disable_8254_timer); +__setup("enable_8254_timer", setup_enable_8254_timer); + /* * Called after all the initialization is done. If we didnt find any * APIC bugs then we can allow the modify fast path diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 51e513b4f72d..ab62a9f4701e 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1599,6 +1599,10 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled) efi_map_memmap(); +#ifdef CONFIG_X86_IO_APIC + check_acpi_pci(); /* Checks more than just ACPI actually */ +#endif + #ifdef CONFIG_ACPI /* * Parse the ACPI tables for possible boot-time SMP configuration. diff --git a/include/asm-i386/apic.h b/include/asm-i386/apic.h index d30b8571573f..ff9ac8d19eb2 100644 --- a/include/asm-i386/apic.h +++ b/include/asm-i386/apic.h @@ -137,6 +137,8 @@ void switch_APIC_timer_to_ipi(void *cpumask); void switch_ipi_to_APIC_timer(void *cpumask); #define ARCH_APICTIMER_STOPS_ON_C3 1 +extern int timer_over_8254; + #else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } -- cgit v1.2.3 From fd2a4f1183d1e6802457d70cea067396236ed64b Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 8 Mar 2006 16:04:32 +0000 Subject: [MIPS] Undefine scr_writew and scr_readw in . This is gluing the build of cirrusfb but really the mess that would need cleaning and fixing is