From a73ad3331fdbf4191cf99b83ea9ac7082b6757ba Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 25 Dec 2008 10:39:01 -0800 Subject: x86: unify the implementation of FPU traps On 32 bits, we may suffer IRQ 13, or supposedly we might have a buggy implementation which gives spurious trap 16. We did not check for this on 64 bits, but there is no reason we can't make the code the same in both cases. Furthermore, this is presumably rare, so do the spurious check last, instead of first. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/traps.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index f37cee75ab58..f5a640ba04bc 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -689,12 +689,7 @@ void math_error(void __user *ip) cwd = get_fpu_cwd(task); swd = get_fpu_swd(task); - err = swd & ~cwd & 0x3f; - -#ifdef CONFIG_X86_32 - if (!err) - return; -#endif + err = swd & ~cwd; if (err & 0x001) { /* Invalid op */ /* @@ -712,7 +707,9 @@ void math_error(void __user *ip) } else if (err & 0x020) { /* Precision */ info.si_code = FPE_FLTRES; } else { - info.si_code = __SI_FAULT|SI_KERNEL; /* WTF? */ + /* If we're using IRQ 13, or supposedly even some trap 16 + implementations, it's possible we get a spurious trap... */ + return; /* Spurious trap, no error */ } force_sig_info(SIGFPE, &info, task); } -- cgit v1.2.3 From 71ab6b245fda6e7597a667a67cce0d26c3c7a14b Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 25 Dec 2008 17:18:43 +1030 Subject: x86: remove impossible test in mtrr/main.c Impact: cleanup enable_mtrr_cleanup is static, and is never set to anything but 0 or 1. Signed-off-by: Rusty Russell Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mtrr/main.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index c78c04821ea1..acd9ac54d47f 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -823,16 +823,14 @@ static int enable_mtrr_cleanup __initdata = static int __init disable_mtrr_cleanup_setup(char *str) { - if (enable_mtrr_cleanup != -1) - enable_mtrr_cleanup = 0; + enable_mtrr_cleanup = 0; return 0; } early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); static int __init enable_mtrr_cleanup_setup(char *str) { - if (enable_mtrr_cleanup != -1) - enable_mtrr_cleanup = 1; + enable_mtrr_cleanup = 1; return 0; } early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); -- cgit v1.2.3 From bd8b96dfc216eebc72950a6c40da8d3eca8667df Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 26 Dec 2008 09:20:22 +0100 Subject: x86: clean up comment style in arch/x86/kernel/traps.c Impact: cleanup Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index f5a640ba04bc..dbfb80289928 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -292,8 +292,10 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) tsk->thread.error_code = error_code; tsk->thread.trap_no = 8; - /* This is always a kernel trap and never fixable (and thus must - never return). */ + /* + * This is always a kernel trap and never fixable (and thus must + * never return). + */ for (;;) die(str, regs, error_code); } @@ -524,9 +526,11 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) } #ifdef CONFIG_X86_64 -/* Help handler running on IST stack to switch back to user stack - for scheduling or signal handling. The actual stack switch is done in - entry.S */ +/* + * Help handler running on IST stack to switch back to user stack + * for scheduling or signal handling. The actual stack switch is done in + * entry.S + */ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) { struct pt_regs *regs = eregs; @@ -536,8 +540,10 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) /* Exception from user space */ else if (user_mode(eregs)) regs = task_pt_regs(current); - /* Exception from kernel and interrupts are enabled. Move to - kernel process stack. */ + /* + * Exception from kernel and interrupts are enabled. Move to + * kernel process stack. + */ else if (eregs->flags & X86_EFLAGS_IF) regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); if (eregs != regs) @@ -707,8 +713,10 @@ void math_error(void __user *ip) } else if (err & 0x020) { /* Precision */ info.si_code = FPE_FLTRES; } else { - /* If we're using IRQ 13, or supposedly even some trap 16 - implementations, it's possible we get a spurious trap... */ + /* + * If we're using IRQ 13, or supposedly even some trap 16 + * implementations, it's possible we get a spurious trap... + */ return; /* Spurious trap, no error */ } force_sig_info(SIGFPE, &info, task); -- cgit v1.2.3 From 393d68fb9929817cde7ab31c82d66fcb28ad35fc Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 26 Dec 2008 22:23:38 +1030 Subject: cpumask: x86: Introduce cpumask_of_{node,pcibus} to replace {node,pcibus}_to_cpumask Impact: New APIs The old node_to_cpumask/node_to_pcibus returned a cpumask_t: these return a pointer to a struct cpumask. Part of removing cpumasks from the stack. Also makes __pcibus_to_node take a const pointer. Signed-off-by: Rusty Russell Acked-by: Ingo Molnar --- arch/x86/include/asm/pci.h | 10 ++++++++-- arch/x86/include/asm/topology.h | 35 +++++++++++++++++++++++------------ arch/x86/kernel/setup_percpu.c | 8 ++++---- 3 files changed, 35 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 875b38edf193..52d80d3d94f3 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -98,9 +98,9 @@ static inline void early_quirks(void) { } #ifdef CONFIG_NUMA /* Returns the node based on pci bus */ -static inline int __pcibus_to_node(struct pci_bus *bus) +static inline int __pcibus_to_node(const struct pci_bus *bus) { - struct pci_sysdata *sd = bus->sysdata; + const struct pci_sysdata *sd = bus->sysdata; return sd->node; } @@ -109,6 +109,12 @@ static inline cpumask_t __pcibus_to_cpumask(struct pci_bus *bus) { return node_to_cpumask(__pcibus_to_node(bus)); } + +static inline const struct cpumask * +cpumask_of_pcibus(const struct pci_bus *bus) +{ + return cpumask_of_node(__pcibus_to_node(bus)); +} #endif #endif /* _ASM_X86_PCI_H */ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index ff386ff50ed7..45da5dc50fc8 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -61,13 +61,19 @@ static inline int cpu_to_node(int cpu) * * Side note: this function creates the returned cpumask on the stack * so with a high NR_CPUS count, excessive stack space is used. The - * node_to_cpumask_ptr function should be used whenever possible. + * cpumask_of_node function should be used whenever possible. */ static inline cpumask_t node_to_cpumask(int node) { return node_to_cpumask_map[node]; } +/* Returns a bitmask of CPUs on Node 'node'. */ +static inline const struct cpumask *cpumask_of_node(int node) +{ + return &node_to_cpumask_map[node]; +} + #else /* CONFIG_X86_64 */ /* Mappings between node number and cpus on that node. */ @@ -82,7 +88,7 @@ DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); #ifdef CONFIG_DEBUG_PER_CPU_MAPS extern int cpu_to_node(int cpu); extern int early_cpu_to_node(int cpu); -extern const cpumask_t *_node_to_cpumask_ptr(int node); +extern const cpumask_t *cpumask_of_node(int node); extern cpumask_t node_to_cpumask(int node); #else /* !CONFIG_DEBUG_PER_CPU_MAPS */ @@ -103,7 +109,7 @@ static inline int early_cpu_to_node(int cpu) } /* Returns a pointer to the cpumask of CPUs on Node 'node'. */ -static inline const cpumask_t *_node_to_cpumask_ptr(int node) +static inline const cpumask_t *cpumask_of_node(int node) { return &node_to_cpumask_map[node]; } @@ -116,12 +122,15 @@ static inline cpumask_t node_to_cpumask(int node) #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ -/* Replace default node_to_cpumask_ptr with optimized version */ +/* + * Replace default node_to_cpumask_ptr with optimized version + * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" + */ #define node_to_cpumask_ptr(v, node) \ - const cpumask_t *v = _node_to_cpumask_ptr(node) + const cpumask_t *v = cpumask_of_node(node) #define node_to_cpumask_ptr_next(v, node) \ - v = _node_to_cpumask_ptr(node) + v = cpumask_of_node(node) #endif /* CONFIG_X86_64 */ @@ -187,7 +196,7 @@ extern int __node_distance(int, int); #define cpu_to_node(cpu) 0 #define early_cpu_to_node(cpu) 0 -static inline const cpumask_t *_node_to_cpumask_ptr(int node) +static inline const cpumask_t *cpumask_of_node(int node) { return &cpu_online_map; } @@ -200,12 +209,15 @@ static inline int node_to_first_cpu(int node) return first_cpu(cpu_online_map); } -/* Replace default node_to_cpumask_ptr with optimized version */ +/* + * Replace default node_to_cpumask_ptr with optimized version + * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" + */ #define node_to_cpumask_ptr(v, node) \ - const cpumask_t *v = _node_to_cpumask_ptr(node) + const cpumask_t *v = cpumask_of_node(node) #define node_to_cpumask_ptr_next(v, node) \ - v = _node_to_cpumask_ptr(node) + v = cpumask_of_node(node) #endif #include @@ -214,8 +226,7 @@ static inline int node_to_first_cpu(int node) /* Returns the number of the first CPU on Node 'node'. */ static inline int node_to_first_cpu(int node) { - node_to_cpumask_ptr(mask, node); - return first_cpu(*mask); + return cpumask_first(cpumask_of_node(node)); } #endif diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 1c2084291f97..8e8b1193add5 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -334,25 +334,25 @@ static const cpumask_t cpu_mask_none; /* * Returns a pointer to the bitmask of CPUs on Node 'node'. */ -const cpumask_t *_node_to_cpumask_ptr(int node) +const cpumask_t *cpumask_of_node(int node) { if (node_to_cpumask_map == NULL) { printk(KERN_WARNING - "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n", + "cpumask_of_node(%d): no node_to_cpumask_map!\n", node); dump_stack(); return (const cpumask_t *)&cpu_online_map; } if (node >= nr_node_ids) { printk(KERN_WARNING - "_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n", + "cpumask_of_node(%d): node > nr_node_ids(%d)\n", node, nr_node_ids); dump_stack(); return &cpu_mask_none; } return &node_to_cpumask_map[node]; } -EXPORT_SYMBOL(_node_to_cpumask_ptr); +EXPORT_SYMBOL(cpumask_of_node); /* * Returns a bitmask of CPUs on Node 'node'. -- cgit v1.2.3 From 96d76a74870d5f11ce2abdd09a8dcdc401d714d1 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 26 Dec 2008 22:23:38 +1030 Subject: cpumask: sparc: Introduce cpumask_of_{node,pcibus} to replace {node,pcibus}_to_cpumask Impact: New APIs The old node_to_cpumask/node_to_pcibus returned a cpumask_t: these return a pointer to a struct cpumask. Part of removing cpumasks from the stack. Signed-off-by: Rusty Russell Acked-by: David S. Miller --- arch/sparc/include/asm/topology_64.h | 10 ++++++---- arch/sparc64/kernel/of_device.c | 2 +- arch/sparc64/kernel/pci_msi.c | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index 001c04027c82..afd3cc1824d7 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -16,8 +16,12 @@ static inline cpumask_t node_to_cpumask(int node) { return numa_cpumask_lookup_table[node]; } +#define cpumask_of_node(node) (&numa_cpumask_lookup_table[node]) -/* Returns a pointer to the cpumask of CPUs on Node 'node'. */ +/* + * Returns a pointer to the cpumask of CPUs on Node 'node'. + * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" + */ #define node_to_cpumask_ptr(v, node) \ cpumask_t *v = &(numa_cpumask_lookup_table[node]) @@ -26,9 +30,7 @@ static inline cpumask_t node_to_cpumask(int node) static inline int node_to_first_cpu(int node) { - cpumask_t tmp; - tmp = node_to_cpumask(node); - return first_cpu(tmp); + return cpumask_first(cpumask_of_node(node)); } struct pci_bus; diff --git a/arch/sparc64/kernel/of_device.c b/arch/sparc64/kernel/of_device.c index df2efb7fc14c..4f6098d318ec 100644 --- a/arch/sparc64/kernel/of_device.c +++ b/arch/sparc64/kernel/of_device.c @@ -778,7 +778,7 @@ static unsigned int __init build_one_device_irq(struct of_device *op, out: nid = of_node_to_nid(dp); if (nid != -1) { - cpumask_t numa_mask = node_to_cpumask(nid); + cpumask_t numa_mask = *cpumask_of_node(nid); irq_set_affinity(irq, &numa_mask); } diff --git a/arch/sparc64/kernel/pci_msi.c b/arch/sparc64/kernel/pci_msi.c index 0d0cd815e83e..4ef282e81912 100644 --- a/arch/sparc64/kernel/pci_msi.c +++ b/arch/sparc64/kernel/pci_msi.c @@ -286,7 +286,7 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm, nid = pbm->numa_node; if (nid != -1) { - cpumask_t numa_mask = node_to_cpumask(nid); + cpumask_t numa_mask = *cpumask_of_node(nid); irq_set_affinity(irq, &numa_mask); } -- cgit v1.2.3 From 7479a2939df4957ba794cce814379b6d10914bdc Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 26 Dec 2008 22:23:39 +1030 Subject: cpumask: sh: Introduce cpumask_of_{node,pcibus} to replace {node,pcibus}_to_cpumask Impact: New APIs The old node_to_cpumask/node_to_pcibus returned a cpumask_t: these return a pointer to a struct cpumask. Part of removing cpumasks from the stack. Signed-off-by: Rusty Russell Cc: Paul Mundt --- arch/sh/include/asm/topology.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h index 95f0085e098a..9aa160d0efe5 100644 --- a/arch/sh/include/asm/topology.h +++ b/arch/sh/include/asm/topology.h @@ -33,6 +33,7 @@ #define parent_node(node) ((void)(node),0) #define node_to_cpumask(node) ((void)node, cpu_online_map) +#define cpumask_of_node(node) ((void)node, cpu_online_mask) #define node_to_first_cpu(node) ((void)(node),0) #define pcibus_to_node(bus) ((void)(bus), -1) -- cgit v1.2.3 From 86c6f274f52c3e991d429869780945c0790e7b65 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 26 Dec 2008 22:23:39 +1030 Subject: cpumask: powerpc: Introduce cpumask_of_{node,pcibus} to replace {node,pcibus}_to_cpumask Impact: New APIs The old node_to_cpumask/node_to_pcibus returned a cpumask_t: these return a pointer to a struct cpumask. Part of removing cpumasks from the stack. (Also replaces powerpc internal uses of node_to_cpumask). Signed-off-by: Rusty Russell Cc: Benjamin Herrenschmidt --- arch/powerpc/include/asm/topology.h | 10 +++++++--- arch/powerpc/platforms/cell/spu_priv1_mmio.c | 6 +++--- arch/powerpc/platforms/cell/spufs/sched.c | 4 ++-- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index c32da6f97999..bcf25c2b8d21 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -22,11 +22,11 @@ static inline cpumask_t node_to_cpumask(int node) return numa_cpumask_lookup_table[node]; } +#define cpumask_of_node(node) (&numa_cpumask_lookup_table[node]) + static inline int node_to_first_cpu(int node) { - cpumask_t tmp; - tmp = node_to_cpumask(node); - return first_cpu(tmp); + return cpumask_first(cpumask_of_node(node)); } int of_node_to_nid(struct device_node *device); @@ -46,6 +46,10 @@ static inline int pcibus_to_node(struct pci_bus *bus) node_to_cpumask(pcibus_to_node(bus)) \ ) +#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \ + cpu_all_mask : \ + cpumask_of_node(pcibus_to_node(bus))) + /* sched_domains SD_NODE_INIT for PPC64 machines */ #define SD_NODE_INIT (struct sched_domain) { \ .span = CPU_MASK_NONE, \ diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.c b/arch/powerpc/platforms/cell/spu_priv1_mmio.c index 906a0a2a9fe1..1410443731eb 100644 --- a/arch/powerpc/platforms/cell/spu_priv1_mmio.c +++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.c @@ -80,10 +80,10 @@ static void cpu_affinity_set(struct spu *spu, int cpu) u64 route; if (nr_cpus_node(spu->node)) { - cpumask_t spumask = node_to_cpumask(spu->node); - cpumask_t cpumask = node_to_cpumask(cpu_to_node(cpu)); + const struct cpumask *spumask = cpumask_of_node(spu->node), + *cpumask = cpumask_of_node(cpu_to_node(cpu)); - if (!cpus_intersects(spumask, cpumask)) + if (!cpumask_intersects(spumask, cpumask)) return; } diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 2ad914c47493..6a0ad196aeb3 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -166,9 +166,9 @@ void spu_update_sched_info(struct spu_context *ctx) static int __node_allowed(struct spu_context *ctx, int node) { if (nr_cpus_node(node)) { - cpumask_t mask = node_to_cpumask(node); + const struct cpumask *mask = cpumask_of_node(node); - if (cpus_intersects(mask, ctx->cpus_allowed)) + if (cpumask_intersects(mask, &ctx->cpus_allowed)) return 1; } -- cgit v1.2.3 From fbb776c3ca4501d5a2821bf1e9bceefcaec7ae47 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 26 Dec 2008 22:23:40 +1030 Subject: cpumask: IA64: Introduce cpumask_of_{node,pcibus} to replace {node,pcibus}_to_cpumask Impact: New APIs The old node_to_cpumask/node_to_pcibus returned a cpumask_t: these return a pointer to a struct cpumask. Part of removing cpumasks from the stack. We can also use the new for_each_cpu_and() to avoid a temporary cpumask, and a gratuitous test in sn_topology_show. (Includes fix from KOSAKI Motohiro ) Signed-off-by: Rusty Russell Cc: Tony Luck Cc: KOSAKI Motohiro --- arch/ia64/include/asm/topology.h | 7 ++++++- arch/ia64/kernel/acpi.c | 2 +- arch/ia64/kernel/iosapic.c | 23 +++++++++++------------ arch/ia64/sn/kernel/sn2/sn_hwperf.c | 27 ++++++++++++--------------- 4 files changed, 30 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 35bcb641c9e5..66f0f1ef9e7f 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -34,6 +34,7 @@ * Returns a bitmask of CPUs on Node 'node'. */ #define node_to_cpumask(node) (node_to_cpu_mask[node]) +#define cpumask_of_node(node) (&node_to_cpu_mask[node]) /* * Returns the number of the node containing Node 'nid'. @@ -45,7 +46,7 @@ /* * Returns the number of the first CPU on Node 'node'. */ -#define node_to_first_cpu(node) (first_cpu(node_to_cpumask(node))) +#define node_to_first_cpu(node) (cpumask_first(cpumask_of_node(node))) /* * Determines the node for a given pci bus @@ -121,6 +122,10 @@ extern void arch_fix_phys_package_id(int num, u32 slot); node_to_cpumask(pcibus_to_node(bus)) \ ) +#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \ + cpu_all_mask : \ + cpumask_from_node(pcibus_to_node(bus))) + #include #endif /* _ASM_IA64_TOPOLOGY_H */ diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index bd7acc71e8a9..54ae373e6e22 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -1001,7 +1001,7 @@ acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret) node = pxm_to_node(pxm); if (node >= MAX_NUMNODES || !node_online(node) || - cpus_empty(node_to_cpumask(node))) + cpumask_empty(cpumask_of_node(node))) return AE_OK; /* We know a gsi to node mapping! */ diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index c8adecd5b416..5cfd3d91001a 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -695,32 +695,31 @@ get_target_cpu (unsigned int gsi, int irq) #ifdef CONFIG_NUMA { int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0; - cpumask_t cpu_mask; + const struct cpumask *cpu_mask; iosapic_index = find_iosapic(gsi); if (iosapic_index < 0 || iosapic_lists[iosapic_index].node == MAX_NUMNODES) goto skip_numa_setup; - cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node); - cpus_and(cpu_mask, cpu_mask, domain); - for_each_cpu_mask(numa_cpu, cpu_mask) { - if (!cpu_online(numa_cpu)) - cpu_clear(numa_cpu, cpu_mask); + cpu_mask = cpumask_of_node(iosapic_lists[iosapic_index].node); + num_cpus = 0; + for_each_cpu_and(numa_cpu, cpu_mask, &domain) { + if (cpu_online(numa_cpu)) + num_cpus++; } - num_cpus = cpus_weight(cpu_mask); - if (!num_cpus) goto skip_numa_setup; /* Use irq assignment to distribute across cpus in node */ cpu_index = irq % num_cpus; - for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++) - numa_cpu = next_cpu(numa_cpu, cpu_mask); + for_each_cpu_and(numa_cpu, cpu_mask, &domain) + if (cpu_online(numa_cpu) && i++ >= cpu_index) + break; - if (numa_cpu != NR_CPUS) + if (numa_cpu < nr_cpu_ids) return cpu_physical_id(numa_cpu); } skip_numa_setup: @@ -731,7 +730,7 @@ skip_numa_setup: * case of NUMA.) */ do { - if (++cpu >= NR_CPUS) + if (++cpu >= nr_cpu_ids) cpu = 0; } while (!cpu_online(cpu) || !cpu_isset(cpu, domain)); diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 636588e7e068..be339477f906 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -385,7 +385,6 @@ static int sn_topology_show(struct seq_file *s, void *d) int j; const char *slabname; int ordinal; - cpumask_t cpumask; char slice; struct cpuinfo_ia64 *c; struct sn_hwperf_port_info *ptdata; @@ -473,23 +472,21 @@ static int sn_topology_show(struct seq_file *s, void *d) * CPUs on this node, if any */ if (!SN_HWPERF_IS_IONODE(obj)) { - cpumask = node_to_cpumask(ordinal); - for_each_online_cpu(i) { - if (cpu_isset(i, cpumask)) { - slice = 'a' + cpuid_to_slice(i); - c = cpu_data(i); - seq_printf(s, "cpu %d %s%c local" - " freq %luMHz, arch ia64", - i, obj->location, slice, - c->proc_freq / 1000000); - for_each_online_cpu(j) { - seq_printf(s, j ? ":%d" : ", dist %d", - node_distance( + for_each_cpu_and(i, cpu_online_mask, + cpumask_of_node(ordinal)) { + slice = 'a' + cpuid_to_slice(i); + c = cpu_data(i); + seq_printf(s, "cpu %d %s%c local" + " freq %luMHz, arch ia64", + i, obj->location, slice, + c->proc_freq / 1000000); + for_each_online_cpu(j) { + seq_printf(s, j ? ":%d" : ", dist %d", + node_distance( cpu_to_node(i), cpu_to_node(j))); - } - seq_putc(s, '\n'); } + seq_putc(s, '\n'); } } } -- cgit v1.2.3 From b4a2f916a8326065816a0743dd1b0ca2ffd18f5f Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 26 Dec 2008 22:23:40 +1030 Subject: cpumask: Mips: Introduce cpumask_of_{node,pcibus} to replace {node,pcibus}_to_cpumask Impact: New APIs The old node_to_cpumask/node_to_pcibus returned a cpumask_t: these return a pointer to a struct cpumask. Part of removing cpumasks from the stack. Signed-off-by: Rusty Russell Cc: Ralf Baechle --- arch/mips/include/asm/mach-ip27/topology.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h index 7785bec732f2..c1c3f5b2f18f 100644 --- a/arch/mips/include/asm/mach-ip27/topology.h +++ b/arch/mips/include/asm/mach-ip27/topology.h @@ -25,11 +25,13 @@ extern struct cpuinfo_ip27 sn_cpu_info[NR_CPUS]; #define cpu_to_node(cpu) (sn_cpu_info[(cpu)].p_nodeid) #define parent_node(node) (node) #define node_to_cpumask(node) (hub_data(node)->h_cpus) -#define node_to_first_cpu(node) (first_cpu(node_to_cpumask(node))) +#define cpumask_of_node(node) (&hub_data(node)->h_cpus) +#define node_to_first_cpu(node) (cpumask_first(cpumask_of_node(node))) struct pci_bus; extern int pcibus_to_node(struct pci_bus *); #define pcibus_to_cpumask(bus) (cpu_online_map) +#define cpumask_of_pcibus(bus) (cpu_online_mask) extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; -- cgit v1.2.3 From 2258a5bb1064351b552aceaff29393967d694fa3 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 26 Dec 2008 22:23:41 +1030 Subject: cpumask: alpha: Introduce cpumask_of_{node,pcibus} to replace {node,pcibus}_to_cpumask Impact: New APIs The old node_to_cpumask/node_to_pcibus returned a cpumask_t: these return a pointer to a struct cpumask. Part of removing cpumasks from the stack. I'm not sure the existing code even compiles, but new version is straightforward. Signed-off-by: Rusty Russell Cc: Richard Henderson --- arch/alpha/include/asm/topology.h | 17 +++++++++++++++++ arch/alpha/kernel/setup.c | 5 +++++ 2 files changed, 22 insertions(+) (limited to 'arch') diff --git a/arch/alpha/include/asm/topology.h b/arch/alpha/include/asm/topology.h index 149532e162c4..b4f284c72ff3 100644 --- a/arch/alpha/include/asm/topology.h +++ b/arch/alpha/include/asm/topology.h @@ -39,7 +39,24 @@ static inline cpumask_t node_to_cpumask(int node) return node_cpu_mask; } +extern struct cpumask node_to_cpumask_map[]; +/* FIXME: This is dumb, recalculating every time. But simple. */ +static const struct cpumask *cpumask_of_node(int node) +{ + int cpu; + + cpumask_clear(&node_to_cpumask_map[node]); + + for_each_online_cpu(cpu) { + if (cpu_to_node(cpu) == node) + cpumask_set_cpu(cpu, node_to_cpumask_map[node]); + } + + return &node_to_cpumask_map[node]; +} + #define pcibus_to_cpumask(bus) (cpu_online_map) +#define cpumask_of_pcibus(bus) (cpu_online_mask) #endif /* !CONFIG_NUMA */ # include diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index a449e999027c..02bee6983ce2 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -79,6 +79,11 @@ int alpha_l3_cacheshape; unsigned long alpha_verbose_mcheck = CONFIG_VERBOSE_MCHECK_ON; #endif +#ifdef CONFIG_NUMA +struct cpumask node_to_cpumask_map[MAX_NUMNODES] __read_mostly; +EXPORT_SYMBOL(node_to_cpumask_map); +#endif + /* Which processor we booted from. */ int boot_cpuid; -- cgit v1.2.3 From 030bb203e01db12e3f2866799f4f03a114d06349 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 26 Dec 2008 22:23:41 +1030 Subject: cpumask: cpu_coregroup_mask(): x86 Impact: New API Like cpu_coregroup_map, but returns a (const) pointer. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Cc: Ingo Molnar --- arch/x86/include/asm/topology.h | 1 + arch/x86/kernel/smpboot.c | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 45da5dc50fc8..168203c0c316 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -231,6 +231,7 @@ static inline int node_to_first_cpu(int node) #endif extern cpumask_t cpu_coregroup_map(int cpu); +extern const struct cpumask *cpu_coregroup_mask(int cpu); #ifdef ENABLE_TOPO_DEFINES #define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 468c2f9d47ae..d5274b6b088e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -497,7 +497,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) } /* maps the cpu to the sched domain representing multi-core */ -cpumask_t cpu_coregroup_map(int cpu) +const struct cpumask *cpu_coregroup_mask(int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); /* @@ -505,9 +505,14 @@ cpumask_t cpu_coregroup_map(int cpu) * And for power savings, we return cpu_core_map */ if (sched_mc_power_savings || sched_smt_power_savings) - return per_cpu(cpu_core_map, cpu); + return &per_cpu(cpu_core_map, cpu); else - return c->llc_shared_map; + return &c->llc_shared_map; +} + +cpumask_t cpu_coregroup_map(int cpu) +{ + return *cpu_coregroup_mask(cpu); } static void impress_friends(void) -- cgit v1.2.3 From a0ae09b46a516f05ea76e3419ad43c46f52c1165 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 26 Dec 2008 22:23:42 +1030 Subject: cpumask: cpu_coregroup_mask(): sparc Like cpu_coregroup_map, but returns a (const) pointer. Compile-tested on sparc64 (defconfig). Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- arch/sparc/include/asm/topology_64.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index afd3cc1824d7..3270cfb1ab53 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -84,5 +84,6 @@ static inline int pcibus_to_node(struct pci_bus *pbus) #endif /* CONFIG_SMP */ #define cpu_coregroup_map(cpu) (cpu_core_map[cpu]) +#define cpu_coregroup_mask(cpu) (&cpu_core_map[cpu]) #endif /* _ASM_SPARC64_TOPOLOGY_H */ -- cgit v1.2.3 From 9be3eec2c83848a1ca57ebad13c63c95d0df01e2 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 26 Dec 2008 22:23:42 +1030 Subject: cpumask: cpu_coregroup_mask(): s390 Like cpu_coregroup_map, but returns a (const) pointer. Compile-tested on s390 (defconfig). Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- arch/s390/include/asm/topology.h | 1 + arch/s390/kernel/topology.c | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index d96c91643458..fff4a86c602a 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -6,6 +6,7 @@ #define mc_capable() (1) cpumask_t cpu_coregroup_map(unsigned int cpu); +const struct cpumask *cpu_coregroup_mask(unsigned int cpu); extern cpumask_t cpu_core_map[NR_CPUS]; diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index a947899dcba1..0601cd3231e4 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -93,6 +93,11 @@ cpumask_t cpu_coregroup_map(unsigned int cpu) return mask; } +const struct cpumask *cpu_coregroup_mask(unsigned int cpu) +{ + return &cpu_core_map[cpu]; +} + static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core) { unsigned int cpu; -- cgit v1.2.3 From 0b936bfdeb85784b7df132b2c64fb34ad9b11ffa Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Tue, 23 Dec 2008 21:51:28 +0530 Subject: x86: reboot.c declare port_cf9_safe before they get used Impact: cleanup, avoid sparse warning Include "../pci/pci.h" for port_cf9_safe Fixes this sparse warning: arch/x86/kernel/reboot.c:43:6: warning: symbol 'port_cf9_safe' was not declared. Should it be static? Signed-off-by: Jaswinder Singh Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 61f718df6eec..b165eb0884ed 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -22,6 +22,7 @@ #endif #include +#include "../pci/pci.h" /* -- cgit v1.2.3 From b6b301aa9fba57b114c3a00f5f43abf672bd4ecd Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Date: Tue, 23 Dec 2008 21:52:33 +0530 Subject: x86: apic.c x2apic_preenabled and disable_x2apic should be static Impact: cleanup, reduce kernel size a bit, avoid sparse warning Fixes sparse warning: arch/x86/kernel/apic.c:103:5: warning: symbol 'disable_x2apic' was not declared. Should it be static? Signed-off-by: Jaswinder Singh Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 2 +- arch/x86/kernel/apic.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 25caa0738af5..e644bf6f90dc 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -93,7 +93,7 @@ static inline u32 native_apic_msr_read(u32 reg) } #ifndef CONFIG_X86_32 -extern int x2apic, x2apic_preenabled; +extern int x2apic; extern void check_x2apic(void); extern void enable_x2apic(void); extern void enable_IR_x2apic(void); diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 7397911f8478..3a961bd9f619 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -97,8 +97,8 @@ __setup("apicpmtimer", setup_apicpmtimer); #ifdef HAVE_X2APIC int x2apic; /* x2apic enabled before OS handover */ -int x2apic_preenabled; -int disable_x2apic; +static int x2apic_preenabled; +static int disable_x2apic; static __init int setup_nox2apic(char *str) { disable_x2apic = 1; -- cgit v1.2.3 From 6092848a2a23b660150a38bc06f59d75838d70c8 Mon Sep 17 00:00:00 2001 From: Sergio Luis Date: Sun, 28 Dec 2008 04:12:26 -0300 Subject: x86: mark get_cpu_leaves() with __cpuinit annotation Impact: fix section mismatch warning Commit b2bb85549134c005e997e5a7ed303bda6a1ae738 ("x86: Remove cpumask games in x86/kernel/cpu/intel_cacheinfo.c") introduced get_cpu_leaves(), which references __cpuinit cpuid4_cache_lookup(). Mark get_cpu_leaves() with a __cpuinit annotation. Signed-off-by: Sergio Luis Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index fb7f946cb65e..7bd00a565672 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -534,7 +534,7 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) per_cpu(cpuid4_info, cpu) = NULL; } -static void get_cpu_leaves(void *_retval) +static void __cpuinit get_cpu_leaves(void *_retval) { int j, *retval = _retval, cpu = smp_processor_id(); -- cgit v1.2.3 From c805b7300ed20ec4f10ea385988d6d3fa935b26c Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 27 Dec 2008 17:10:18 +0300 Subject: x86: mach-default setup.c cleanups Impact: cleanup - Break long lines into shorter form. - Use pr_ macros instead of plain printk. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/mach-default/setup.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index 37b9ae4d44c5..df167f265622 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c @@ -133,29 +133,28 @@ void __init time_init_hook(void) **/ void mca_nmi_hook(void) { - /* If I recall correctly, there's a whole bunch of other things that + /* + * If I recall correctly, there's a whole bunch of other things that * we can do to check for NMI problems, but that's all I know about * at the moment. */ - - printk("NMI generated from unknown source!\n"); + pr_warning("NMI generated from unknown source!\n"); } #endif static __init int no_ipi_broadcast(char *str) { get_option(&str, &no_broadcast); - printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" : - "IPI Broadcast"); + pr_info("Using %s mode\n", + no_broadcast ? "No IPI Broadcast" : "IPI Broadcast"); return 1; } - __setup("no_ipi_broadcast=", no_ipi_broadcast); static int __init print_ipi_mode(void) { - printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" : - "Shortcut"); + pr_info("Using IPI %s mode\n", + no_broadcast ? "No-Shortcut" : "Shortcut"); return 0; } -- cgit v1.2.3 From 2f06de0671096e19350c9efe21cfdbc0891aab20 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 27 Dec 2008 21:37:10 +0530 Subject: x86: introducing asm/sys_ia32.h Impact: cleanup, avoid 44 sparse warnings, new file asm/sys_ia32.h Fixes following sparse warnings: CHECK arch/x86/ia32/sys_ia32.c arch/x86/ia32/sys_ia32.c:53:17: warning: symbol 'sys32_truncate64' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:60:17: warning: symbol 'sys32_ftruncate64' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:98:17: warning: symbol 'sys32_stat64' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:109:17: warning: symbol 'sys32_lstat64' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:119:17: warning: symbol 'sys32_fstat64' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:128:17: warning: symbol 'sys32_fstatat' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:164:17: warning: symbol 'sys32_mmap' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:195:17: warning: symbol 'sys32_mprotect' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:201:17: warning: symbol 'sys32_pipe' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:215:17: warning: symbol 'sys32_rt_sigaction' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:291:17: warning: symbol 'sys32_sigaction' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:330:17: warning: symbol 'sys32_rt_sigprocmask' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:370:17: warning: symbol 'sys32_alarm' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:383:17: warning: symbol 'sys32_old_select' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:393:17: warning: symbol 'sys32_waitpid' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:401:17: warning: symbol 'sys32_sysfs' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:406:17: warning: symbol 'sys32_sched_rr_get_interval' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:421:17: warning: symbol 'sys32_rt_sigpending' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:445:17: warning: symbol 'sys32_rt_sigqueueinfo' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:472:17: warning: symbol 'sys32_sysctl' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:517:17: warning: symbol 'sys32_pread' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:524:17: warning: symbol 'sys32_pwrite' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:532:17: warning: symbol 'sys32_personality' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:545:17: warning: symbol 'sys32_sendfile' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:565:17: warning: symbol 'sys32_mmap2' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:589:17: warning: symbol 'sys32_olduname' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:626:6: warning: symbol 'sys32_uname' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:641:6: warning: symbol 'sys32_ustat' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:663:17: warning: symbol 'sys32_execve' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:678:17: warning: symbol 'sys32_clone' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:693:6: warning: symbol 'sys32_lseek' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:698:6: warning: symbol 'sys32_kill' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:703:6: warning: symbol 'sys32_fadvise64_64' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:712:6: warning: symbol 'sys32_vm86_warning' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:726:6: warning: symbol 'sys32_lookup_dcookie' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:732:20: warning: symbol 'sys32_readahead' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:738:17: warning: symbol 'sys32_sync_file_range' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:746:17: warning: symbol 'sys32_fadvise64' was not declared. Should it be static? arch/x86/ia32/sys_ia32.c:753:17: warning: symbol 'sys32_fallocate' was not declared. Should it be static? CHECK arch/x86/ia32/ia32_signal.c arch/x86/ia32/ia32_signal.c:126:17: warning: symbol 'sys32_sigsuspend' was not declared. Should it be static? arch/x86/ia32/ia32_signal.c:141:17: warning: symbol 'sys32_sigaltstack' was not declared. Should it be static? arch/x86/ia32/ia32_signal.c:249:17: warning: symbol 'sys32_sigreturn' was not declared. Should it be static? arch/x86/ia32/ia32_signal.c:279:17: warning: symbol 'sys32_rt_sigreturn' was not declared. Should it be static? CHECK arch/x86/ia32/ipc32.c arch/x86/ia32/ipc32.c:12:17: warning: symbol 'sys32_ipc' was not declared. Should it be static? Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/ia32/ia32_signal.c | 3 +- arch/x86/ia32/ipc32.c | 1 + arch/x86/ia32/sys_ia32.c | 2 +- arch/x86/include/asm/sys_ia32.h | 101 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 104 insertions(+), 3 deletions(-) create mode 100644 arch/x86/include/asm/sys_ia32.h (limited to 'arch') diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index b195f85526e3..9dabd00e9805 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -24,15 +24,14 @@ #include #include #include -#include #include #include #include #include #include #include - #include +#include #define DEBUG_SIG 0 diff --git a/arch/x86/ia32/ipc32.c b/arch/x86/ia32/ipc32.c index d21991ce606c..29cdcd02ead3 100644 --- a/arch/x86/ia32/ipc32.c +++ b/arch/x86/ia32/ipc32.c @@ -8,6 +8,7 @@ #include #include #include +#include asmlinkage long sys32_ipc(u32 call, int first, int second, int third, compat_uptr_t ptr, u32 fifth) diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 2e09dcd3c0a6..6c0d7f6231af 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -44,8 +44,8 @@ #include #include #include -#include #include +#include #define AA(__x) ((unsigned long)(__x)) diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h new file mode 100644 index 000000000000..ffb08be2a530 --- /dev/null +++ b/arch/x86/include/asm/sys_ia32.h @@ -0,0 +1,101 @@ +/* + * sys_ia32.h - Linux ia32 syscall interfaces + * + * Copyright (c) 2008 Jaswinder Singh Rajput + * + * This file is released under the GPLv2. + * See the file COPYING for more details. + */ + +#ifndef _ASM_X86_SYS_IA32_H +#define _ASM_X86_SYS_IA32_H + +#include +#include +#include +#include +#include +#include + +/* ia32/sys_ia32.c */ +asmlinkage long sys32_truncate64(char __user *, unsigned long, unsigned long); +asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long); + +asmlinkage long sys32_stat64(char __user *, struct stat64 __user *); +asmlinkage long sys32_lstat64(char __user *, struct stat64 __user *); +asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *); +asmlinkage long sys32_fstatat(unsigned int, char __user *, + struct stat64 __user *, int); +struct mmap_arg_struct; +asmlinkage long sys32_mmap(struct mmap_arg_struct __user *); +asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long); + +asmlinkage long sys32_pipe(int __user *); +struct sigaction32; +struct old_sigaction32; +asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *, + struct sigaction32 __user *, unsigned int); +asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *, + struct old_sigaction32 __user *); +asmlinkage long sys32_rt_sigprocmask(int, compat_sigset_t __user *, + compat_sigset_t __user *, unsigned int); +asmlinkage long sys32_alarm(unsigned int); + +struct sel_arg_struct; +asmlinkage long sys32_old_select(struct sel_arg_struct __user *); +asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int); +asmlinkage long sys32_sysfs(int, u32, u32); + +asmlinkage long sys32_sched_rr_get_interval(compat_pid_t, + struct compat_timespec __user *); +asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t); +asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *); + +#ifdef CONFIG_SYSCTL_SYSCALL +struct sysctl_ia32; +asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *); +#endif + +asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); +asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32); + +asmlinkage long sys32_personality(unsigned long); +asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); + +asmlinkage long sys32_mmap2(unsigned long, unsigned long, unsigned long, + unsigned long, unsigned long, unsigned long); + +struct oldold_utsname; +struct old_utsname; +asmlinkage long sys32_olduname(struct oldold_utsname __user *); +long sys32_uname(struct old_utsname __user *); + +long sys32_ustat(unsigned, struct ustat32 __user *); + +asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *, + compat_uptr_t __user *, struct pt_regs *); +asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *); + +long sys32_lseek(unsigned int, int, unsigned int); +long sys32_kill(int, int); +long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int); +long sys32_vm86_warning(void); +long sys32_lookup_dcookie(u32, u32, char __user *, size_t); + +asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t); +asmlinkage long sys32_sync_file_range(int, unsigned, unsigned, + unsigned, unsigned, int); +asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int); +asmlinkage long sys32_fallocate(int, int, unsigned, + unsigned, unsigned, unsigned); + +/* ia32/ia32_signal.c */ +asmlinkage long sys32_sigsuspend(int, int, old_sigset_t); +asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *, + stack_ia32_t __user *, struct pt_regs *); +asmlinkage long sys32_sigreturn(struct pt_regs *); +asmlinkage long sys32_rt_sigreturn(struct pt_regs *); + +/* ia32/ipc32.c */ +asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32); +#endif /* _ASM_X86_SYS_IA32_H */ -- cgit v1.2.3 From a1ae299dfb6ef219b296b61d1f222732391973b5 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 29 Dec 2008 20:32:52 +0530 Subject: x86: apic.c declare pic_mode before they get used Impact: cleanup, avoid sparse warning In asm/mpspec.h moved out pic_mode from CONFIG_X86_32 as it is common for both 32 and 64 bit. Fixes this sparse warning for x86_64: arch/x86/kernel/apic.c:128:5: warning: symbol 'pic_mode' was not declared. Should it be static? Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mpspec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 91885c28f66b..62d14ce3cd00 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h @@ -6,13 +6,13 @@ #include extern int apic_version[MAX_APICS]; +extern int pic_mode; #ifdef CONFIG_X86_32 #include extern unsigned int def_to_bigsmp; extern u8 apicid_2_node[]; -extern int pic_mode; #ifdef CONFIG_X86_NUMAQ extern int mp_bus_id_to_node[MAX_MP_BUSSES]; -- cgit v1.2.3 From 7f3e632f9d8d234819bcdef7a68fc8b84f7d3d3d Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 29 Dec 2008 20:34:35 +0530 Subject: x86: io_apic.c io_apic_sync should be static Impact: cleanup, reduce kernel size a bit, avoid sparse warning Fixes sparse warning: arch/x86/kernel/io_apic.c:709:6: warning: symbol 'io_apic_sync' was not declared. Should it be static? Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 679e7bbbbcd6..b8c8a8e99341 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -481,7 +481,7 @@ static void __unmask_IO_APIC_irq(unsigned int irq) } #ifdef CONFIG_X86_64 -void io_apic_sync(struct irq_pin_list *entry) +static void io_apic_sync(struct irq_pin_list *entry) { /* * Synchronize the IO-APIC and the CPU by doing -- cgit v1.2.3 From cbafbc826bf645f7fbbfbb2ff20138e5ccb4700e Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 29 Dec 2008 20:36:40 +0530 Subject: x86: efi.c declare add_efi_memmap before they get used Impact: cleanup, avoid sparse warning Fixes this sparse warning: arch/x86/kernel/efi.c:67:5: warning: symbol 'add_efi_memmap' was not declared. Should it be static? Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index a2e545c91c35..ca5ffb2856b6 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -90,6 +90,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size); #endif /* CONFIG_X86_32 */ +extern int add_efi_memmap; extern void efi_reserve_early(void); extern void efi_call_phys_prelog(void); extern void efi_call_phys_epilog(void); -- cgit v1.2.3 From c854c91979e0717c619bc55e124d41d60d5eb3d6 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 29 Dec 2008 20:38:09 +0530 Subject: x86_64: pci-gart_64.c iommu_fullflush should be static Impact: cleanup, reduce kernel size a bit, avoid sparse warning Fixes sparse warning: arch/x86/kernel/pci-gart_64.c:55:5: warning: symbol 'iommu_fullflush' was not declared. Should it be static? Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index a35eaa379ff6..00c2bcd41463 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -52,7 +52,7 @@ static u32 *iommu_gatt_base; /* Remapping table */ * to trigger bugs with some popular PCI cards, in particular 3ware (but * has been also also seen with Qlogic at least). */ -int iommu_fullflush = 1; +static int iommu_fullflush = 1; /* Allocation bitmap for the remapping area: */ static DEFINE_SPINLOCK(iommu_bitmap_lock); -- cgit v1.2.3 From 824877111cd7f2b4fd2fe6947c5c5cbbb3ac5bd8 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sat, 27 Dec 2008 18:32:28 +0530 Subject: x86, pci: move arch/x86/pci/pci.h to arch/x86/include/asm/pci_x86.h Impact: cleanup Now that arch/x86/pci/pci.h is used in a number of other places as well, move the lowlevel x86 pci definitions into the architecture include files. (not to be confused with the existing arch/x86/include/asm/pci.h file, which provides public details about x86 PCI) Tested on: X86_32_UP, X86_32_SMP and X86_64_SMP Signed-off-by: Jaswinder Singh Rajput Acked-by: Jesse Barnes Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pci_x86.h | 165 +++++++++++++++++++++++++++++++++++++ arch/x86/kernel/mmconf-fam10h_64.c | 3 +- arch/x86/kernel/reboot.c | 3 +- arch/x86/pci/acpi.c | 2 +- arch/x86/pci/amd_bus.c | 2 +- arch/x86/pci/common.c | 3 +- arch/x86/pci/direct.c | 2 +- arch/x86/pci/early.c | 2 +- arch/x86/pci/fixup.c | 3 +- arch/x86/pci/i386.c | 2 +- arch/x86/pci/init.c | 2 +- arch/x86/pci/irq.c | 3 +- arch/x86/pci/legacy.c | 2 +- arch/x86/pci/mmconfig-shared.c | 3 +- arch/x86/pci/mmconfig_32.c | 2 +- arch/x86/pci/mmconfig_64.c | 3 +- arch/x86/pci/numaq_32.c | 2 +- arch/x86/pci/olpc.c | 2 +- arch/x86/pci/pcbios.c | 5 +- arch/x86/pci/pci.h | 162 ------------------------------------ arch/x86/pci/visws.c | 3 +- drivers/pci/hotplug/cpqphp_core.c | 2 +- drivers/pci/hotplug/cpqphp_pci.c | 2 +- drivers/pci/hotplug/ibmphp_core.c | 2 +- 24 files changed, 188 insertions(+), 194 deletions(-) create mode 100644 arch/x86/include/asm/pci_x86.h delete mode 100644 arch/x86/pci/pci.h (limited to 'arch') diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h new file mode 100644 index 000000000000..e60fd3e14bdf --- /dev/null +++ b/arch/x86/include/asm/pci_x86.h @@ -0,0 +1,165 @@ +/* + * Low-Level PCI Access for i386 machines. + * + * (c) 1999 Martin Mares + */ + +#undef DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +#define PCI_PROBE_BIOS 0x0001 +#define PCI_PROBE_CONF1 0x0002 +#define PCI_PROBE_CONF2 0x0004 +#define PCI_PROBE_MMCONF 0x0008 +#define PCI_PROBE_MASK 0x000f +#define PCI_PROBE_NOEARLY 0x0010 + +#define PCI_NO_CHECKS 0x0400 +#define PCI_USE_PIRQ_MASK 0x0800 +#define PCI_ASSIGN_ROMS 0x1000 +#define PCI_BIOS_IRQ_SCAN 0x2000 +#define PCI_ASSIGN_ALL_BUSSES 0x4000 +#define PCI_CAN_SKIP_ISA_ALIGN 0x8000 +#define PCI_USE__CRS 0x10000 +#define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 +#define PCI_HAS_IO_ECS 0x40000 +#define PCI_NOASSIGN_ROMS 0x80000 + +extern unsigned int pci_probe; +extern unsigned long pirq_table_addr; + +enum pci_bf_sort_state { + pci_bf_sort_default, + pci_force_nobf, + pci_force_bf, + pci_dmi_bf, +}; + +/* pci-i386.c */ + +extern unsigned int pcibios_max_latency; + +void pcibios_resource_survey(void); + +/* pci-pc.c */ + +extern int pcibios_last_bus; +extern struct pci_bus *pci_root_bus; +extern struct pci_ops pci_root_ops; + +/* pci-irq.c */ + +struct irq_info { + u8 bus, devfn; /* Bus, device and function */ + struct { + u8 link; /* IRQ line ID, chipset dependent, + 0 = not routed */ + u16 bitmap; /* Available IRQs */ + } __attribute__((packed)) irq[4]; + u8 slot; /* Slot number, 0=onboard */ + u8 rfu; +} __attribute__((packed)); + +struct irq_routing_table { + u32 signature; /* PIRQ_SIGNATURE should be here */ + u16 version; /* PIRQ_VERSION */ + u16 size; /* Table size in bytes */ + u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */ + u16 exclusive_irqs; /* IRQs devoted exclusively to + PCI usage */ + u16 rtr_vendor, rtr_device; /* Vendor and device ID of + interrupt router */ + u32 miniport_data; /* Crap */ + u8 rfu[11]; + u8 checksum; /* Modulo 256 checksum must give 0 */ + struct irq_info slots[0]; +} __attribute__((packed)); + +extern unsigned int pcibios_irq_mask; + +extern int pcibios_scanned; +extern spinlock_t pci_config_lock; + +extern int (*pcibios_enable_irq)(struct pci_dev *dev); +extern void (*pcibios_disable_irq)(struct pci_dev *dev); + +struct pci_raw_ops { + int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn, + int reg, int len, u32 *val); + int (*write)(unsigned int domain, unsigned int bus, unsigned int devfn, + int reg, int len, u32 val); +}; + +extern struct pci_raw_ops *raw_pci_ops; +extern struct pci_raw_ops *raw_pci_ext_ops; + +extern struct pci_raw_ops pci_direct_conf1; +extern bool port_cf9_safe; + +/* arch_initcall level */ +extern int pci_direct_probe(void); +extern void pci_direct_init(int type); +extern void pci_pcbios_init(void); +extern int pci_olpc_init(void); +extern void __init dmi_check_pciprobe(void); +extern void __init dmi_check_skip_isa_align(void); + +/* some common used subsys_initcalls */ +extern int __init pci_acpi_init(void); +extern int __init pcibios_irq_init(void); +extern int __init pci_visws_init(void); +extern int __init pci_numaq_init(void); +extern int __init pcibios_init(void); + +/* pci-mmconfig.c */ + +extern int __init pci_mmcfg_arch_init(void); +extern void __init pci_mmcfg_arch_free(void); + +/* + * AMD Fam10h CPUs are buggy, and cannot access MMIO config space + * on their northbrige except through the * %eax register. As such, you MUST + * NOT use normal IOMEM accesses, you need to only use the magic mmio-config + * accessor functions. + * In fact just use pci_config_*, nothing else please. + */ +static inline unsigned char mmio_config_readb(void __iomem *pos) +{ + u8 val; + asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos)); + return val; +} + +static inline unsigned short mmio_config_readw(void __iomem *pos) +{ + u16 val; + asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos)); + return val; +} + +static inline unsigned int mmio_config_readl(void __iomem *pos) +{ + u32 val; + asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos)); + return val; +} + +static inline void mmio_config_writeb(void __iomem *pos, u8 val) +{ + asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory"); +} + +static inline void mmio_config_writew(void __iomem *pos, u16 val) +{ + asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory"); +} + +static inline void mmio_config_writel(void __iomem *pos, u32 val) +{ + asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory"); +} diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index efc2f361fe85..666e43df51f9 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -13,8 +13,7 @@ #include #include #include - -#include "../pci/pci.h" +#include struct pci_hostbridge_probe { u32 bus; diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index b165eb0884ed..a90913cccfb7 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -12,6 +12,7 @@ #include #include #include +#include #ifdef CONFIG_X86_32 # include @@ -22,8 +23,6 @@ #endif #include -#include "../pci/pci.h" - /* * Power off function, if any diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 1d88d2b39771..9e5752fe4d15 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -4,7 +4,7 @@ #include #include #include -#include "pci.h" +#include struct pci_root_info { char *name; diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 22e057665e55..9bb09823b362 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -2,7 +2,7 @@ #include #include #include -#include "pci.h" +#include #ifdef CONFIG_X86_64 #include diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index bb1a01f089e2..62ddb73e09ed 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -14,8 +14,7 @@ #include #include #include - -#include "pci.h" +#include unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | PCI_PROBE_MMCONF; diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index 9a5af6c8fbe9..bd13c3e4c6db 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c @@ -5,7 +5,7 @@ #include #include #include -#include "pci.h" +#include /* * Functions for accessing PCI base (first 256 bytes) and extended diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c index 86631ccbc25a..f6adf2c6d751 100644 --- a/arch/x86/pci/early.c +++ b/arch/x86/pci/early.c @@ -2,7 +2,7 @@ #include #include #include -#include "pci.h" +#include /* Direct PCI access. This is used for PCI accesses in early boot before the PCI subsystem works. */ diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 2051dc96b8e9..7d388d5cf548 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -6,8 +6,7 @@ #include #include #include -#include "pci.h" - +#include static void __devinit pci_fixup_i450nx(struct pci_dev *d) { diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 844df0cbbd3e..e51bf2cda4b0 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -34,8 +34,8 @@ #include #include +#include -#include "pci.h" static int skip_isa_ioresource_align(struct pci_dev *dev) { diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c index d6c950f81858..bec3b048e72b 100644 --- a/arch/x86/pci/init.c +++ b/arch/x86/pci/init.c @@ -1,6 +1,6 @@ #include #include -#include "pci.h" +#include /* arch_initcall has too random ordering, so call the initializers in the right sequence from here. */ diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index bf69dbe08bff..373b9afe6d44 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -16,8 +16,7 @@ #include #include #include - -#include "pci.h" +#include #define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24)) #define PIRQ_VERSION 0x0100 diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index b722dd481b39..f1065b129e9c 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -3,7 +3,7 @@ */ #include #include -#include "pci.h" +#include /* * Discover remaining PCI buses in case there are peer host bridges. diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 654a2234f8f3..89bf9242c80a 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -15,8 +15,7 @@ #include #include #include - -#include "pci.h" +#include /* aperture is up to 256MB but BIOS may reserve less */ #define MMCONFIG_APER_MIN (2 * 1024*1024) diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index f3c761dce695..8b2d561046a3 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -13,7 +13,7 @@ #include #include #include -#include "pci.h" +#include /* Assume systems with more busses have correct MCFG */ #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index a1994163c99d..30007ffc8e11 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -10,8 +10,7 @@ #include #include #include - -#include "pci.h" +#include /* Static virtual mapping of the MMCONFIG aperture */ struct mmcfg_virt { diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 1177845d3186..2089354968a2 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c @@ -7,7 +7,7 @@ #include #include #include -#include "pci.h" +#include #define XQUAD_PORTIO_BASE 0xfe400000 #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c index e11e9e803d5f..b889d824f7c6 100644 --- a/arch/x86/pci/olpc.c +++ b/arch/x86/pci/olpc.c @@ -29,7 +29,7 @@ #include #include #include -#include "pci.h" +#include /* * In the tables below, the first two line (8 longwords) are the diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 37472fc6f729..b82cae970dfd 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c @@ -6,9 +6,8 @@ #include #include #include -#include "pci.h" -#include "pci-functions.h" - +#include +#include /* BIOS32 signature: "_32_" */ #define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h deleted file mode 100644 index 1959018aac02..000000000000 --- a/arch/x86/pci/pci.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Low-Level PCI Access for i386 machines. - * - * (c) 1999 Martin Mares - */ - -#undef DEBUG - -#ifdef DEBUG -#define DBG(x...) printk(x) -#else -#define DBG(x...) -#endif - -#define PCI_PROBE_BIOS 0x0001 -#define PCI_PROBE_CONF1 0x0002 -#define PCI_PROBE_CONF2 0x0004 -#define PCI_PROBE_MMCONF 0x0008 -#define PCI_PROBE_MASK 0x000f -#define PCI_PROBE_NOEARLY 0x0010 - -#define PCI_NO_CHECKS 0x0400 -#define PCI_USE_PIRQ_MASK 0x0800 -#define PCI_ASSIGN_ROMS 0x1000 -#define PCI_BIOS_IRQ_SCAN 0x2000 -#define PCI_ASSIGN_ALL_BUSSES 0x4000 -#define PCI_CAN_SKIP_ISA_ALIGN 0x8000 -#define PCI_USE__CRS 0x10000 -#define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 -#define PCI_HAS_IO_ECS 0x40000 -#define PCI_NOASSIGN_ROMS 0x80000 - -extern unsigned int pci_probe; -extern unsigned long pirq_table_addr; - -enum pci_bf_sort_state { - pci_bf_sort_default, - pci_force_nobf, - pci_force_bf, - pci_dmi_bf, -}; - -/* pci-i386.c */ - -extern unsigned int pcibios_max_latency; - -void pcibios_resource_survey(void); - -/* pci-pc.c */ - -extern int pcibios_last_bus; -extern struct pci_bus *pci_root_bus; -extern struct pci_ops pci_root_ops; - -/* pci-irq.c */ - -struct irq_info { - u8 bus, devfn; /* Bus, device and function */ - struct { - u8 link; /* IRQ line ID, chipset dependent, 0=not routed */ - u16 bitmap; /* Available IRQs */ - } __attribute__((packed)) irq[4]; - u8 slot; /* Slot number, 0=onboard */ - u8 rfu; -} __attribute__((packed)); - -struct irq_routing_table { - u32 signature; /* PIRQ_SIGNATURE should be here */ - u16 version; /* PIRQ_VERSION */ - u16 size; /* Table size in bytes */ - u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */ - u16 exclusive_irqs; /* IRQs devoted exclusively to PCI usage */ - u16 rtr_vendor, rtr_device; /* Vendor and device ID of interrupt router */ - u32 miniport_data; /* Crap */ - u8 rfu[11]; - u8 checksum; /* Modulo 256 checksum must give zero */ - struct irq_info slots[0]; -} __attribute__((packed)); - -extern unsigned int pcibios_irq_mask; - -extern int pcibios_scanned; -extern spinlock_t pci_config_lock; - -extern int (*pcibios_enable_irq)(struct pci_dev *dev); -extern void (*pcibios_disable_irq)(struct pci_dev *dev); - -struct pci_raw_ops { - int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn, - int reg, int len, u32 *val); - int (*write)(unsigned int domain, unsigned int bus, unsigned int devfn, - int reg, int len, u32 val); -}; - -extern struct pci_raw_ops *raw_pci_ops; -extern struct pci_raw_ops *raw_pci_ext_ops; - -extern struct pci_raw_ops pci_direct_conf1; -extern bool port_cf9_safe; - -/* arch_initcall level */ -extern int pci_direct_probe(void); -extern void pci_direct_init(int type); -extern void pci_pcbios_init(void); -extern int pci_olpc_init(void); -extern void __init dmi_check_pciprobe(void); -extern void __init dmi_check_skip_isa_align(void); - -/* some common used subsys_initcalls */ -extern int __init pci_acpi_init(void); -extern int __init pcibios_irq_init(void); -extern int __init pci_visws_init(void); -extern int __init pci_numaq_init(void); -extern int __init pcibios_init(void); - -/* pci-mmconfig.c */ - -extern int __init pci_mmcfg_arch_init(void); -extern void __init pci_mmcfg_arch_free(void); - -/* - * AMD Fam10h CPUs are buggy, and cannot access MMIO config space - * on their northbrige except through the * %eax register. As such, you MUST - * NOT use normal IOMEM accesses, you need to only use the magic mmio-config - * accessor functions. - * In fact just use pci_config_*, nothing else please. - */ -static inline unsigned char mmio_config_readb(void __iomem *pos) -{ - u8 val; - asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos)); - return val; -} - -static inline unsigned short mmio_config_readw(void __iomem *pos) -{ - u16 val; - asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos)); - return val; -} - -static inline unsigned int mmio_config_readl(void __iomem *pos) -{ - u32 val; - asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos)); - return val; -} - -static inline void mmio_config_writeb(void __iomem *pos, u8 val) -{ - asm volatile("movb %%al,(%1)" :: "a" (val), "r" (pos) : "memory"); -} - -static inline void mmio_config_writew(void __iomem *pos, u16 val) -{ - asm volatile("movw %%ax,(%1)" :: "a" (val), "r" (pos) : "memory"); -} - -static inline void mmio_config_writel(void __iomem *pos, u32 val) -{ - asm volatile("movl %%eax,(%1)" :: "a" (val), "r" (pos) : "memory"); -} diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c index 42f4cb19faca..16d0c0eb0d19 100644 --- a/arch/x86/pci/visws.c +++ b/arch/x86/pci/visws.c @@ -9,11 +9,10 @@ #include #include +#include #include #include -#include "pci.h" - static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; } static void pci_visws_disable_irq(struct pci_dev *dev) { } diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c index 8514c3a1746a..c2e1bcbb28a7 100644 --- a/drivers/pci/hotplug/cpqphp_core.c +++ b/drivers/pci/hotplug/cpqphp_core.c @@ -45,7 +45,7 @@ #include "cpqphp.h" #include "cpqphp_nvram.h" -#include "../../../arch/x86/pci/pci.h" /* horrible hack showing how processor dependent we are... */ +#include /* Global variables */ diff --git a/drivers/pci/hotplug/cpqphp_pci.c b/drivers/pci/hotplug/cpqphp_pci.c index 09021930589f..df146be9d2e9 100644 --- a/drivers/pci/hotplug/cpqphp_pci.c +++ b/drivers/pci/hotplug/cpqphp_pci.c @@ -37,7 +37,7 @@ #include "../pci.h" #include "cpqphp.h" #include "cpqphp_nvram.h" -#include "../../../arch/x86/pci/pci.h" /* horrible hack showing how processor dependent we are... */ +#include u8 cpqhp_nic_irq; diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c index 633e743442ac..dd18f857dfb0 100644 --- a/drivers/pci/hotplug/ibmphp_core.c +++ b/drivers/pci/hotplug/ibmphp_core.c @@ -35,7 +35,7 @@ #include #include #include "../pci.h" -#include "../../../arch/x86/pci/pci.h" /* for struct irq_routing_table */ +#include /* for struct irq_routing_table */ #include "ibmphp.h" #define attn_on(sl) ibmphp_hpc_writeslot (sl, HPC_SLOT_ATTNON) -- cgit v1.2.3 From 412a1be265b894a45cebbfc2b57eb7a593bf34b2 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 29 Dec 2008 21:44:12 +0530 Subject: x86: amd_iommu_init.c: iommu_enable and iommu_enable_event_logging should be static Impact: cleanup, reduce kernel size a bit, avoid sparse warning Fixes sparse warning: arch/x86/kernel/amd_iommu_init.c:246:13: warning: symbol 'iommu_enable' was not declared. Should it be static? arch/x86/kernel/amd_iommu_init.c:259:13: warning: symbol 'iommu_enable_event_logging' was not declared. Should it be static? Signed-off-by: Jaswinder Singh Rajput Signed-off-by: H. Peter Anvin --- arch/x86/kernel/amd_iommu_init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c625800c55ca..fb85e8d466cc 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -243,7 +243,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) } /* Function to enable the hardware */ -void __init iommu_enable(struct amd_iommu *iommu) +static void __init iommu_enable(struct amd_iommu *iommu) { printk(KERN_INFO "AMD IOMMU: Enabling IOMMU " "at %02x:%02x.%x cap 0x%hx\n", @@ -256,7 +256,7 @@ void __init iommu_enable(struct amd_iommu *iommu) } /* Function to enable IOMMU event logging and event interrupts */ -void __init iommu_enable_event_logging(struct amd_iommu *iommu) +static void __init iommu_enable_event_logging(struct amd_iommu *iommu) { iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); -- cgit v1.2.3 From 557f687c87ddb8adb094b2dad4e1c83c7717982d Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 29 Dec 2008 21:45:22 +0530 Subject: x86: amd_iommu.c: prealloc_protection_domains should be static Impact: cleanup, reduce kernel size a bit, avoid sparse warning Fixes sparse warning: arch/x86/kernel/amd_iommu.c:1299:6: warning: symbol 'prealloc_protection_domains' was not declared. Should it be static? Signed-off-by: Jaswinder Singh Rajput Signed-off-by: H. Peter Anvin --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 2e2da717b350..658e29e0f49b 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1296,7 +1296,7 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask) * we don't need to preallocate the protection domains anymore. * For now we have to. */ -void prealloc_protection_domains(void) +static void prealloc_protection_domains(void) { struct pci_dev *dev = NULL; struct dma_ops_domain *dma_dom; -- cgit v1.2.3 From 4d08d97f5262dab4482af5bc91b30af4ca02269e Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 29 Dec 2008 22:11:40 +0530 Subject: x86: genx2apic_phys.c: x2apic_send_IPI_self and init_x2apic_ldr should be static Impact: cleanup, reduce kernel size a bit, avoid sparse warnings Fixes sparse warnings: arch/x86/kernel/genx2apic_phys.c:164:6: warning: symbol 'x2apic_send_IPI_self' was not declared. Should it be static? arch/x86/kernel/genx2apic_phys.c:169:6: warning: symbol 'init_x2apic_ldr' was not declared. Should it be static? Signed-off-by: Jaswinder Singh Rajput Signed-off-by: H. Peter Anvin --- arch/x86/kernel/genx2apic_phys.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index d042211768b7..a177c7880ab5 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c @@ -123,12 +123,12 @@ static unsigned int phys_pkg_id(int index_msb) return current_cpu_data.initial_apicid >> index_msb; } -void x2apic_send_IPI_self(int vector) +static void x2apic_send_IPI_self(int vector) { apic_write(APIC_SELF_IPI, vector); } -void init_x2apic_ldr(void) +static void init_x2apic_ldr(void) { return; } -- cgit v1.2.3 From c62e9d56ea90ef94f9708ce3f11860c20fa5e135 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 29 Dec 2008 22:12:50 +0530 Subject: x86: bios_uv.c: uv_systab should be static Impact: cleanup, reduce kernel size a bit, avoid sparse warning Fixes sparse warning: arch/x86/kernel/bios_uv.c:28:18: warning: symbol 'uv_systab' was not declared. Should it be static? Signed-off-by: Jaswinder Singh Rajput Signed-off-by: H. Peter Anvin --- arch/x86/kernel/bios_uv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index 2a0a2a3cac26..f63882728d91 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c @@ -25,7 +25,7 @@ #include #include -struct uv_systab uv_systab; +static struct uv_systab uv_systab; s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) { -- cgit v1.2.3 From ec8c842a524888fdcccece337d91798e3e8af880 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Tue, 30 Dec 2008 22:46:36 +0530 Subject: x86: apic.c: xapic_icr_read and x2apic_icr_read should be static Impact: cleanup, reduce kernel size a bit, avoid sparse warning Fixes sparse warning: arch/x86/kernel/apic.c:270:5: warning: symbol 'x2apic_icr_read' was not declared. Should it be static? Signed-off-by: Jaswinder Singh Rajput Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/apic.h | 1 - arch/x86/kernel/apic.c | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index e644bf6f90dc..ab1d51a8855e 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -54,7 +54,6 @@ extern int disable_apic; extern int is_vsmp_box(void); extern void xapic_wait_icr_idle(void); extern u32 safe_xapic_wait_icr_idle(void); -extern u64 xapic_icr_read(void); extern void xapic_icr_write(u32, u32); extern int setup_profiling_timer(unsigned int); diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index c67722f010bd..66198cbe464d 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -228,7 +228,7 @@ void xapic_icr_write(u32 low, u32 id) apic_write(APIC_ICR, low); } -u64 xapic_icr_read(void) +static u64 xapic_icr_read(void) { u32 icr1, icr2; @@ -268,7 +268,7 @@ void x2apic_icr_write(u32 low, u32 id) wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); } -u64 x2apic_icr_read(void) +static u64 x2apic_icr_read(void) { unsigned long val; -- cgit v1.2.3 From fa95826fe0ddbc2a55373134d8d1a21b49d13434 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Tue, 30 Dec 2008 20:13:49 +0530 Subject: x86: uv_bau.h: fix dubious bitfield Impact: cleanup, avoid sparse warnings declare bitfield as unsigned to avoid dubious bitfield issue CHECK arch/x86/kernel/tlb_64.c arch/x86/include/asm/uv/uv_bau.h:136:22: warning: dubious bitfield without explicit `signed' or `unsigned' arch/x86/include/asm/uv/uv_bau.h:138:25: warning: dubious bitfield without explicit `signed' or `unsigned' arch/x86/include/asm/uv/uv_bau.h:140:15: warning: dubious bitfield without explicit `signed' or `unsigned' arch/x86/include/asm/uv/uv_bau.h:143:14: warning: dubious bitfield without explicit `signed' or `unsigned' arch/x86/include/asm/uv/uv_bau.h:146:14: warning: dubious bitfield without explicit `signed' or `unsigned' arch/x86/include/asm/uv/uv_bau.h:149:18: warning: dubious bitfield without explicit `signed' or `unsigned' arch/x86/include/asm/uv/uv_bau.h:151:18: warning: dubious bitfield without explicit `signed' or `unsigned' arch/x86/include/asm/uv/uv_bau.h:155:14: error: dubious one-bit signed bitfield arch/x86/include/asm/uv/uv_bau.h:159:18: error: dubious one-bit signed bitfield arch/x86/include/asm/uv/uv_bau.h:173:19: error: dubious one-bit signed bitfield arch/x86/include/asm/uv/uv_bau.h:181:16: error: dubious one-bit signed bitfield arch/x86/include/asm/uv/uv_bau.h:185:18: error: dubious one-bit signed bitfield arch/x86/include/asm/uv/uv_bau.h:188:16: error: dubious one-bit signed bitfield CHECK arch/x86/kernel/tlb_uv.c arch/x86/include/asm/uv/uv_bau.h:136:22: warning: dubious bitfield without explicit `signed' or `unsigned' arch/x86/include/asm/uv/uv_bau.h:138:25: warning: dubious bitfield without explicit `signed' or `unsigned' arch/x86/include/asm/uv/uv_bau.h:140:15: warning: dubious bitfield without explicit `signed' or `unsigned' arch/x86/include/asm/uv/uv_bau.h:143:14: warning: dubious bitfield without explicit `signed' or `unsigned' arch/x86/include/asm/uv/uv_bau.h:146:14: warning: dubious bitfield without explicit `signed' or `unsigned' arch/x86/include/asm/uv/uv_bau.h:149:18: warning: dubious bitfield without explicit `signed' or `unsigned' arch/x86/include/asm/uv/uv_bau.h:151:18: warning: dubious bitfield without explicit `signed' or `unsigned' arch/x86/include/asm/uv/uv_bau.h:155:14: error: dubious one-bit signed bitfield arch/x86/include/asm/uv/uv_bau.h:159:18: error: dubious one-bit signed bitfield arch/x86/include/asm/uv/uv_bau.h:173:19: error: dubious one-bit signed bitfield arch/x86/include/asm/uv/uv_bau.h:181:16: error: dubious one-bit signed bitfield arch/x86/include/asm/uv/uv_bau.h:185:18: error: dubious one-bit signed bitfield arch/x86/include/asm/uv/uv_bau.h:188:16: error: dubious one-bit signed bitfield Signed-off-by: Jaswinder Singh Rajput Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/uv/uv_bau.h | 46 ++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index e2363253bbbf..50423c7b56b2 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -133,61 +133,61 @@ struct bau_msg_payload { * see table 4.2.3.0.1 in broacast_assist spec. */ struct bau_msg_header { - int dest_subnodeid:6; /* must be zero */ + unsigned int dest_subnodeid:6; /* must be zero */ /* bits 5:0 */ - int base_dest_nodeid:15; /* nasid>>1 (pnode) of first bit in node_map */ - /* bits 20:6 */ - int command:8; /* message type */ + unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */ + /* bits 20:6 */ /* first bit in node_map */ + unsigned int command:8; /* message type */ /* bits 28:21 */ /* 0x38: SN3net EndPoint Message */ - int rsvd_1:3; /* must be zero */ + unsigned int rsvd_1:3; /* must be zero */ /* bits 31:29 */ /* int will align on 32 bits */ - int rsvd_2:9; /* must be zero */ + unsigned int rsvd_2:9; /* must be zero */ /* bits 40:32 */ /* Suppl_A is 56-41 */ - int payload_2a:8; /* becomes byte 16 of msg */ + unsigned int payload_2a:8;/* becomes byte 16 of msg */ /* bits 48:41 */ /* not currently using */ - int payload_2b:8; /* becomes byte 17 of msg */ + unsigned int payload_2b:8;/* becomes byte 17 of msg */ /* bits 56:49 */ /* not currently using */ /* Address field (96:57) is never used as an address (these are address bits 42:3) */ - int rsvd_3:1; /* must be zero */ + unsigned int rsvd_3:1; /* must be zero */ /* bit 57 */ /* address bits 27:4 are payload */ /* these 24 bits become bytes 12-14 of msg */ - int replied_to:1; /* sent as 0 by the source to byte 12 */ + unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */ /* bit 58 */ - int payload_1a:5; /* not currently used */ + unsigned int payload_1a:5;/* not currently used */ /* bits 63:59 */ - int payload_1b:8; /* not currently used */ + unsigned int payload_1b:8;/* not currently used */ /* bits 71:64 */ - int payload_1c:8; /* not currently used */ + unsigned int payload_1c:8;/* not currently used */ /* bits 79:72 */ - int payload_1d:2; /* not currently used */ + unsigned int payload_1d:2;/* not currently used */ /* bits 81:80 */ - int rsvd_4:7; /* must be zero */ + unsigned int rsvd_4:7; /* must be zero */ /* bits 88:82 */ - int sw_ack_flag:1; /* software acknowledge flag */ + unsigned int sw_ack_flag:1;/* software acknowledge flag */ /* bit 89 */ /* INTD trasactions at destination are to wait for software acknowledge */ - int rsvd_5:6; /* must be zero */ + unsigned int rsvd_5:6; /* must be zero */ /* bits 95:90 */ - int rsvd_6:5; /* must be zero */ + unsigned int rsvd_6:5; /* must be zero */ /* bits 100:96 */ - int int_both:1; /* if 1, interrupt both sockets on the blade */ + unsigned int int_both:1;/* if 1, interrupt both sockets on the blade */ /* bit 101*/ - int fairness:3; /* usually zero */ + unsigned int fairness:3;/* usually zero */ /* bits 104:102 */ - int multilevel:1; /* multi-level multicast format */ + unsigned int multilevel:1; /* multi-level multicast format */ /* bit 105 */ /* 0 for TLB: endpoint multi-unicast messages */ - int chaining:1; /* next descriptor is part of this activation*/ + unsigned int chaining:1;/* next descriptor is part of this activation*/ /* bit 106 */ - int rsvd_7:21; /* must be zero */ + unsigned int rsvd_7:21; /* must be zero */ /* bits 127:107 */ }; -- cgit v1.2.3 From 7820b75643a763abf595c99fab963000ffc8b5f0 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Tue, 30 Dec 2008 22:05:55 +0530 Subject: x86: xsave.c: restore_user_xstate should be static Impact: cleanup, reduce kernel size a bit, avoid sparse warning Fixes sparse warning: arch/x86/kernel/xsave.c:162:5: warning: symbol 'restore_user_xstate' was not declared. Should it be static? Signed-off-by: Jaswinder Singh Rajput Signed-off-by: H. Peter Anvin --- arch/x86/kernel/xsave.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 15c3e6999182..2b54fe002e94 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -159,7 +159,7 @@ int save_i387_xstate(void __user *buf) * Restore the extended state if present. Otherwise, restore the FP/SSE * state. */ -int restore_user_xstate(void __user *buf) +static int restore_user_xstate(void __user *buf) { struct _fpx_sw_bytes fx_sw_user; u64 mask; -- cgit v1.2.3 From 457533a7d3402d1d91fbc125c8bd1bd16dcd3cd4 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 31 Dec 2008 15:11:37 +0100 Subject: [PATCH] fix scaled & unscaled cputime accounting The utimescaled / stimescaled fields in the task structure and the global cpustat should be set on all architectures. On s390 the calls to account_user_time_scaled and account_system_time_scaled never have been added. In addition system time that is accounted as guest time to the user time of a process is accounted to the scaled system time instead of the scaled user time. To fix the bugs and to prevent future forgetfulness this patch merges account_system_time_scaled into account_system_time and account_user_time_scaled into account_user_time. Cc: Benjamin Herrenschmidt Cc: Hidetoshi Seto Cc: Tony Luck Cc: Jeremy Fitzhardinge Cc: Chris Wright Cc: Michael Neuling Acked-by: Paul Mackerras Signed-off-by: Martin Schwidefsky --- arch/ia64/kernel/time.c | 12 ++++-------- arch/powerpc/kernel/time.c | 7 ++----- arch/s390/kernel/vtime.c | 10 +++++----- include/linux/kernel_stat.h | 6 ++---- kernel/sched.c | 41 ++++++++++++++++------------------------- kernel/time/tick-sched.c | 5 +++-- kernel/timer.c | 12 +++++------- 7 files changed, 37 insertions(+), 56 deletions(-) (limited to 'arch') diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 65c10a42c88f..4ee367817049 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -93,13 +93,11 @@ void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next) now = ia64_get_itc(); delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp)); - account_system_time(prev, 0, delta_stime); - account_system_time_scaled(prev, delta_stime); + account_system_time(prev, 0, delta_stime, delta_stime); if (pi->ac_utime) { delta_utime = cycle_to_cputime(pi->ac_utime); - account_user_time(prev, delta_utime); - account_user_time_scaled(prev, delta_utime); + account_user_time(prev, delta_utime, delta_utime); } pi->ac_stamp = ni->ac_stamp = now; @@ -122,8 +120,7 @@ void account_system_vtime(struct task_struct *tsk) now = ia64_get_itc(); delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp)); - account_system_time(tsk, 0, delta_stime); - account_system_time_scaled(tsk, delta_stime); + account_system_time(tsk, 0, delta_stime, delta_stime); ti->ac_stime = 0; ti->ac_stamp = now; @@ -143,8 +140,7 @@ void account_process_tick(struct task_struct *p, int user_tick) if (ti->ac_utime) { delta_utime = cycle_to_cputime(ti->ac_utime); - account_user_time(p, delta_utime); - account_user_time_scaled(p, delta_utime); + account_user_time(p, delta_utime, delta_utime); ti->ac_utime = 0; } } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index e1f3a5140429..92650ccad2e1 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -256,8 +256,7 @@ void account_system_vtime(struct task_struct *tsk) delta += sys_time; get_paca()->system_time = 0; } - account_system_time(tsk, 0, delta); - account_system_time_scaled(tsk, deltascaled); + account_system_time(tsk, 0, delta, deltascaled); per_cpu(cputime_last_delta, smp_processor_id()) = delta; per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled; local_irq_restore(flags); @@ -275,10 +274,8 @@ void account_process_tick(struct task_struct *tsk, int user_tick) utime = get_paca()->user_time; get_paca()->user_time = 0; - account_user_time(tsk, utime); - utimescaled = cputime_to_scaled(utime); - account_user_time_scaled(tsk, utimescaled); + account_user_time(tsk, utime, utimescaled); } /* diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 75a6e62ea973..07283aea2e56 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -50,12 +50,12 @@ void account_process_tick(struct task_struct *tsk, int user_tick) rcu_user_flag = cputime != 0; S390_lowcore.user_timer -= cputime << 12; S390_lowcore.steal_clock -= cputime << 12; - account_user_time(tsk, cputime); + account_user_time(tsk, cputime, cputime); cputime = S390_lowcore.system_timer >> 12; S390_lowcore.system_timer -= cputime << 12; S390_lowcore.steal_clock -= cputime << 12; - account_system_time(tsk, HARDIRQ_OFFSET, cputime); + account_system_time(tsk, HARDIRQ_OFFSET, cputime, cputime); cputime = S390_lowcore.steal_clock; if ((__s64) cputime > 0) { @@ -82,12 +82,12 @@ void account_vtime(struct task_struct *tsk) cputime = S390_lowcore.user_timer >> 12; S390_lowcore.user_timer -= cputime << 12; S390_lowcore.steal_clock -= cputime << 12; - account_user_time(tsk, cputime); + account_user_time(tsk, cputime, cputime); cputime = S390_lowcore.system_timer >> 12; S390_lowcore.system_timer -= cputime << 12; S390_lowcore.steal_clock -= cputime << 12; - account_system_time(tsk, 0, cputime); + account_system_time(tsk, 0, cputime, cputime); } /* @@ -107,7 +107,7 @@ void account_system_vtime(struct task_struct *tsk) cputime = S390_lowcore.system_timer >> 12; S390_lowcore.system_timer -= cputime << 12; S390_lowcore.steal_clock -= cputime << 12; - account_system_time(tsk, 0, cputime); + account_system_time(tsk, 0, cputime, cputime); } EXPORT_SYMBOL_GPL(account_system_vtime); diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 4ee4b3d2316f..c78a459662a6 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -79,10 +79,8 @@ static inline unsigned int kstat_irqs(unsigned int irq) } extern unsigned long long task_delta_exec(struct task_struct *); -extern void account_user_time(struct task_struct *, cputime_t); -extern void account_user_time_scaled(struct task_struct *, cputime_t); -extern void account_system_time(struct task_struct *, int, cputime_t); -extern void account_system_time_scaled(struct task_struct *, cputime_t); +extern void account_user_time(struct task_struct *, cputime_t, cputime_t); +extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); extern void account_steal_time(struct task_struct *, cputime_t); #endif /* _LINUX_KERNEL_STAT_H */ diff --git a/kernel/sched.c b/kernel/sched.c index fff1c4a20b65..5b03679ff712 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4080,13 +4080,17 @@ unsigned long long task_delta_exec(struct task_struct *p) * Account user cpu time to a process. * @p: the process that the cpu time gets accounted to * @cputime: the cpu time spent in user space since the last update + * @cputime_scaled: cputime scaled by cpu frequency */ -void account_user_time(struct task_struct *p, cputime_t cputime) +void account_user_time(struct task_struct *p, cputime_t cputime, + cputime_t cputime_scaled) { struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; cputime64_t tmp; + /* Add user time to process. */ p->utime = cputime_add(p->utime, cputime); + p->utimescaled = cputime_add(p->utimescaled, cputime_scaled); account_group_user_time(p, cputime); /* Add user time to cpustat. */ @@ -4103,51 +4107,49 @@ void account_user_time(struct task_struct *p, cputime_t cputime) * Account guest cpu time to a process. * @p: the process that the cpu time gets accounted to * @cputime: the cpu time spent in virtual machine since the last update + * @cputime_scaled: cputime scaled by cpu frequency */ -static void account_guest_time(struct task_struct *p, cputime_t cputime) +static void account_guest_time(struct task_struct *p, cputime_t cputime, + cputime_t cputime_scaled) { cputime64_t tmp; struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; tmp = cputime_to_cputime64(cputime); + /* Add guest time to process. */ p->utime = cputime_add(p->utime, cputime); + p->utimescaled = cputime_add(p->utimescaled, cputime_scaled); account_group_user_time(p, cputime); p->gtime = cputime_add(p->gtime, cputime); + /* Add guest time to cpustat. */ cpustat->user = cputime64_add(cpustat->user, tmp); cpustat->guest = cputime64_add(cpustat->guest, tmp); } -/* - * Account scaled user cpu time to a process. - * @p: the process that the cpu time gets accounted to - * @cputime: the cpu time spent in user space since the last update - */ -void account_user_time_scaled(struct task_struct *p, cputime_t cputime) -{ - p->utimescaled = cputime_add(p->utimescaled, cputime); -} - /* * Account system cpu time to a process. * @p: the process that the cpu time gets accounted to * @hardirq_offset: the offset to subtract from hardirq_count() * @cputime: the cpu time spent in kernel space since the last update + * @cputime_scaled: cputime scaled by cpu frequency */ void account_system_time(struct task_struct *p, int hardirq_offset, - cputime_t cputime) + cputime_t cputime, cputime_t cputime_scaled) { struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; struct rq *rq = this_rq(); cputime64_t tmp; if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { - account_guest_time(p, cputime); + account_guest_time(p, cputime, cputime_scaled); return; } + /* Add system time to process. */ p->stime = cputime_add(p->stime, cputime); + p->stimescaled = cputime_add(p->stimescaled, cputime_scaled); account_group_system_time(p, cputime); /* Add system time to cpustat. */ @@ -4166,17 +4168,6 @@ void account_system_time(struct task_struct *p, int hardirq_offset, acct_update_integrals(p); } -/* - * Account scaled system cpu time to a process. - * @p: the process that the cpu time gets accounted to - * @hardirq_offset: the offset to subtract from hardirq_count() - * @cputime: the cpu time spent in kernel space since the last update - */ -void account_system_time_scaled(struct task_struct *p, cputime_t cputime) -{ - p->stimescaled = cputime_add(p->stimescaled, cputime); -} - /* * Account for involuntary wait time. * @p: the process from which the cpu time has been stolen diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 8f3fc2582d38..1f2fce2479fe 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -420,6 +420,7 @@ void tick_nohz_restart_sched_tick(void) int cpu = smp_processor_id(); struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); unsigned long ticks; + cputime_t cputime; ktime_t now; local_irq_disable(); @@ -452,8 +453,8 @@ void tick_nohz_restart_sched_tick(void) */ if (ticks && ticks < LONG_MAX) { add_preempt_count(HARDIRQ_OFFSET); - account_system_time(current, HARDIRQ_OFFSET, - jiffies_to_cputime(ticks)); + cputime = jiffies_to_cputime(ticks); + account_system_time(current, HARDIRQ_OFFSET, cputime, cputime); sub_preempt_count(HARDIRQ_OFFSET); } diff --git a/kernel/timer.c b/kernel/timer.c index 566257d1dc10..b5efb528aa1d 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1023,13 +1023,11 @@ void account_process_tick(struct task_struct *p, int user_tick) { cputime_t one_jiffy = jiffies_to_cputime(1); - if (user_tick) { - account_user_time(p, one_jiffy); - account_user_time_scaled(p, cputime_to_scaled(one_jiffy)); - } else { - account_system_time(p, HARDIRQ_OFFSET, one_jiffy); - account_system_time_scaled(p, cputime_to_scaled(one_jiffy)); - } + if (user_tick) + account_user_time(p, one_jiffy, cputime_to_scaled(one_jiffy)); + else + account_system_time(p, HARDIRQ_OFFSET, one_jiffy, + cputime_to_scaled(one_jiffy)); } #endif -- cgit v1.2.3 From 79741dd35713ff4f6fd0eafd59fa94e8a4ba922d Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 31 Dec 2008 15:11:38 +0100 Subject: [PATCH] idle cputime accounting The cpu time spent by the idle process actually doing something is currently accounted as idle time. This is plain wrong, the architectures that support VIRT_CPU_ACCOUNTING=y can do better: distinguish between the time spent doing nothing and the time spent by idle doing work. The first is accounted with account_idle_time and the second with account_system_time. The architectures that use the account_xxx_time interface directly and not the account_xxx_ticks interface now need to do the check for the idle process in their arch code. In particular to improve the system vs true idle time accounting the arch code needs to measure the true idle time instead of just testing for the idle process. To improve the tick based accounting as well we would need an architecture primitive that can tell us if the pt_regs of the interrupted context points to the magic instruction that halts the cpu. In addition idle time is no more added to the stime of the idle process. This field now contains the system time of the idle process as it should be. On systems without VIRT_CPU_ACCOUNTING this will always be zero as every tick that occurs while idle is running will be accounted as idle time. This patch contains the necessary common code changes to be able to distinguish idle system time and true idle time. The architectures with support for VIRT_CPU_ACCOUNTING need some changes to exploit this. Signed-off-by: Martin Schwidefsky --- arch/ia64/kernel/time.c | 10 ++++-- arch/powerpc/kernel/process.c | 1 + arch/powerpc/kernel/time.c | 13 +++++-- arch/s390/kernel/vtime.c | 20 ++++++++--- arch/x86/xen/time.c | 10 +++--- include/linux/kernel_stat.h | 7 +++- include/linux/sched.h | 1 - kernel/sched.c | 80 ++++++++++++++++++++++++++++++++++--------- kernel/time/tick-sched.c | 13 ++++--- kernel/timer.c | 13 ------- 10 files changed, 114 insertions(+), 54 deletions(-) (limited to 'arch') diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 4ee367817049..f0ebb342409d 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -93,7 +93,10 @@ void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next) now = ia64_get_itc(); delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp)); - account_system_time(prev, 0, delta_stime, delta_stime); + if (idle_task(smp_processor_id()) != prev) + account_system_time(prev, 0, delta_stime, delta_stime); + else + account_idle_time(delta_stime); if (pi->ac_utime) { delta_utime = cycle_to_cputime(pi->ac_utime); @@ -120,7 +123,10 @@ void account_system_vtime(struct task_struct *tsk) now = ia64_get_itc(); delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp)); - account_system_time(tsk, 0, delta_stime, delta_stime); + if (irq_count() || idle_task(smp_processor_id()) != tsk) + account_system_time(tsk, 0, delta_stime, delta_stime); + else + account_idle_time(delta_stime); ti->ac_stime = 0; ti->ac_stamp = now; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 51b201ddf9a1..fb7049c054c0 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 92650ccad2e1..3be355c1cfa7 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -256,7 +256,10 @@ void account_system_vtime(struct task_struct *tsk) delta += sys_time; get_paca()->system_time = 0; } - account_system_time(tsk, 0, delta, deltascaled); + if (in_irq() || idle_task(smp_processor_id()) != tsk) + account_system_time(tsk, 0, delta, deltascaled); + else + account_idle_time(delta); per_cpu(cputime_last_delta, smp_processor_id()) = delta; per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled; local_irq_restore(flags); @@ -335,8 +338,12 @@ void calculate_steal_time(void) tb = mftb(); purr = mfspr(SPRN_PURR); stolen = (tb - pme->tb) - (purr - pme->purr); - if (stolen > 0) - account_steal_time(current, stolen); + if (stolen > 0) { + if (idle_task(smp_processor_id()) != current) + account_steal_time(stolen); + else + account_idle_time(stolen); + } pme->tb = tb; pme->purr = purr; } diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 07283aea2e56..4a4a34caec55 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -55,13 +55,19 @@ void account_process_tick(struct task_struct *tsk, int user_tick) cputime = S390_lowcore.system_timer >> 12; S390_lowcore.system_timer -= cputime << 12; S390_lowcore.steal_clock -= cputime << 12; - account_system_time(tsk, HARDIRQ_OFFSET, cputime, cputime); + if (idle_task(smp_processor_id()) != current) + account_system_time(tsk, HARDIRQ_OFFSET, cputime, cputime); + else + account_idle_time(cputime); cputime = S390_lowcore.steal_clock; if ((__s64) cputime > 0) { cputime >>= 12; S390_lowcore.steal_clock -= cputime << 12; - account_steal_time(tsk, cputime); + if (idle_task(smp_processor_id()) != current) + account_steal_time(cputime); + else + account_idle_time(cputime); } } @@ -87,7 +93,10 @@ void account_vtime(struct task_struct *tsk) cputime = S390_lowcore.system_timer >> 12; S390_lowcore.system_timer -= cputime << 12; S390_lowcore.steal_clock -= cputime << 12; - account_system_time(tsk, 0, cputime, cputime); + if (idle_task(smp_processor_id()) != current) + account_system_time(tsk, 0, cputime, cputime); + else + account_idle_time(cputime); } /* @@ -107,7 +116,10 @@ void account_system_vtime(struct task_struct *tsk) cputime = S390_lowcore.system_timer >> 12; S390_lowcore.system_timer -= cputime << 12; S390_lowcore.steal_clock -= cputime << 12; - account_system_time(tsk, 0, cputime, cputime); + if (in_irq() || idle_task(smp_processor_id()) != current) + account_system_time(tsk, 0, cputime, cputime); + else + account_idle_time(cputime); } EXPORT_SYMBOL_GPL(account_system_vtime); diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index c9f7cda48ed7..732e52dc991a 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -132,8 +132,7 @@ static void do_stolen_accounting(void) *snap = state; /* Add the appropriate number of ticks of stolen time, - including any left-overs from last time. Passing NULL to - account_steal_time accounts the time as stolen. */ + including any left-overs from last time. */ stolen = runnable + offline + __get_cpu_var(residual_stolen); if (stolen < 0) @@ -141,11 +140,10 @@ static void do_stolen_accounting(void) ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); __get_cpu_var(residual_stolen) = stolen; - account_steal_time(NULL, ticks); + account_steal_ticks(ticks); /* Add the appropriate number of ticks of blocked time, - including any left-overs from last time. Passing idle to - account_steal_time accounts the time as idle/wait. */ + including any left-overs from last time. */ blocked += __get_cpu_var(residual_blocked); if (blocked < 0) @@ -153,7 +151,7 @@ static void do_stolen_accounting(void) ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); __get_cpu_var(residual_blocked) = blocked; - account_steal_time(idle_task(smp_processor_id()), ticks); + account_idle_ticks(ticks); } /* diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index c78a459662a6..570d20413119 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -81,6 +81,11 @@ static inline unsigned int kstat_irqs(unsigned int irq) extern unsigned long long task_delta_exec(struct task_struct *); extern void account_user_time(struct task_struct *, cputime_t, cputime_t); extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); -extern void account_steal_time(struct task_struct *, cputime_t); +extern void account_steal_time(cputime_t); +extern void account_idle_time(cputime_t); + +extern void account_process_tick(struct task_struct *, int user); +extern void account_steal_ticks(unsigned long ticks); +extern void account_idle_ticks(unsigned long ticks); #endif /* _LINUX_KERNEL_STAT_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 8395e715809d..b475d4db8053 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -284,7 +284,6 @@ long io_schedule_timeout(long timeout); extern void cpu_init (void); extern void trap_init(void); -extern void account_process_tick(struct task_struct *task, int user); extern void update_process_times(int user); extern void scheduler_tick(void); diff --git a/kernel/sched.c b/kernel/sched.c index 5b03679ff712..635eaffe1e4c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4139,7 +4139,6 @@ void account_system_time(struct task_struct *p, int hardirq_offset, cputime_t cputime, cputime_t cputime_scaled) { struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; - struct rq *rq = this_rq(); cputime64_t tmp; if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { @@ -4158,37 +4157,84 @@ void account_system_time(struct task_struct *p, int hardirq_offset, cpustat->irq = cputime64_add(cpustat->irq, tmp); else if (softirq_count()) cpustat->softirq = cputime64_add(cpustat->softirq, tmp); - else if (p != rq->idle) - cpustat->system = cputime64_add(cpustat->system, tmp); - else if (atomic_read(&rq->nr_iowait) > 0) - cpustat->iowait = cputime64_add(cpustat->iowait, tmp); else - cpustat->idle = cputime64_add(cpustat->idle, tmp); + cpustat->system = cputime64_add(cpustat->system, tmp); + /* Account for system time used */ acct_update_integrals(p); } /* * Account for involuntary wait time. - * @p: the process from which the cpu time has been stolen * @steal: the cpu time spent in involuntary wait */ -void account_steal_time(struct task_struct *p, cputime_t steal) +void account_steal_time(cputime_t cputime) +{ + struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; + cputime64_t cputime64 = cputime_to_cputime64(cputime); + + cpustat->steal = cputime64_add(cpustat->steal, cputime64); +} + +/* + * Account for idle time. + * @cputime: the cpu time spent in idle wait + */ +void account_idle_time(cputime_t cputime) { struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; - cputime64_t tmp = cputime_to_cputime64(steal); + cputime64_t cputime64 = cputime_to_cputime64(cputime); struct rq *rq = this_rq(); - if (p == rq->idle) { - p->stime = cputime_add(p->stime, steal); - if (atomic_read(&rq->nr_iowait) > 0) - cpustat->iowait = cputime64_add(cpustat->iowait, tmp); - else - cpustat->idle = cputime64_add(cpustat->idle, tmp); - } else - cpustat->steal = cputime64_add(cpustat->steal, tmp); + if (atomic_read(&rq->nr_iowait) > 0) + cpustat->iowait = cputime64_add(cpustat->iowait, cputime64); + else + cpustat->idle = cputime64_add(cpustat->idle, cputime64); +} + +#ifndef CONFIG_VIRT_CPU_ACCOUNTING + +/* + * Account a single tick of cpu time. + * @p: the process that the cpu time gets accounted to + * @user_tick: indicates if the tick is a user or a system tick + */ +void account_process_tick(struct task_struct *p, int user_tick) +{ + cputime_t one_jiffy = jiffies_to_cputime(1); + cputime_t one_jiffy_scaled = cputime_to_scaled(one_jiffy); + struct rq *rq = this_rq(); + + if (user_tick) + account_user_time(p, one_jiffy, one_jiffy_scaled); + else if (p != rq->idle) + account_system_time(p, HARDIRQ_OFFSET, one_jiffy, + one_jiffy_scaled); + else + account_idle_time(one_jiffy); +} + +/* + * Account multiple ticks of steal time. + * @p: the process from which the cpu time has been stolen + * @ticks: number of stolen ticks + */ +void account_steal_ticks(unsigned long ticks) +{ + account_steal_time(jiffies_to_cputime(ticks)); +} + +/* + * Account multiple ticks of idle time. + * @ticks: number of stolen ticks + */ +void account_idle_ticks(unsigned long ticks) +{ + account_idle_time(jiffies_to_cputime(ticks)); } +#endif + /* * Use precise platform statistics if available: */ diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 1f2fce2479fe..611fa4c0baab 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -419,8 +419,9 @@ void tick_nohz_restart_sched_tick(void) { int cpu = smp_processor_id(); struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); +#ifndef CONFIG_VIRT_CPU_ACCOUNTING unsigned long ticks; - cputime_t cputime; +#endif ktime_t now; local_irq_disable(); @@ -442,6 +443,7 @@ void tick_nohz_restart_sched_tick(void) tick_do_update_jiffies64(now); cpu_clear(cpu, nohz_cpu_mask); +#ifndef CONFIG_VIRT_CPU_ACCOUNTING /* * We stopped the tick in idle. Update process times would miss the * time we slept as update_process_times does only a 1 tick @@ -451,12 +453,9 @@ void tick_nohz_restart_sched_tick(void) /* * We might be one off. Do not randomly account a huge number of ticks! */ - if (ticks && ticks < LONG_MAX) { - add_preempt_count(HARDIRQ_OFFSET); - cputime = jiffies_to_cputime(ticks); - account_system_time(current, HARDIRQ_OFFSET, cputime, cputime); - sub_preempt_count(HARDIRQ_OFFSET); - } + if (ticks && ticks < LONG_MAX) + account_idle_ticks(ticks); +#endif touch_softlockup_watchdog(); /* diff --git a/kernel/timer.c b/kernel/timer.c index b5efb528aa1d..dee3f641a7a7 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1018,19 +1018,6 @@ unsigned long get_next_timer_interrupt(unsigned long now) } #endif -#ifndef CONFIG_VIRT_CPU_ACCOUNTING -void account_process_tick(struct task_struct *p, int user_tick) -{ - cputime_t one_jiffy = jiffies_to_cputime(1); - - if (user_tick) - account_user_time(p, one_jiffy, cputime_to_scaled(one_jiffy)); - else - account_system_time(p, HARDIRQ_OFFSET, one_jiffy, - cputime_to_scaled(one_jiffy)); -} -#endif - /* * Called from the timer interrupt handler to charge one tick to the current * process. user_tick is 1 if the tick is user time, 0 for system. -- cgit v1.2.3 From aa5e97ce4bbc9d5daeec16b1d15bb3f6b7b4f4d4 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 31 Dec 2008 15:11:39 +0100 Subject: [PATCH] improve precision of process accounting. The unit of the cputime accouting values that are stored per process is currently a microsecond. The CPU timer has a maximum granularity of 2**-12 microseconds. There is no benefit in storing the per process values in the lesser precision and there is the disadvantage that the backend has to do the rounding to microseconds. The better solution is to use the maximum granularity of the CPU timer as cputime unit. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cputime.h | 42 ++++++++--------- arch/s390/include/asm/lowcore.h | 40 ++++++++-------- arch/s390/include/asm/system.h | 4 +- arch/s390/include/asm/thread_info.h | 2 + arch/s390/kernel/vtime.c | 93 ++++++++++++++++--------------------- 5 files changed, 85 insertions(+), 96 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 133ce054fc89..521726430afa 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -11,7 +11,7 @@ #include -/* We want to use micro-second resolution. */ +/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ typedef unsigned long long cputime_t; typedef unsigned long long cputime64_t; @@ -53,9 +53,9 @@ __div(unsigned long long n, unsigned int base) #define cputime_ge(__a, __b) ((__a) >= (__b)) #define cputime_lt(__a, __b) ((__a) < (__b)) #define cputime_le(__a, __b) ((__a) <= (__b)) -#define cputime_to_jiffies(__ct) (__div((__ct), 1000000 / HZ)) +#define cputime_to_jiffies(__ct) (__div((__ct), 4096000000ULL / HZ)) #define cputime_to_scaled(__ct) (__ct) -#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (1000000 / HZ)) +#define jiffies_to_cputime(__hz) ((cputime_t)(__hz) * (4096000000ULL / HZ)) #define cputime64_zero (0ULL) #define cputime64_add(__a, __b) ((__a) + (__b)) @@ -64,7 +64,7 @@ __div(unsigned long long n, unsigned int base) static inline u64 cputime64_to_jiffies64(cputime64_t cputime) { - do_div(cputime, 1000000 / HZ); + do_div(cputime, 4096000000ULL / HZ); return cputime; } @@ -74,13 +74,13 @@ cputime64_to_jiffies64(cputime64_t cputime) static inline unsigned int cputime_to_msecs(const cputime_t cputime) { - return __div(cputime, 1000); + return __div(cputime, 4096000); } static inline cputime_t msecs_to_cputime(const unsigned int m) { - return (cputime_t) m * 1000; + return (cputime_t) m * 4096000; } /* @@ -89,13 +89,13 @@ msecs_to_cputime(const unsigned int m) static inline unsigned int cputime_to_secs(const cputime_t cputime) { - return __div(cputime, 1000000); + return __div(cputime, 2048000000) >> 1; } static inline cputime_t secs_to_cputime(const unsigned int s) { - return (cputime_t) s * 1000000; + return (cputime_t) s * 4096000000ULL; } /* @@ -104,7 +104,7 @@ secs_to_cputime(const unsigned int s) static inline cputime_t timespec_to_cputime(const struct timespec *value) { - return value->tv_nsec / 1000 + (u64) value->tv_sec * 1000000; + return value->tv_nsec * 4096 / 1000 + (u64) value->tv_sec * 4096000000ULL; } static inline void @@ -114,12 +114,12 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value) register_pair rp; rp.pair = cputime >> 1; - asm ("dr %0,%1" : "+d" (rp) : "d" (1000000 >> 1)); - value->tv_nsec = rp.subreg.even * 1000; + asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL)); + value->tv_nsec = rp.subreg.even * 1000 / 4096; value->tv_sec = rp.subreg.odd; #else - value->tv_nsec = (cputime % 1000000) * 1000; - value->tv_sec = cputime / 1000000; + value->tv_nsec = (cputime % 4096000000ULL) * 1000 / 4096; + value->tv_sec = cputime / 4096000000ULL; #endif } @@ -131,7 +131,7 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value) static inline cputime_t timeval_to_cputime(const struct timeval *value) { - return value->tv_usec + (u64) value->tv_sec * 1000000; + return value->tv_usec * 4096 + (u64) value->tv_sec * 4096000000ULL; } static inline void @@ -141,12 +141,12 @@ cputime_to_timeval(const cputime_t cputime, struct timeval *value) register_pair rp; rp.pair = cputime >> 1; - asm ("dr %0,%1" : "+d" (rp) : "d" (1000000 >> 1)); - value->tv_usec = rp.subreg.even; + asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL)); + value->tv_usec = rp.subreg.even / 4096; value->tv_sec = rp.subreg.odd; #else - value->tv_usec = cputime % 1000000; - value->tv_sec = cputime / 1000000; + value->tv_usec = cputime % 4096000000ULL; + value->tv_sec = cputime / 4096000000ULL; #endif } @@ -156,13 +156,13 @@ cputime_to_timeval(const cputime_t cputime, struct timeval *value) static inline clock_t cputime_to_clock_t(cputime_t cputime) { - return __div(cputime, 1000000 / USER_HZ); + return __div(cputime, 4096000000ULL / USER_HZ); } static inline cputime_t clock_t_to_cputime(unsigned long x) { - return (cputime_t) x * (1000000 / USER_HZ); + return (cputime_t) x * (4096000000ULL / USER_HZ); } /* @@ -171,7 +171,7 @@ clock_t_to_cputime(unsigned long x) static inline clock_t cputime64_to_clock_t(cputime64_t cputime) { - return __div(cputime, 1000000 / USER_HZ); + return __div(cputime, 4096000000ULL / USER_HZ); } #endif /* _S390_CPUTIME_H */ diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 0bc51d52a899..a547817cf1ab 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -67,11 +67,11 @@ #define __LC_SYNC_ENTER_TIMER 0x248 #define __LC_ASYNC_ENTER_TIMER 0x250 #define __LC_EXIT_TIMER 0x258 -#define __LC_LAST_UPDATE_TIMER 0x260 -#define __LC_USER_TIMER 0x268 -#define __LC_SYSTEM_TIMER 0x270 -#define __LC_LAST_UPDATE_CLOCK 0x278 -#define __LC_STEAL_CLOCK 0x280 +#define __LC_USER_TIMER 0x260 +#define __LC_SYSTEM_TIMER 0x268 +#define __LC_STEAL_TIMER 0x270 +#define __LC_LAST_UPDATE_TIMER 0x278 +#define __LC_LAST_UPDATE_CLOCK 0x280 #define __LC_RETURN_MCCK_PSW 0x288 #define __LC_KERNEL_STACK 0xC40 #define __LC_THREAD_INFO 0xC44 @@ -89,11 +89,11 @@ #define __LC_SYNC_ENTER_TIMER 0x250 #define __LC_ASYNC_ENTER_TIMER 0x258 #define __LC_EXIT_TIMER 0x260 -#define __LC_LAST_UPDATE_TIMER 0x268 -#define __LC_USER_TIMER 0x270 -#define __LC_SYSTEM_TIMER 0x278 -#define __LC_LAST_UPDATE_CLOCK 0x280 -#define __LC_STEAL_CLOCK 0x288 +#define __LC_USER_TIMER 0x268 +#define __LC_SYSTEM_TIMER 0x270 +#define __LC_STEAL_TIMER 0x278 +#define __LC_LAST_UPDATE_TIMER 0x280 +#define __LC_LAST_UPDATE_CLOCK 0x288 #define __LC_RETURN_MCCK_PSW 0x290 #define __LC_KERNEL_STACK 0xD40 #define __LC_THREAD_INFO 0xD48 @@ -252,11 +252,11 @@ struct _lowcore __u64 sync_enter_timer; /* 0x248 */ __u64 async_enter_timer; /* 0x250 */ __u64 exit_timer; /* 0x258 */ - __u64 last_update_timer; /* 0x260 */ - __u64 user_timer; /* 0x268 */ - __u64 system_timer; /* 0x270 */ - __u64 last_update_clock; /* 0x278 */ - __u64 steal_clock; /* 0x280 */ + __u64 user_timer; /* 0x260 */ + __u64 system_timer; /* 0x268 */ + __u64 steal_timer; /* 0x270 */ + __u64 last_update_timer; /* 0x278 */ + __u64 last_update_clock; /* 0x280 */ psw_t return_mcck_psw; /* 0x288 */ __u8 pad8[0xc00-0x290]; /* 0x290 */ @@ -343,11 +343,11 @@ struct _lowcore __u64 sync_enter_timer; /* 0x250 */ __u64 async_enter_timer; /* 0x258 */ __u64 exit_timer; /* 0x260 */ - __u64 last_update_timer; /* 0x268 */ - __u64 user_timer; /* 0x270 */ - __u64 system_timer; /* 0x278 */ - __u64 last_update_clock; /* 0x280 */ - __u64 steal_clock; /* 0x288 */ + __u64 user_timer; /* 0x268 */ + __u64 system_timer; /* 0x270 */ + __u64 steal_timer; /* 0x278 */ + __u64 last_update_timer; /* 0x280 */ + __u64 last_update_clock; /* 0x288 */ psw_t return_mcck_psw; /* 0x290 */ __u8 pad8[0xc00-0x2a0]; /* 0x2a0 */ /* System info area */ diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index 024ef42ed6d7..3a8b26eb1f2e 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -99,7 +99,7 @@ static inline void restore_access_regs(unsigned int *acrs) prev = __switch_to(prev,next); \ } while (0) -extern void account_vtime(struct task_struct *); +extern void account_vtime(struct task_struct *, struct task_struct *); extern void account_tick_vtime(struct task_struct *); extern void account_system_vtime(struct task_struct *); @@ -121,7 +121,7 @@ static inline void cmma_init(void) { } #define finish_arch_switch(prev) do { \ set_fs(current->thread.mm_segment); \ - account_vtime(prev); \ + account_vtime(prev, current); \ } while (0) #define nop() asm volatile("nop") diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index c1eaf9604da7..c544aa524535 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -47,6 +47,8 @@ struct thread_info { unsigned int cpu; /* current CPU */ int preempt_count; /* 0 => preemptable, <0 => BUG */ struct restart_block restart_block; + __u64 user_timer; + __u64 system_timer; }; /* diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 4a4a34caec55..1254a4d0d762 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -31,11 +31,10 @@ static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer); * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. */ -void account_process_tick(struct task_struct *tsk, int user_tick) +static void do_account_vtime(struct task_struct *tsk, int hardirq_offset) { - cputime_t cputime; - __u64 timer, clock; - int rcu_user_flag; + struct thread_info *ti = task_thread_info(tsk); + __u64 timer, clock, user, system, steal; timer = S390_lowcore.last_update_timer; clock = S390_lowcore.last_update_clock; @@ -44,59 +43,47 @@ void account_process_tick(struct task_struct *tsk, int user_tick) : "=m" (S390_lowcore.last_update_timer), "=m" (S390_lowcore.last_update_clock) ); S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; - S390_lowcore.steal_clock += S390_lowcore.last_update_clock - clock; + S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock; - cputime = S390_lowcore.user_timer >> 12; - rcu_user_flag = cputime != 0; - S390_lowcore.user_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_user_time(tsk, cputime, cputime); + user = S390_lowcore.user_timer - ti->user_timer; + S390_lowcore.steal_timer -= user; + ti->user_timer = S390_lowcore.user_timer; + account_user_time(tsk, user, user); - cputime = S390_lowcore.system_timer >> 12; - S390_lowcore.system_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; + system = S390_lowcore.system_timer - ti->system_timer; + S390_lowcore.steal_timer -= system; + ti->system_timer = S390_lowcore.system_timer; if (idle_task(smp_processor_id()) != current) - account_system_time(tsk, HARDIRQ_OFFSET, cputime, cputime); + account_system_time(tsk, hardirq_offset, system, system); else - account_idle_time(cputime); + account_idle_time(system); - cputime = S390_lowcore.steal_clock; - if ((__s64) cputime > 0) { - cputime >>= 12; - S390_lowcore.steal_clock -= cputime << 12; + steal = S390_lowcore.steal_timer; + if ((s64) steal > 0) { + S390_lowcore.steal_timer = 0; if (idle_task(smp_processor_id()) != current) - account_steal_time(cputime); + account_steal_time(steal); else - account_idle_time(cputime); + account_idle_time(steal); } } -/* - * Update process times based on virtual cpu times stored by entry.S - * to the lowcore fields user_timer, system_timer & steal_clock. - */ -void account_vtime(struct task_struct *tsk) +void account_vtime(struct task_struct *prev, struct task_struct *next) { - cputime_t cputime; - __u64 timer; - - timer = S390_lowcore.last_update_timer; - asm volatile (" STPT %0" /* Store current cpu timer value */ - : "=m" (S390_lowcore.last_update_timer) ); - S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; - - cputime = S390_lowcore.user_timer >> 12; - S390_lowcore.user_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - account_user_time(tsk, cputime, cputime); + struct thread_info *ti; + + do_account_vtime(prev, 0); + ti = task_thread_info(prev); + ti->user_timer = S390_lowcore.user_timer; + ti->system_timer = S390_lowcore.system_timer; + ti = task_thread_info(next); + S390_lowcore.user_timer = ti->user_timer; + S390_lowcore.system_timer = ti->system_timer; +} - cputime = S390_lowcore.system_timer >> 12; - S390_lowcore.system_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; - if (idle_task(smp_processor_id()) != current) - account_system_time(tsk, 0, cputime, cputime); - else - account_idle_time(cputime); +void account_process_tick(struct task_struct *tsk, int user_tick) +{ + do_account_vtime(tsk, HARDIRQ_OFFSET); } /* @@ -105,21 +92,21 @@ void account_vtime(struct task_struct *tsk) */ void account_system_vtime(struct task_struct *tsk) { - cputime_t cputime; - __u64 timer; + struct thread_info *ti = task_thread_info(tsk); + __u64 timer, system; timer = S390_lowcore.last_update_timer; asm volatile (" STPT %0" /* Store current cpu timer value */ : "=m" (S390_lowcore.last_update_timer) ); S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; - cputime = S390_lowcore.system_timer >> 12; - S390_lowcore.system_timer -= cputime << 12; - S390_lowcore.steal_clock -= cputime << 12; + system = S390_lowcore.system_timer - ti->system_timer; + S390_lowcore.steal_timer -= system; + ti->system_timer = S390_lowcore.system_timer; if (in_irq() || idle_task(smp_processor_id()) != current) - account_system_time(tsk, 0, cputime, cputime); + account_system_time(tsk, 0, system, system); else - account_idle_time(cputime); + account_idle_time(system); } EXPORT_SYMBOL_GPL(account_system_vtime); @@ -490,8 +477,8 @@ void init_cpu_vtimer(void) /* kick the virtual timer */ S390_lowcore.exit_timer = VTIMER_MAX_SLICE; S390_lowcore.last_update_timer = VTIMER_MAX_SLICE; - asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer)); asm volatile ("STCK %0" : "=m" (S390_lowcore.last_update_clock)); + asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer)); /* enable cpu timer interrupts */ __ctl_set_bit(0,10); -- cgit v1.2.3 From 6f43092441bda528dd38f2dc6c1e2522c5079fb7 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 31 Dec 2008 15:11:40 +0100 Subject: [PATCH] improve precision of idle time detection. Increase the precision of the idle time calculation that is exported to user space via /sys/devices/system/cpu/cpu/idle_time_us Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu.h | 3 +- arch/s390/kernel/process.c | 67 ++++++++++++++++++++++++++++++--------------- arch/s390/kernel/smp.c | 25 +++++++++-------- arch/s390/kernel/vtime.c | 3 +- 4 files changed, 61 insertions(+), 37 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h index e5a6a9ba3adf..89456df43c4a 100644 --- a/arch/s390/include/asm/cpu.h +++ b/arch/s390/include/asm/cpu.h @@ -14,7 +14,6 @@ struct s390_idle_data { spinlock_t lock; - unsigned int in_idle; unsigned long long idle_count; unsigned long long idle_enter; unsigned long long idle_time; @@ -26,7 +25,7 @@ void s390_idle_leave(void); static inline void s390_idle_check(void) { - if ((&__get_cpu_var(s390_idle))->in_idle) + if ((&__get_cpu_var(s390_idle))->idle_enter != 0ULL) s390_idle_leave(); } diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 04f8c67a6101..1e06436f07c2 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -79,30 +80,19 @@ DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = { .lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock) }; -static int s390_idle_enter(void) +void s390_idle_leave(void) { struct s390_idle_data *idle; + unsigned long long idle_time; idle = &__get_cpu_var(s390_idle); + idle_time = S390_lowcore.int_clock - idle->idle_enter; spin_lock(&idle->lock); + idle->idle_time += idle_time; + idle->idle_enter = 0ULL; idle->idle_count++; - idle->in_idle = 1; - idle->idle_enter = get_clock(); spin_unlock(&idle->lock); - vtime_stop_cpu_timer(); - return NOTIFY_OK; -} - -void s390_idle_leave(void) -{ - struct s390_idle_data *idle; - vtime_start_cpu_timer(); - idle = &__get_cpu_var(s390_idle); - spin_lock(&idle->lock); - idle->idle_time += get_clock() - idle->idle_enter; - idle->in_idle = 0; - spin_unlock(&idle->lock); } extern void s390_handle_mcck(void); @@ -111,16 +101,16 @@ extern void s390_handle_mcck(void); */ static void default_idle(void) { + struct s390_idle_data *idle = &__get_cpu_var(s390_idle); + unsigned long addr; + psw_t psw; + /* CPU is going idle. */ local_irq_disable(); if (need_resched()) { local_irq_enable(); return; } - if (s390_idle_enter() == NOTIFY_BAD) { - local_irq_enable(); - return; - } #ifdef CONFIG_HOTPLUG_CPU if (cpu_is_offline(smp_processor_id())) { preempt_enable_no_resched(); @@ -138,9 +128,42 @@ static void default_idle(void) trace_hardirqs_on(); /* Don't trace preempt off for idle. */ stop_critical_timings(); + vtime_stop_cpu_timer(); + + /* + * The inline assembly is equivalent to + * idle->idle_enter = get_clock(); + * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | + * PSW_MASK_IO | PSW_MASK_EXT); + * The difference is that the inline assembly makes sure that + * the stck instruction is right before the lpsw instruction. + * This is done to increase the precision. + */ + /* Wait for external, I/O or machine check interrupt. */ - __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | - PSW_MASK_IO | PSW_MASK_EXT); + psw.mask = psw_kernel_bits|PSW_MASK_WAIT|PSW_MASK_IO|PSW_MASK_EXT; +#ifndef __s390x__ + asm volatile( + " basr %0,0\n" + "0: ahi %0,1f-0b\n" + " st %0,4(%2)\n" + " stck 0(%3)\n" + " lpsw 0(%2)\n" + "1:" + : "=&d" (addr), "=m" (idle->idle_enter) + : "a" (&psw), "a" (&idle->idle_enter), "m" (psw) + : "memory", "cc"); +#else /* __s390x__ */ + asm volatile( + " larl %0,1f\n" + " stg %0,8(%2)\n" + " stck 0(%3)\n" + " lpswe 0(%2)\n" + "1:" + : "=&d" (addr), "=m" (idle->idle_enter) + : "a" (&psw), "a" (&idle->idle_enter), "m" (psw) + : "memory", "cc"); +#endif /* __s390x__ */ start_critical_timings(); } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 6fc78541dc57..3979a6fc0882 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -851,9 +851,11 @@ static ssize_t show_idle_count(struct sys_device *dev, unsigned long long idle_count; idle = &per_cpu(s390_idle, dev->id); - spin_lock_irq(&idle->lock); + spin_lock(&idle->lock); idle_count = idle->idle_count; - spin_unlock_irq(&idle->lock); + if (idle->idle_enter) + idle_count++; + spin_unlock(&idle->lock); return sprintf(buf, "%llu\n", idle_count); } static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL); @@ -862,18 +864,17 @@ static ssize_t show_idle_time(struct sys_device *dev, struct sysdev_attribute *attr, char *buf) { struct s390_idle_data *idle; - unsigned long long new_time; + unsigned long long now, idle_time, idle_enter; idle = &per_cpu(s390_idle, dev->id); - spin_lock_irq(&idle->lock); - if (idle->in_idle) { - new_time = get_clock(); - idle->idle_time += new_time - idle->idle_enter; - idle->idle_enter = new_time; - } - new_time = idle->idle_time; - spin_unlock_irq(&idle->lock); - return sprintf(buf, "%llu\n", new_time >> 12); + spin_lock(&idle->lock); + now = get_clock(); + idle_time = idle->idle_time; + idle_enter = idle->idle_enter; + if (idle_enter != 0ULL && idle_enter < now) + idle_time += now - idle_enter; + spin_unlock(&idle->lock); + return sprintf(buf, "%llu\n", idle_time >> 12); } static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL); diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 1254a4d0d762..25d21fef76ba 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -112,6 +112,7 @@ EXPORT_SYMBOL_GPL(account_system_vtime); static inline void set_vtimer(__u64 expires) { + struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer); __u64 timer; asm volatile (" STPT %0\n" /* Store current cpu timer value */ @@ -121,7 +122,7 @@ static inline void set_vtimer(__u64 expires) S390_lowcore.last_update_timer = expires; /* store expire time for this CPU timer */ - __get_cpu_var(virt_cpu_timer).to_expire = expires; + vq->to_expire = expires; } void vtime_start_cpu_timer(void) -- cgit v1.2.3 From 9cfb9b3c3a7361c793c031e9c3583b177ac5debd Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 31 Dec 2008 15:11:41 +0100 Subject: [PATCH] improve idle cputime accounting Distinguish the cputime of the idle process where idle is actually using cpu cycles from the cputime where idle is sleeping on an enabled wait psw. The former is accounted as system time, the later as idle time. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cpu.h | 4 +- arch/s390/include/asm/timer.h | 16 +- arch/s390/kernel/entry.S | 5 +- arch/s390/kernel/entry64.S | 5 +- arch/s390/kernel/process.c | 64 +------ arch/s390/kernel/s390_ext.c | 2 +- arch/s390/kernel/vtime.c | 412 +++++++++++++++++++++++------------------- drivers/s390/cio/cio.c | 2 +- drivers/s390/s390mach.c | 3 + 9 files changed, 248 insertions(+), 265 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h index 89456df43c4a..d60a2eefb17b 100644 --- a/arch/s390/include/asm/cpu.h +++ b/arch/s390/include/asm/cpu.h @@ -21,12 +21,12 @@ struct s390_idle_data { DECLARE_PER_CPU(struct s390_idle_data, s390_idle); -void s390_idle_leave(void); +void vtime_start_cpu(void); static inline void s390_idle_check(void) { if ((&__get_cpu_var(s390_idle))->idle_enter != 0ULL) - s390_idle_leave(); + vtime_start_cpu(); } #endif /* _ASM_S390_CPU_H_ */ diff --git a/arch/s390/include/asm/timer.h b/arch/s390/include/asm/timer.h index 61705d60f995..e4bcab739c19 100644 --- a/arch/s390/include/asm/timer.h +++ b/arch/s390/include/asm/timer.h @@ -23,20 +23,18 @@ struct vtimer_list { __u64 expires; __u64 interval; - spinlock_t lock; - unsigned long magic; - void (*function)(unsigned long); unsigned long data; }; -/* the offset value will wrap after ca. 71 years */ +/* the vtimer value will wrap after ca. 71 years */ struct vtimer_queue { struct list_head list; spinlock_t lock; - __u64 to_expire; /* current event expire time */ - __u64 offset; /* list offset to zero */ - __u64 idle; /* temp var for idle */ + __u64 timer; /* last programmed timer */ + __u64 elapsed; /* elapsed time of timer expire values */ + __u64 idle; /* temp var for idle */ + int do_spt; /* =1: reprogram cpu timer in idle */ }; extern void init_virt_timer(struct vtimer_list *timer); @@ -48,8 +46,8 @@ extern int del_virt_timer(struct vtimer_list *timer); extern void init_cpu_vtimer(void); extern void vtime_init(void); -extern void vtime_start_cpu_timer(void); -extern void vtime_stop_cpu_timer(void); +extern void vtime_stop_cpu(void); +extern void vtime_start_leave(void); #endif /* __KERNEL__ */ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 55de521aef77..1268aa2991bf 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -583,8 +583,8 @@ kernel_per: .globl io_int_handler io_int_handler: - stpt __LC_ASYNC_ENTER_TIMER stck __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER SAVE_ALL_BASE __LC_SAVE_AREA+16 SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+16 @@ -723,8 +723,8 @@ io_notify_resume: .globl ext_int_handler ext_int_handler: - stpt __LC_ASYNC_ENTER_TIMER stck __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER SAVE_ALL_BASE __LC_SAVE_AREA+16 SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16 @@ -750,6 +750,7 @@ __critical_end: .globl mcck_int_handler mcck_int_handler: + stck __LC_INT_CLOCK spt __LC_CPU_TIMER_SAVE_AREA # revalidate cpu timer lm %r0,%r15,__LC_GPREGS_SAVE_AREA # revalidate gprs SAVE_ALL_BASE __LC_SAVE_AREA+32 diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 16bb4fd1a403..ae83c195171c 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -559,8 +559,8 @@ kernel_per: */ .globl io_int_handler io_int_handler: - stpt __LC_ASYNC_ENTER_TIMER stck __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER SAVE_ALL_BASE __LC_SAVE_AREA+32 SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+32 CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32 @@ -721,8 +721,8 @@ io_notify_resume: */ .globl ext_int_handler ext_int_handler: - stpt __LC_ASYNC_ENTER_TIMER stck __LC_INT_CLOCK + stpt __LC_ASYNC_ENTER_TIMER SAVE_ALL_BASE __LC_SAVE_AREA+32 SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32 CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32 @@ -746,6 +746,7 @@ __critical_end: */ .globl mcck_int_handler mcck_int_handler: + stck __LC_INT_CLOCK la %r1,4095 # revalidate r1 spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # revalidate cpu timer lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 1e06436f07c2..b6110bdf8dc2 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -46,7 +46,6 @@ #include #include #include -#include #include "entry.h" asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); @@ -76,35 +75,12 @@ unsigned long thread_saved_pc(struct task_struct *tsk) return sf->gprs[8]; } -DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = { - .lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock) -}; - -void s390_idle_leave(void) -{ - struct s390_idle_data *idle; - unsigned long long idle_time; - - idle = &__get_cpu_var(s390_idle); - idle_time = S390_lowcore.int_clock - idle->idle_enter; - spin_lock(&idle->lock); - idle->idle_time += idle_time; - idle->idle_enter = 0ULL; - idle->idle_count++; - spin_unlock(&idle->lock); - vtime_start_cpu_timer(); -} - extern void s390_handle_mcck(void); /* * The idle loop on a S390... */ static void default_idle(void) { - struct s390_idle_data *idle = &__get_cpu_var(s390_idle); - unsigned long addr; - psw_t psw; - /* CPU is going idle. */ local_irq_disable(); if (need_resched()) { @@ -120,7 +96,6 @@ static void default_idle(void) local_mcck_disable(); if (test_thread_flag(TIF_MCCK_PENDING)) { local_mcck_enable(); - s390_idle_leave(); local_irq_enable(); s390_handle_mcck(); return; @@ -128,42 +103,9 @@ static void default_idle(void) trace_hardirqs_on(); /* Don't trace preempt off for idle. */ stop_critical_timings(); - vtime_stop_cpu_timer(); - - /* - * The inline assembly is equivalent to - * idle->idle_enter = get_clock(); - * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | - * PSW_MASK_IO | PSW_MASK_EXT); - * The difference is that the inline assembly makes sure that - * the stck instruction is right before the lpsw instruction. - * This is done to increase the precision. - */ - - /* Wait for external, I/O or machine check interrupt. */ - psw.mask = psw_kernel_bits|PSW_MASK_WAIT|PSW_MASK_IO|PSW_MASK_EXT; -#ifndef __s390x__ - asm volatile( - " basr %0,0\n" - "0: ahi %0,1f-0b\n" - " st %0,4(%2)\n" - " stck 0(%3)\n" - " lpsw 0(%2)\n" - "1:" - : "=&d" (addr), "=m" (idle->idle_enter) - : "a" (&psw), "a" (&idle->idle_enter), "m" (psw) - : "memory", "cc"); -#else /* __s390x__ */ - asm volatile( - " larl %0,1f\n" - " stg %0,8(%2)\n" - " stck 0(%3)\n" - " lpswe 0(%2)\n" - "1:" - : "=&d" (addr), "=m" (idle->idle_enter) - : "a" (&psw), "a" (&idle->idle_enter), "m" (psw) - : "memory", "cc"); -#endif /* __s390x__ */ + /* Stop virtual timer and halt the cpu. */ + vtime_stop_cpu(); + /* Reenable preemption tracer. */ start_critical_timings(); } diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c index e019b419efc6..a0d2d55d7fb3 100644 --- a/arch/s390/kernel/s390_ext.c +++ b/arch/s390/kernel/s390_ext.c @@ -119,8 +119,8 @@ void do_extint(struct pt_regs *regs, unsigned short code) struct pt_regs *old_regs; old_regs = set_irq_regs(regs); - irq_enter(); s390_idle_check(); + irq_enter(); if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) /* Serve timer interrupts first. */ clock_comparator_work(); diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 25d21fef76ba..2fb36e462194 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -23,10 +23,35 @@ #include #include #include +#include static ext_int_info_t ext_int_info_timer; + static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer); +DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = { + .lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock) +}; + +static inline __u64 get_vtimer(void) +{ + __u64 timer; + + asm volatile("STPT %0" : "=m" (timer)); + return timer; +} + +static inline void set_vtimer(__u64 expires) +{ + __u64 timer; + + asm volatile (" STPT %0\n" /* Store current cpu timer value */ + " SPT %1" /* Set new value immediatly afterwards */ + : "=m" (timer) : "m" (expires) ); + S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer; + S390_lowcore.last_update_timer = expires; +} + /* * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. @@ -53,18 +78,12 @@ static void do_account_vtime(struct task_struct *tsk, int hardirq_offset) system = S390_lowcore.system_timer - ti->system_timer; S390_lowcore.steal_timer -= system; ti->system_timer = S390_lowcore.system_timer; - if (idle_task(smp_processor_id()) != current) - account_system_time(tsk, hardirq_offset, system, system); - else - account_idle_time(system); + account_system_time(tsk, hardirq_offset, system, system); steal = S390_lowcore.steal_timer; if ((s64) steal > 0) { S390_lowcore.steal_timer = 0; - if (idle_task(smp_processor_id()) != current) - account_steal_time(steal); - else - account_idle_time(steal); + account_steal_time(steal); } } @@ -96,80 +115,127 @@ void account_system_vtime(struct task_struct *tsk) __u64 timer, system; timer = S390_lowcore.last_update_timer; - asm volatile (" STPT %0" /* Store current cpu timer value */ - : "=m" (S390_lowcore.last_update_timer) ); + S390_lowcore.last_update_timer = get_vtimer(); S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; system = S390_lowcore.system_timer - ti->system_timer; S390_lowcore.steal_timer -= system; ti->system_timer = S390_lowcore.system_timer; - if (in_irq() || idle_task(smp_processor_id()) != current) - account_system_time(tsk, 0, system, system); - else - account_idle_time(system); + account_system_time(tsk, 0, system, system); } EXPORT_SYMBOL_GPL(account_system_vtime); -static inline void set_vtimer(__u64 expires) +void vtime_start_cpu(void) { + struct s390_idle_data *idle = &__get_cpu_var(s390_idle); struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer); - __u64 timer; - - asm volatile (" STPT %0\n" /* Store current cpu timer value */ - " SPT %1" /* Set new value immediatly afterwards */ - : "=m" (timer) : "m" (expires) ); - S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer; - S390_lowcore.last_update_timer = expires; - - /* store expire time for this CPU timer */ - vq->to_expire = expires; -} - -void vtime_start_cpu_timer(void) -{ - struct vtimer_queue *vt_list; - - vt_list = &__get_cpu_var(virt_cpu_timer); - - /* CPU timer interrupt is pending, don't reprogramm it */ - if (vt_list->idle & 1LL<<63) - return; + __u64 idle_time, expires; + + /* Account time spent with enabled wait psw loaded as idle time. */ + idle_time = S390_lowcore.int_clock - idle->idle_enter; + account_idle_time(idle_time); + S390_lowcore.last_update_clock = S390_lowcore.int_clock; + + /* Account system time spent going idle. */ + S390_lowcore.system_timer += S390_lowcore.last_update_timer - vq->idle; + S390_lowcore.last_update_timer = S390_lowcore.async_enter_timer; + + /* Restart vtime CPU timer */ + if (vq->do_spt) { + /* Program old expire value but first save progress. */ + expires = vq->idle - S390_lowcore.async_enter_timer; + expires += get_vtimer(); + set_vtimer(expires); + } else { + /* Don't account the CPU timer delta while the cpu was idle. */ + vq->elapsed -= vq->idle - S390_lowcore.async_enter_timer; + } - if (!list_empty(&vt_list->list)) - set_vtimer(vt_list->idle); + spin_lock(&idle->lock); + idle->idle_time += idle_time; + idle->idle_enter = 0ULL; + idle->idle_count++; + spin_unlock(&idle->lock); } -void vtime_stop_cpu_timer(void) +void vtime_stop_cpu(void) { - struct vtimer_queue *vt_list; - - vt_list = &__get_cpu_var(virt_cpu_timer); - - /* nothing to do */ - if (list_empty(&vt_list->list)) { - vt_list->idle = VTIMER_MAX_SLICE; - goto fire; + struct s390_idle_data *idle = &__get_cpu_var(s390_idle); + struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer); + psw_t psw; + + /* Wait for external, I/O or machine check interrupt. */ + psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT; + + /* Check if the CPU timer needs to be reprogrammed. */ + if (vq->do_spt) { + __u64 vmax = VTIMER_MAX_SLICE; + /* + * The inline assembly is equivalent to + * vq->idle = get_cpu_timer(); + * set_cpu_timer(VTIMER_MAX_SLICE); + * idle->idle_enter = get_clock(); + * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | + * PSW_MASK_IO | PSW_MASK_EXT); + * The difference is that the inline assembly makes sure that + * the last three instruction are stpt, stck and lpsw in that + * order. This is done to increase the precision. + */ + asm volatile( +#ifndef CONFIG_64BIT + " basr 1,0\n" + "0: ahi 1,1f-0b\n" + " st 1,4(%2)\n" +#else /* CONFIG_64BIT */ + " larl 1,1f\n" + " stg 1,8(%2)\n" +#endif /* CONFIG_64BIT */ + " stpt 0(%4)\n" + " spt 0(%5)\n" + " stck 0(%3)\n" +#ifndef CONFIG_64BIT + " lpsw 0(%2)\n" +#else /* CONFIG_64BIT */ + " lpswe 0(%2)\n" +#endif /* CONFIG_64BIT */ + "1:" + : "=m" (idle->idle_enter), "=m" (vq->idle) + : "a" (&psw), "a" (&idle->idle_enter), + "a" (&vq->idle), "a" (&vmax), "m" (vmax), "m" (psw) + : "memory", "cc", "1"); + } else { + /* + * The inline assembly is equivalent to + * vq->idle = get_cpu_timer(); + * idle->idle_enter = get_clock(); + * __load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT | + * PSW_MASK_IO | PSW_MASK_EXT); + * The difference is that the inline assembly makes sure that + * the last three instruction are stpt, stck and lpsw in that + * order. This is done to increase the precision. + */ + asm volatile( +#ifndef CONFIG_64BIT + " basr 1,0\n" + "0: ahi 1,1f-0b\n" + " st 1,4(%2)\n" +#else /* CONFIG_64BIT */ + " larl 1,1f\n" + " stg 1,8(%2)\n" +#endif /* CONFIG_64BIT */ + " stpt 0(%4)\n" + " stck 0(%3)\n" +#ifndef CONFIG_64BIT + " lpsw 0(%2)\n" +#else /* CONFIG_64BIT */ + " lpswe 0(%2)\n" +#endif /* CONFIG_64BIT */ + "1:" + : "=m" (idle->idle_enter), "=m" (vq->idle) + : "a" (&psw), "a" (&idle->idle_enter), + "a" (&vq->idle), "m" (psw) + : "memory", "cc", "1"); } - - /* store the actual expire value */ - asm volatile ("STPT %0" : "=m" (vt_list->idle)); - - /* - * If the CPU timer is negative we don't reprogramm - * it because we will get instantly an interrupt. - */ - if (vt_list->idle & 1LL<<63) - return; - - vt_list->offset += vt_list->to_expire - vt_list->idle; - - /* - * We cannot halt the CPU timer, we just write a value that - * nearly never expires (only after 71 years) and re-write - * the stored expire value if we continue the timer - */ - fire: - set_vtimer(VTIMER_MAX_SLICE); } /* @@ -195,30 +261,23 @@ static void list_add_sorted(struct vtimer_list *timer, struct list_head *head) */ static void do_callbacks(struct list_head *cb_list) { - struct vtimer_queue *vt_list; + struct vtimer_queue *vq; struct vtimer_list *event, *tmp; - void (*fn)(unsigned long); - unsigned long data; if (list_empty(cb_list)) return; - vt_list = &__get_cpu_var(virt_cpu_timer); + vq = &__get_cpu_var(virt_cpu_timer); list_for_each_entry_safe(event, tmp, cb_list, entry) { - fn = event->function; - data = event->data; - fn(data); - - if (!event->interval) - /* delete one shot timer */ - list_del_init(&event->entry); - else { - /* move interval timer back to list */ - spin_lock(&vt_list->lock); - list_del_init(&event->entry); - list_add_sorted(event, &vt_list->list); - spin_unlock(&vt_list->lock); + list_del_init(&event->entry); + (event->function)(event->data); + if (event->interval) { + /* Recharge interval timer */ + event->expires = event->interval + vq->elapsed; + spin_lock(&vq->lock); + list_add_sorted(event, &vq->list); + spin_unlock(&vq->lock); } } } @@ -228,64 +287,57 @@ static void do_callbacks(struct list_head *cb_list) */ static void do_cpu_timer_interrupt(__u16 error_code) { - __u64 next, delta; - struct vtimer_queue *vt_list; + struct vtimer_queue *vq; struct vtimer_list *event, *tmp; - struct list_head *ptr; - /* the callback queue */ - struct list_head cb_list; + struct list_head cb_list; /* the callback queue */ + __u64 elapsed, next; INIT_LIST_HEAD(&cb_list); - vt_list = &__get_cpu_var(virt_cpu_timer); + vq = &__get_cpu_var(virt_cpu_timer); /* walk timer list, fire all expired events */ - spin_lock(&vt_list->lock); - - if (vt_list->to_expire < VTIMER_MAX_SLICE) - vt_list->offset += vt_list->to_expire; - - list_for_each_entry_safe(event, tmp, &vt_list->list, entry) { - if (event->expires > vt_list->offset) - /* found first unexpired event, leave */ - break; - - /* re-charge interval timer, we have to add the offset */ - if (event->interval) - event->expires = event->interval + vt_list->offset; - - /* move expired timer to the callback queue */ - list_move_tail(&event->entry, &cb_list); + spin_lock(&vq->lock); + + elapsed = vq->elapsed + (vq->timer - S390_lowcore.async_enter_timer); + BUG_ON((s64) elapsed < 0); + vq->elapsed = 0; + list_for_each_entry_safe(event, tmp, &vq->list, entry) { + if (event->expires < elapsed) + /* move expired timer to the callback queue */ + list_move_tail(&event->entry, &cb_list); + else + event->expires -= elapsed; } - spin_unlock(&vt_list->lock); + spin_unlock(&vq->lock); + + vq->do_spt = list_empty(&cb_list); do_callbacks(&cb_list); /* next event is first in list */ - spin_lock(&vt_list->lock); - if (!list_empty(&vt_list->list)) { - ptr = vt_list->list.next; - event = list_entry(ptr, struct vtimer_list, entry); - next = event->expires - vt_list->offset; - - /* add the expired time from this interrupt handler - * and the callback functions - */ - asm volatile ("STPT %0" : "=m" (delta)); - delta = 0xffffffffffffffffLL - delta + 1; - vt_list->offset += delta; - next -= delta; - } else { - vt_list->offset = 0; - next = VTIMER_MAX_SLICE; - } - spin_unlock(&vt_list->lock); - set_vtimer(next); + next = VTIMER_MAX_SLICE; + spin_lock(&vq->lock); + if (!list_empty(&vq->list)) { + event = list_first_entry(&vq->list, struct vtimer_list, entry); + next = event->expires; + } else + vq->do_spt = 0; + spin_unlock(&vq->lock); + /* + * To improve precision add the time spent by the + * interrupt handler to the elapsed time. + * Note: CPU timer counts down and we got an interrupt, + * the current content is negative + */ + elapsed = S390_lowcore.async_enter_timer - get_vtimer(); + set_vtimer(next - elapsed); + vq->timer = next - elapsed; + vq->elapsed = elapsed; } void init_virt_timer(struct vtimer_list *timer) { timer->function = NULL; INIT_LIST_HEAD(&timer->entry); - spin_lock_init(&timer->lock); } EXPORT_SYMBOL(init_virt_timer); @@ -299,44 +351,40 @@ static inline int vtimer_pending(struct vtimer_list *timer) */ static void internal_add_vtimer(struct vtimer_list *timer) { + struct vtimer_queue *vq; unsigned long flags; - __u64 done; - struct vtimer_list *event; - struct vtimer_queue *vt_list; + __u64 left, expires; - vt_list = &per_cpu(virt_cpu_timer, timer->cpu); - spin_lock_irqsave(&vt_list->lock, flags); + vq = &per_cpu(virt_cpu_timer, timer->cpu); + spin_lock_irqsave(&vq->lock, flags); BUG_ON(timer->cpu != smp_processor_id()); - /* if list is empty we only have to set the timer */ - if (list_empty(&vt_list->list)) { - /* reset the offset, this may happen if the last timer was - * just deleted by mod_virt_timer and the interrupt - * didn't happen until here - */ - vt_list->offset = 0; - goto fire; + if (list_empty(&vq->list)) { + /* First timer on this cpu, just program it. */ + list_add(&timer->entry, &vq->list); + set_vtimer(timer->expires); + vq->timer = timer->expires; + vq->elapsed = 0; + } else { + /* Check progress of old timers. */ + expires = timer->expires; + left = get_vtimer(); + if (likely((s64) expires < (s64) left)) { + /* The new timer expires before the current timer. */ + set_vtimer(expires); + vq->elapsed += vq->timer - left; + vq->timer = expires; + } else { + vq->elapsed += vq->timer - left; + vq->timer = left; + } + /* Insert new timer into per cpu list. */ + timer->expires += vq->elapsed; + list_add_sorted(timer, &vq->list); } - /* save progress */ - asm volatile ("STPT %0" : "=m" (done)); - - /* calculate completed work */ - done = vt_list->to_expire - done + vt_list->offset; - vt_list->offset = 0; - - list_for_each_entry(event, &vt_list->list, entry) - event->expires -= done; - - fire: - list_add_sorted(timer, &vt_list->list); - - /* get first element, which is the next vtimer slice */ - event = list_entry(vt_list->list.next, struct vtimer_list, entry); - - set_vtimer(event->expires); - spin_unlock_irqrestore(&vt_list->lock, flags); + spin_unlock_irqrestore(&vq->lock, flags); /* release CPU acquired in prepare_vtimer or mod_virt_timer() */ put_cpu(); } @@ -381,14 +429,15 @@ EXPORT_SYMBOL(add_virt_timer_periodic); * If we change a pending timer the function must be called on the CPU * where the timer is running on, e.g. by smp_call_function_single() * - * The original mod_timer adds the timer if it is not pending. For compatibility - * we do the same. The timer will be added on the current CPU as a oneshot timer. + * The original mod_timer adds the timer if it is not pending. For + * compatibility we do the same. The timer will be added on the current + * CPU as a oneshot timer. * * returns whether it has modified a pending timer (1) or not (0) */ int mod_virt_timer(struct vtimer_list *timer, __u64 expires) { - struct vtimer_queue *vt_list; + struct vtimer_queue *vq; unsigned long flags; int cpu; @@ -404,17 +453,17 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires) return 1; cpu = get_cpu(); - vt_list = &per_cpu(virt_cpu_timer, cpu); + vq = &per_cpu(virt_cpu_timer, cpu); /* check if we run on the right CPU */ BUG_ON(timer->cpu != cpu); /* disable interrupts before test if timer is pending */ - spin_lock_irqsave(&vt_list->lock, flags); + spin_lock_irqsave(&vq->lock, flags); /* if timer isn't pending add it on the current CPU */ if (!vtimer_pending(timer)) { - spin_unlock_irqrestore(&vt_list->lock, flags); + spin_unlock_irqrestore(&vq->lock, flags); /* we do not activate an interval timer with mod_virt_timer */ timer->interval = 0; timer->expires = expires; @@ -431,7 +480,7 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires) timer->interval = expires; /* the timer can't expire anymore so we can release the lock */ - spin_unlock_irqrestore(&vt_list->lock, flags); + spin_unlock_irqrestore(&vq->lock, flags); internal_add_vtimer(timer); return 1; } @@ -445,25 +494,19 @@ EXPORT_SYMBOL(mod_virt_timer); int del_virt_timer(struct vtimer_list *timer) { unsigned long flags; - struct vtimer_queue *vt_list; + struct vtimer_queue *vq; /* check if timer is pending */ if (!vtimer_pending(timer)) return 0; - vt_list = &per_cpu(virt_cpu_timer, timer->cpu); - spin_lock_irqsave(&vt_list->lock, flags); + vq = &per_cpu(virt_cpu_timer, timer->cpu); + spin_lock_irqsave(&vq->lock, flags); /* we don't interrupt a running timer, just let it expire! */ list_del_init(&timer->entry); - /* last timer removed */ - if (list_empty(&vt_list->list)) { - vt_list->to_expire = 0; - vt_list->offset = 0; - } - - spin_unlock_irqrestore(&vt_list->lock, flags); + spin_unlock_irqrestore(&vq->lock, flags); return 1; } EXPORT_SYMBOL(del_virt_timer); @@ -473,24 +516,19 @@ EXPORT_SYMBOL(del_virt_timer); */ void init_cpu_vtimer(void) { - struct vtimer_queue *vt_list; + struct vtimer_queue *vq; /* kick the virtual timer */ - S390_lowcore.exit_timer = VTIMER_MAX_SLICE; - S390_lowcore.last_update_timer = VTIMER_MAX_SLICE; asm volatile ("STCK %0" : "=m" (S390_lowcore.last_update_clock)); - asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer)); + asm volatile ("STPT %0" : "=m" (S390_lowcore.last_update_timer)); + + /* initialize per cpu vtimer structure */ + vq = &__get_cpu_var(virt_cpu_timer); + INIT_LIST_HEAD(&vq->list); + spin_lock_init(&vq->lock); /* enable cpu timer interrupts */ __ctl_set_bit(0,10); - - vt_list = &__get_cpu_var(virt_cpu_timer); - INIT_LIST_HEAD(&vt_list->list); - spin_lock_init(&vt_list->lock); - vt_list->to_expire = 0; - vt_list->offset = 0; - vt_list->idle = 0; - } void __init vtime_init(void) diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 8a8df7552969..06b71823f399 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -632,8 +632,8 @@ do_IRQ (struct pt_regs *regs) struct pt_regs *old_regs; old_regs = set_irq_regs(regs); - irq_enter(); s390_idle_check(); + irq_enter(); if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator) /* Serve timer interrupts first. */ clock_comparator_work(); diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c index 834e9ee7e934..92b0417f8e12 100644 --- a/drivers/s390/s390mach.c +++ b/drivers/s390/s390mach.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "s390mach.h" static struct semaphore m_sem; @@ -369,6 +370,8 @@ s390_do_machine_check(struct pt_regs *regs) lockdep_off(); + s390_idle_check(); + mci = (struct mci *) &S390_lowcore.mcck_interruption_code; mcck = &__get_cpu_var(cpu_mcck); umode = user_mode(regs); -- cgit v1.2.3 From c742b31c03f37c5c499178f09f57381aa6c70131 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 31 Dec 2008 15:11:42 +0100 Subject: [PATCH] fast vdso implementation for CLOCK_THREAD_CPUTIME_ID The extract cpu time instruction (ectg) instruction allows the user process to get the current thread cputime without calling into the kernel. The code that uses the instruction needs to switch to the access registers mode to get access to the per-cpu info page that contains the two base values that are needed to calculate the current cputime from the CPU timer with the ectg instruction. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/lowcore.h | 9 ++- arch/s390/include/asm/vdso.h | 15 +++- arch/s390/kernel/asm-offsets.c | 5 ++ arch/s390/kernel/entry64.S | 45 ++++++------ arch/s390/kernel/head64.S | 2 + arch/s390/kernel/setup.c | 2 + arch/s390/kernel/smp.c | 9 +++ arch/s390/kernel/vdso.c | 123 +++++++++++++++++++++++++++++++- arch/s390/kernel/vdso64/clock_getres.S | 5 ++ arch/s390/kernel/vdso64/clock_gettime.S | 39 ++++++++-- 10 files changed, 222 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index a547817cf1ab..ffdef5fe8587 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -106,8 +106,10 @@ #define __LC_IPLDEV 0xDB8 #define __LC_CURRENT 0xDD8 #define __LC_INT_CLOCK 0xDE8 +#define __LC_VDSO_PER_CPU 0xE38 #endif /* __s390x__ */ +#define __LC_PASTE 0xE40 #define __LC_PANIC_MAGIC 0xE00 #ifndef __s390x__ @@ -381,7 +383,12 @@ struct _lowcore /* whether the kernel died with panic() or not */ __u32 panic_magic; /* 0xe00 */ - __u8 pad13[0x11b8-0xe04]; /* 0xe04 */ + /* Per cpu primary space access list */ + __u8 pad_0xe04[0xe3c-0xe04]; /* 0xe04 */ + __u32 vdso_per_cpu_data; /* 0xe3c */ + __u32 paste[16]; /* 0xe40 */ + + __u8 pad13[0x11b8-0xe80]; /* 0xe80 */ /* 64 bit extparam used for pfault, diag 250 etc */ __u64 ext_params2; /* 0x11B8 */ diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index a44f4fe16a35..7bdd7c8ebc91 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -12,9 +12,9 @@ #ifndef __ASSEMBLY__ /* - * Note about this structure: + * Note about the vdso_data and vdso_per_cpu_data structures: * - * NEVER USE THIS IN USERSPACE CODE DIRECTLY. The layout of this + * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the * structure is supposed to be known only to the function in the vdso * itself and may change without notice. */ @@ -28,10 +28,21 @@ struct vdso_data { __u64 wtom_clock_nsec; /* 0x28 */ __u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */ __u32 tz_dsttime; /* Type of dst correction 0x34 */ + __u32 ectg_available; +}; + +struct vdso_per_cpu_data { + __u64 ectg_timer_base; + __u64 ectg_user_time; }; extern struct vdso_data *vdso_data; +#ifdef CONFIG_64BIT +int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore); +void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore); +#endif + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index e641f60bac99..67a60016babb 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -48,6 +48,11 @@ int main(void) DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec)); DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec)); DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest)); + DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available)); + DEFINE(__VDSO_ECTG_BASE, + offsetof(struct vdso_per_cpu_data, ectg_timer_base)); + DEFINE(__VDSO_ECTG_USER, + offsetof(struct vdso_per_cpu_data, ectg_user_time)); /* constants used by the vdso */ DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index ae83c195171c..c6fbde13971a 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -177,8 +177,11 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \ .if !\sync ni \psworg+1,0xfd # clear wait state bit .endif - lmg %r0,%r15,SP_R0(%r15) # load gprs 0-15 of user + lg %r14,__LC_VDSO_PER_CPU + lmg %r0,%r13,SP_R0(%r15) # load gprs 0-13 of user stpt __LC_EXIT_TIMER + mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER + lmg %r14,%r15,SP_R14(%r15) # load grps 14-15 of user lpswe \psworg # back to caller .endm @@ -980,23 +983,23 @@ cleanup_sysc_return: cleanup_sysc_leave: clc 8(8,%r12),BASED(cleanup_sysc_leave_insn) - je 2f - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER + je 3f clc 8(8,%r12),BASED(cleanup_sysc_leave_insn+8) - je 2f - mvc __LC_RETURN_PSW(16),SP_PSW(%r15) + jhe 0f + mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER +0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) cghi %r12,__LC_MCK_OLD_PSW - jne 0f + jne 1f mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) - j 1f -0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) -1: lmg %r0,%r11,SP_R0(%r15) + j 2f +1: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) +2: lmg %r0,%r11,SP_R0(%r15) lg %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW +3: la %r12,__LC_RETURN_PSW br %r14 cleanup_sysc_leave_insn: .quad sysc_done - 4 - .quad sysc_done - 8 + .quad sysc_done - 16 cleanup_io_return: mvc __LC_RETURN_PSW(8),0(%r12) @@ -1006,23 +1009,23 @@ cleanup_io_return: cleanup_io_leave: clc 8(8,%r12),BASED(cleanup_io_leave_insn) - je 2f - mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER + je 3f clc 8(8,%r12),BASED(cleanup_io_leave_insn+8) - je 2f - mvc __LC_RETURN_PSW(16),SP_PSW(%r15) + jhe 0f + mvc __LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER +0: mvc __LC_RETURN_PSW(16),SP_PSW(%r15) cghi %r12,__LC_MCK_OLD_PSW - jne 0f + jne 1f mvc __LC_SAVE_AREA+64(32),SP_R12(%r15) - j 1f -0: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) -1: lmg %r0,%r11,SP_R0(%r15) + j 2f +1: mvc __LC_SAVE_AREA+32(32),SP_R12(%r15) +2: lmg %r0,%r11,SP_R0(%r15) lg %r15,SP_R15(%r15) -2: la %r12,__LC_RETURN_PSW +3: la %r12,__LC_RETURN_PSW br %r14 cleanup_io_leave_insn: .quad io_done - 4 - .quad io_done - 8 + .quad io_done - 16 /* * Integer constants diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 3ccd36b24b8f..f9f70aa15244 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -87,6 +87,8 @@ startup_continue: lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area # move IPL device to lowcore mvc __LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12) + lghi %r0,__LC_PASTE + stg %r0,__LC_VDSO_PER_CPU # # Setup stack # diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index b7a1efd5522c..d825f4950e4e 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -427,6 +427,8 @@ setup_lowcore(void) /* enable extended save area */ __ctl_set_bit(14, 29); } +#else + lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; #endif set_prefix((u32)(unsigned long) lc); } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 3979a6fc0882..b3461e8f1705 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -47,6 +47,7 @@ #include #include #include +#include #include "entry.h" /* @@ -506,6 +507,9 @@ static int __cpuinit smp_alloc_lowcore(int cpu) goto out; lowcore->extended_save_area_addr = (u32) save_area; } +#else + if (vdso_alloc_per_cpu(cpu, lowcore)) + goto out; #endif lowcore_ptr[cpu] = lowcore; return 0; @@ -528,6 +532,8 @@ static void smp_free_lowcore(int cpu) #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) free_page((unsigned long) lowcore->extended_save_area_addr); +#else + vdso_free_per_cpu(cpu, lowcore); #endif free_page(lowcore->panic_stack - PAGE_SIZE); free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER); @@ -670,6 +676,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order); panic_stack = __get_free_page(GFP_KERNEL); async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER); + BUG_ON(!lowcore || !panic_stack || !async_stack); #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) save_area = get_zeroed_page(GFP_KERNEL); @@ -683,6 +690,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus) #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) lowcore->extended_save_area_addr = (u32) save_area; +#else + BUG_ON(vdso_alloc_per_cpu(smp_processor_id(), lowcore)); #endif set_prefix((u32)(unsigned long) lowcore); local_mcck_enable(); diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 10a6ccef4412..25a6a82f1c02 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -31,9 +31,6 @@ #include #include -/* Max supported size for symbol names */ -#define MAX_SYMNAME 64 - #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) extern char vdso32_start, vdso32_end; static void *vdso32_kbase = &vdso32_start; @@ -70,6 +67,119 @@ static union { } vdso_data_store __attribute__((__section__(".data.page_aligned"))); struct vdso_data *vdso_data = &vdso_data_store.data; +/* + * Setup vdso data page. + */ +static void vdso_init_data(struct vdso_data *vd) +{ + unsigned int facility_list; + + facility_list = stfl(); + vd->ectg_available = switch_amode && (facility_list & 1); +} + +#ifdef CONFIG_64BIT +/* + * Setup per cpu vdso data page. + */ +static void vdso_init_per_cpu_data(int cpu, struct vdso_per_cpu_data *vpcd) +{ +} + +/* + * Allocate/free per cpu vdso data. + */ +#ifdef CONFIG_64BIT +#define SEGMENT_ORDER 2 +#else +#define SEGMENT_ORDER 1 +#endif + +int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore) +{ + unsigned long segment_table, page_table, page_frame; + u32 *psal, *aste; + int i; + + lowcore->vdso_per_cpu_data = __LC_PASTE; + + if (!switch_amode || !vdso_enabled) + return 0; + + segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER); + page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA); + page_frame = get_zeroed_page(GFP_KERNEL); + if (!segment_table || !page_table || !page_frame) + goto out; + + clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY, + PAGE_SIZE << SEGMENT_ORDER); + clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY, + 256*sizeof(unsigned long)); + + *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; + *(unsigned long *) page_table = _PAGE_RO + page_frame; + + psal = (u32 *) (page_table + 256*sizeof(unsigned long)); + aste = psal + 32; + + for (i = 4; i < 32; i += 4) + psal[i] = 0x80000000; + + lowcore->paste[4] = (u32)(addr_t) psal; + psal[0] = 0x20000000; + psal[2] = (u32)(addr_t) aste; + *(unsigned long *) (aste + 2) = segment_table + + _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT; + aste[4] = (u32)(addr_t) psal; + lowcore->vdso_per_cpu_data = page_frame; + + vdso_init_per_cpu_data(cpu, (struct vdso_per_cpu_data *) page_frame); + return 0; + +out: + free_page(page_frame); + free_page(page_table); + free_pages(segment_table, SEGMENT_ORDER); + return -ENOMEM; +} + +#ifdef CONFIG_HOTPLUG_CPU +void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore) +{ + unsigned long segment_table, page_table, page_frame; + u32 *psal, *aste; + + if (!switch_amode || !vdso_enabled) + return; + + psal = (u32 *)(addr_t) lowcore->paste[4]; + aste = (u32 *)(addr_t) psal[2]; + segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK; + page_table = *(unsigned long *) segment_table; + page_frame = *(unsigned long *) page_table; + + free_page(page_frame); + free_page(page_table); + free_pages(segment_table, SEGMENT_ORDER); +} +#endif /* CONFIG_HOTPLUG_CPU */ + +static void __vdso_init_cr5(void *dummy) +{ + unsigned long cr5; + + cr5 = offsetof(struct _lowcore, paste); + __ctl_load(cr5, 5, 5); +} + +static void vdso_init_cr5(void) +{ + if (switch_amode && vdso_enabled) + on_each_cpu(__vdso_init_cr5, NULL, 1); +} +#endif /* CONFIG_64BIT */ + /* * This is called from binfmt_elf, we create the special vma for the * vDSO and insert it into the mm struct tree @@ -172,6 +282,9 @@ static int __init vdso_init(void) { int i; + if (!vdso_enabled) + return 0; + vdso_init_data(vdso_data); #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) /* Calculate the size of the 32 bit vDSO */ vdso32_pages = ((&vdso32_end - &vdso32_start @@ -208,6 +321,10 @@ static int __init vdso_init(void) } vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data); vdso64_pagelist[vdso64_pages] = NULL; +#ifndef CONFIG_SMP + BUG_ON(vdso_alloc_per_cpu(0, S390_lowcore)); +#endif + vdso_init_cr5(); #endif /* CONFIG_64BIT */ get_page(virt_to_page(vdso_data)); diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S index 488e31a3c0e7..9ce8caafdb4e 100644 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -22,7 +22,12 @@ __kernel_clock_getres: cghi %r2,CLOCK_REALTIME je 0f cghi %r2,CLOCK_MONOTONIC + je 0f + cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */ jne 2f + larl %r5,_vdso_data + icm %r0,15,__LC_ECTG_OK(%r5) + jz 2f 0: ltgr %r3,%r3 jz 1f /* res == NULL */ larl %r1,3f diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S index 738a410b7eb2..79dbfee831ec 100644 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -22,8 +22,10 @@ __kernel_clock_gettime: larl %r5,_vdso_data cghi %r2,CLOCK_REALTIME je 4f + cghi %r2,-2 /* CLOCK_THREAD_CPUTIME_ID for this thread */ + je 9f cghi %r2,CLOCK_MONOTONIC - jne 9f + jne 12f /* CLOCK_MONOTONIC */ ltgr %r3,%r3 @@ -42,7 +44,7 @@ __kernel_clock_gettime: alg %r0,__VDSO_WTOM_SEC(%r5) clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ jne 0b - larl %r5,10f + larl %r5,13f 1: clg %r1,0(%r5) jl 2f slg %r1,0(%r5) @@ -68,7 +70,7 @@ __kernel_clock_gettime: lg %r0,__VDSO_XTIME_SEC(%r5) clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */ jne 5b - larl %r5,10f + larl %r5,13f 6: clg %r1,0(%r5) jl 7f slg %r1,0(%r5) @@ -79,11 +81,38 @@ __kernel_clock_gettime: 8: lghi %r2,0 br %r14 + /* CLOCK_THREAD_CPUTIME_ID for this thread */ +9: icm %r0,15,__VDSO_ECTG_OK(%r5) + jz 12f + ear %r2,%a4 + llilh %r4,0x0100 + sar %a4,%r4 + lghi %r4,0 + sacf 512 /* Magic ectg instruction */ + .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4 + sacf 0 + sar %a4,%r2 + algr %r1,%r0 /* r1 = cputime as TOD value */ + mghi %r1,1000 /* convert to nanoseconds */ + srlg %r1,%r1,12 /* r1 = cputime in nanosec */ + lgr %r4,%r1 + larl %r5,13f + srlg %r1,%r1,9 /* divide by 1000000000 */ + mlg %r0,8(%r5) + srlg %r0,%r0,11 /* r0 = tv_sec */ + stg %r0,0(%r3) + msg %r0,0(%r5) /* calculate tv_nsec */ + slgr %r4,%r0 /* r4 = tv_nsec */ + stg %r4,8(%r3) + lghi %r2,0 + br %r14 + /* Fallback to system call */ -9: lghi %r1,__NR_clock_gettime +12: lghi %r1,__NR_clock_gettime svc 0 br %r14 -10: .quad 1000000000 +13: .quad 1000000000 +14: .quad 19342813113834067 .cfi_endproc .size __kernel_clock_gettime,.-__kernel_clock_gettime -- cgit v1.2.3 From f320786063a9d1f885d2cf34ab44aa69c1d88f43 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 1 Jan 2009 10:12:13 +1030 Subject: cpumask: Remove IA64 definition of total_cpus now it's in core code Impact: fix IA64 compile Fortunately, they have exactly the same semantics. Signed-off-by: Rusty Russell --- arch/ia64/kernel/acpi.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 54ae373e6e22..0553648b7595 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -202,7 +202,6 @@ char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size) Boot-time Table Parsing -------------------------------------------------------------------------- */ -static int total_cpus __initdata; static int available_cpus __initdata; struct acpi_table_madt *acpi_madt __initdata; static u8 has_8259; -- cgit v1.2.3 From 9e2f913df70b378379a358a44e7d286f7b765e8e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 1 Jan 2009 10:12:14 +1030 Subject: percpu: fix percpu accessors to potentially !cpu_possible() cpus: m32r Impact: CPU iterator bugfixes Percpu areas are only allocated for possible cpus. In general, you shouldn't access random cpu's percpu areas. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Acked-by: Ingo Molnar Acked-by: Hirokazu Takata --- arch/m32r/kernel/smpboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c index 0f06b3722e96..2547d6c4a827 100644 --- a/arch/m32r/kernel/smpboot.c +++ b/arch/m32r/kernel/smpboot.c @@ -592,7 +592,7 @@ int setup_profiling_timer(unsigned int multiplier) * accounting. At that time they also adjust their APIC timers * accordingly. */ - for (i = 0; i < NR_CPUS; ++i) + for_each_possible_cpu(i) per_cpu(prof_multiplier, i) = multiplier; return 0; -- cgit v1.2.3 From 165ac433fa3f01ba99b29972f3adc283d03b0f17 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 1 Jan 2009 10:12:16 +1030 Subject: parisc: remove gratuitous cpu_online_map declaration. This is defined in linux/cpumask.h (included in this file already), and this is now defined differently. Signed-off-by: Rusty Russell Cc: linux-parisc@vger.kernel.org --- arch/parisc/include/asm/smp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/smp.h b/arch/parisc/include/asm/smp.h index 409e698f4361..6ef4b7867b1b 100644 --- a/arch/parisc/include/asm/smp.h +++ b/arch/parisc/include/asm/smp.h @@ -16,8 +16,6 @@ #include typedef unsigned long address_t; -extern cpumask_t cpu_online_map; - /* * Private routines/data -- cgit v1.2.3 From 96b8d4c19d797200b973caab57ca842531184c13 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 1 Jan 2009 10:12:16 +1030 Subject: avr32: define __fls Like fls, but can't be handed 0 and returns the bit number. (I broke this arch in linux-next by using __fls in generic code). Signed-off-by: Rusty Russell --- arch/avr32/include/asm/bitops.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/avr32/include/asm/bitops.h b/arch/avr32/include/asm/bitops.h index 1a50b69b1a19..f7dd5f71edf7 100644 --- a/arch/avr32/include/asm/bitops.h +++ b/arch/avr32/include/asm/bitops.h @@ -263,6 +263,11 @@ static inline int fls(unsigned long word) return 32 - result; } +static inline int __fls(unsigned long word) +{ + return fls(word) - 1; +} + unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size); unsigned long find_next_zero_bit(const unsigned long *addr, -- cgit v1.2.3 From ccec25ff69d5f48c7a088c16fe2dc7e11d9e87fe Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 1 Jan 2009 10:12:17 +1030 Subject: blackfin: define __fls Like fls, but can't be handed 0 and returns the bit number. (I broke this arch in linux-next by using __fls in generic code). Signed-off-by: Rusty Russell Acked-by: Mike Frysinger --- arch/blackfin/include/asm/bitops.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/blackfin/include/asm/bitops.h b/arch/blackfin/include/asm/bitops.h index b39a175c79c1..c428e4106f89 100644 --- a/arch/blackfin/include/asm/bitops.h +++ b/arch/blackfin/include/asm/bitops.h @@ -213,6 +213,7 @@ static __inline__ int __test_bit(int nr, const void *addr) #endif /* __KERNEL__ */ #include +#include #include #endif /* _BLACKFIN_BITOPS_H */ -- cgit v1.2.3 From 0db5d3d2f58804edb394e8008c7d9744110338a2 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 1 Jan 2009 10:12:18 +1030 Subject: m68knommu: define __fls Like fls, but can't be handed 0 and returns the bit number. (I broke this arch in linux-next by using __fls in generic code). Signed-off-by: Rusty Russell --- arch/m68knommu/include/asm/bitops.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/m68knommu/include/asm/bitops.h b/arch/m68knommu/include/asm/bitops.h index 6f3685eab44c..9d3cbe5fad1e 100644 --- a/arch/m68knommu/include/asm/bitops.h +++ b/arch/m68knommu/include/asm/bitops.h @@ -331,6 +331,7 @@ found_middle: #endif /* __KERNEL__ */ #include +#include #include #endif /* _M68KNOMMU_BITOPS_H */ -- cgit v1.2.3 From 78fd744f827586615da5b387fa9f0af1888601b6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 1 Jan 2009 10:12:20 +1030 Subject: cpumask: Introduce topology_core_cpumask()/topology_thread_cpumask(): sparc Impact: New API The old topology_core_siblings() and topology_thread_siblings() return a cpumask_t; these new ones return a (const) struct cpumask *. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- arch/sparc/include/asm/topology_64.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index 3270cfb1ab53..b8a65b64e1df 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -79,6 +79,8 @@ static inline int pcibus_to_node(struct pci_bus *pbus) #define topology_core_id(cpu) (cpu_data(cpu).core_id) #define topology_core_siblings(cpu) (cpu_core_map[cpu]) #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) +#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) +#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) #define mc_capable() (sparc64_multi_core) #define smt_capable() (sparc64_multi_core) #endif /* CONFIG_SMP */ -- cgit v1.2.3 From 2bb23a63f22f0e2d91fee93ff5ca9c29e180b146 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 1 Jan 2009 10:12:20 +1030 Subject: cpumask: Introduce topology_core_cpumask()/topology_thread_cpumask(): s390 Impact: New API The old topology_core_siblings() and topology_thread_siblings() return a cpumask_t; these new ones return a (const) struct cpumask *. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- arch/s390/include/asm/topology.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index fff4a86c602a..c93eb50e1d09 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -11,6 +11,7 @@ const struct cpumask *cpu_coregroup_mask(unsigned int cpu); extern cpumask_t cpu_core_map[NR_CPUS]; #define topology_core_siblings(cpu) (cpu_core_map[cpu]) +#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) int topology_set_cpu_management(int fc); void topology_schedule_update(void); -- cgit v1.2.3 From 9150641dd17fe9e213ab3391c8ebfc228daa2d9d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 1 Jan 2009 10:12:21 +1030 Subject: cpumask: Introduce topology_core_cpumask()/topology_thread_cpumask(): powerpc Impact: New API The old topology_core_siblings() and topology_thread_siblings() return a cpumask_t; these new ones return a (const) struct cpumask *. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- arch/powerpc/include/asm/topology.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index bcf25c2b8d21..236dae1cd29f 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -113,6 +113,8 @@ static inline void sysfs_remove_device_from_node(struct sys_device *dev, #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) #define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) +#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) +#define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu)) #define topology_core_id(cpu) (cpu_to_core_id(cpu)) #endif #endif -- cgit v1.2.3 From 333af15341b2f6cd813c054e1b441d7b6d8e9318 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 1 Jan 2009 10:12:21 +1030 Subject: cpumask: Introduce topology_core_cpumask()/topology_thread_cpumask(): ia64 Impact: New API The old topology_core_siblings() and topology_thread_siblings() return a cpumask_t; these new ones return a (const) struct cpumask *. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis --- arch/ia64/include/asm/topology.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 66f0f1ef9e7f..97ae7f509109 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -112,6 +112,8 @@ void build_cpu_to_node_map(void); #define topology_core_id(cpu) (cpu_data(cpu)->core_id) #define topology_core_siblings(cpu) (cpu_core_map[cpu]) #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) +#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) +#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) #define smt_capable() (smp_num_siblings > 1) #endif -- cgit v1.2.3 From d036e67b40f52bdd95392390108defbac7e53837 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 1 Jan 2009 10:12:26 +1030 Subject: cpumask: convert kernel/irq Impact: Reduce stack usage, use new cpumask API. ALPHA mod! Main change is that irq_default_affinity becomes a cpumask_var_t, so treat it as a pointer (this effects alpha). Signed-off-by: Rusty Russell --- arch/alpha/kernel/irq.c | 3 ++- include/linux/interrupt.h | 2 +- kernel/irq/manage.c | 11 +++++++++-- kernel/irq/proc.c | 32 +++++++++++++++++++++----------- 4 files changed, 33 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c index d0f1620007f7..703731accda6 100644 --- a/arch/alpha/kernel/irq.c +++ b/arch/alpha/kernel/irq.c @@ -50,7 +50,8 @@ int irq_select_affinity(unsigned int irq) if (!irq_desc[irq].chip->set_affinity || irq_user_affinity[irq]) return 1; - while (!cpu_possible(cpu) || !cpu_isset(cpu, irq_default_affinity)) + while (!cpu_possible(cpu) || + !cpumask_test_cpu(cpu, irq_default_affinity)) cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0); last_cpu = cpu; diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index dfaee6bd265b..91f1ef8e5810 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -109,7 +109,7 @@ extern void enable_irq(unsigned int irq); #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) -extern cpumask_t irq_default_affinity; +extern cpumask_var_t irq_default_affinity; extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask); extern int irq_can_set_affinity(unsigned int irq); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 61c4a9b62165..cd0cd8dcb345 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -16,8 +16,15 @@ #include "internals.h" #ifdef CONFIG_SMP +cpumask_var_t irq_default_affinity; -cpumask_t irq_default_affinity = CPU_MASK_ALL; +static int init_irq_default_affinity(void) +{ + alloc_cpumask_var(&irq_default_affinity, GFP_KERNEL); + cpumask_setall(irq_default_affinity); + return 0; +} +core_initcall(init_irq_default_affinity); /** * synchronize_irq - wait for pending IRQ handlers (on other CPUs) @@ -127,7 +134,7 @@ int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc) desc->status &= ~IRQ_AFFINITY_SET; } - cpumask_and(&desc->affinity, cpu_online_mask, &irq_default_affinity); + cpumask_and(&desc->affinity, cpu_online_mask, irq_default_affinity); set_affinity: desc->chip->set_affinity(irq, &desc->affinity); diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index d2c0e5ee53c5..2abd3a7716ed 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -20,7 +20,7 @@ static struct proc_dir_entry *root_irq_dir; static int irq_affinity_proc_show(struct seq_file *m, void *v) { struct irq_desc *desc = irq_to_desc((long)m->private); - cpumask_t *mask = &desc->affinity; + const struct cpumask *mask = &desc->affinity; #ifdef CONFIG_GENERIC_PENDING_IRQ if (desc->status & IRQ_MOVE_PENDING) @@ -93,7 +93,7 @@ static const struct file_operations irq_affinity_proc_fops = { static int default_affinity_show(struct seq_file *m, void *v) { - seq_cpumask(m, &irq_default_affinity); + seq_cpumask(m, irq_default_affinity); seq_putc(m, '\n'); return 0; } @@ -101,27 +101,37 @@ static int default_affinity_show(struct seq_file *m, void *v) static ssize_t default_affinity_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { - cpumask_t new_value; + cpumask_var_t new_value; int err; - err = cpumask_parse_user(buffer, count, &new_value); + if (!alloc_cpumask_var(&new_value, GFP_KERNEL)) + return -ENOMEM; + + err = cpumask_parse_user(buffer, count, new_value); if (err) - return err; + goto out; - if (!is_affinity_mask_valid(new_value)) - return -EINVAL; + if (!is_affinity_mask_valid(new_value)) { + err = -EINVAL; + goto out; + } /* * Do not allow disabling IRQs completely - it's a too easy * way to make the system unusable accidentally :-) At least * one online CPU still has to be targeted. */ - if (!cpus_intersects(new_value, cpu_online_map)) - return -EINVAL; + if (!cpumask_intersects(new_value, cpu_online_mask)) { + err = -EINVAL; + goto out; + } - irq_default_affinity = new_value; + cpumask_copy(irq_default_affinity, new_value); + err = count; - return count; +out: + free_cpumask_var(new_value); + return err; } static int default_affinity_open(struct inode *inode, struct file *file) -- cgit v1.2.3 From c64d8996bd758cedc2ddc04b86ca66fa1d8599cf Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 2 Jan 2009 11:27:18 +0300 Subject: x86: early_printk - use sizeof instead of hardcoded number Impact: cleanup Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/early_printk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 23b138e31e9c..504ad198e4ad 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -886,7 +886,7 @@ asmlinkage void early_printk(const char *fmt, ...) va_list ap; va_start(ap, fmt); - n = vscnprintf(buf, 512, fmt, ap); + n = vscnprintf(buf, sizeof(buf), fmt, ap); early_console->write(early_console, buf, n); va_end(ap); } -- cgit v1.2.3 From a9067d537615d534dcef06c0d819472e43a0d152 Mon Sep 17 00:00:00 2001 From: Ingo Brueckl Date: Fri, 2 Jan 2009 14:42:00 +0100 Subject: x86: convert permanent_kmaps_init() from macro to inline Impact: cleanup This compiler warning: arch/x86/mm/init_32.c:515: warning: unused variable 'pgd_base' triggers because permanent_kmaps_init() is a CPP macro in the !CONFIG_HIGHMEM case, that does not tell the compiler that the 'pgd_base' parameter is used. Convert permanent_kmaps_init() (and set_highmem_pages_init()) to C inline functions - which gives the parameter a proper type and which gets rid of the compiler warning as well. Signed-off-by: Ingo Brueckl Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 800e1d94c1b5..ad98b18f2b48 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -434,8 +434,12 @@ static void __init set_highmem_pages_init(void) #endif /* !CONFIG_NUMA */ #else -# define permanent_kmaps_init(pgd_base) do { } while (0) -# define set_highmem_pages_init() do { } while (0) +static inline void permanent_kmaps_init(pgd_t *pgd_base) +{ +} +static inline void set_highmem_pages_init(void) +{ +} #endif /* CONFIG_HIGHMEM */ void __init native_pagetable_setup_start(pgd_t *base) -- cgit v1.2.3 From 26799a63110dcbe81291ea53178f6b4810d07424 Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Wed, 31 Dec 2008 13:44:46 -0800 Subject: x86: fix incorrect __read_mostly on _boot_cpu_pda The pda rework (commit 3461b0af025251bbc6b3d56c821c6ac2de6f7209) to remove static boot cpu pdas introduced a performance bug. _boot_cpu_pda is the actual pda used by the boot cpu and is definitely not "__read_mostly" and ended up polluting the read mostly section with writes. This bug caused regression of about 8-10% on certain syscall intensive workloads. Signed-off-by: Ravikiran Thirumalai Acked-by: Mike Travis Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 388e05a5fc17..b9a4d8c4b935 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -27,7 +27,7 @@ #include /* boot cpu pda */ -static struct x8664_pda _boot_cpu_pda __read_mostly; +static struct x8664_pda _boot_cpu_pda; #ifdef CONFIG_SMP /* -- cgit v1.2.3 From 46814dded1b972a07b1609d81632eef3009fbb10 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Wed, 31 Dec 2008 13:20:50 -0600 Subject: x86, UV: remove erroneous BAU initialization Impact: fix crash on x86/UV UV is the SGI "UltraViolet" machine, which is x86_64 based. BAU is the "Broadcast Assist Unit", used for TLB shootdown in UV. This patch removes the allocation and initialization of an unused table. This table is left over from a development test mode. It is unused in the present code. And it was incorrectly initialized: 8 entries allocated but 17 initialized, causing slab corruption. This patch should go into 2.6.27 and 2.6.28 as well as the current tree. Diffed against 2.6.28 (linux-next, 12/30/08) Signed-off-by: Cliff Wickman Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_uv.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 6a00e5faaa74..f885023167e0 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -582,7 +582,6 @@ static int __init uv_ptc_init(void) static struct bau_control * __init uv_table_bases_init(int blade, int node) { int i; - int *ip; struct bau_msg_status *msp; struct bau_control *bau_tabp; @@ -599,13 +598,6 @@ static struct bau_control * __init uv_table_bases_init(int blade, int node) bau_cpubits_clear(&msp->seen_by, (int) uv_blade_nr_possible_cpus(blade)); - bau_tabp->watching = - kmalloc_node(sizeof(int) * DEST_NUM_RESOURCES, GFP_KERNEL, node); - BUG_ON(!bau_tabp->watching); - - for (i = 0, ip = bau_tabp->watching; i < DEST_Q_SIZE; i++, ip++) - *ip = 0; - uv_bau_table_bases[blade] = bau_tabp; return bau_tabp; @@ -628,7 +620,6 @@ uv_table_bases_finish(int blade, int node, int cur_cpu, bcp->bau_msg_head = bau_tablesp->va_queue_first; bcp->va_queue_first = bau_tablesp->va_queue_first; bcp->va_queue_last = bau_tablesp->va_queue_last; - bcp->watching = bau_tablesp->watching; bcp->msg_statuses = bau_tablesp->msg_statuses; bcp->descriptor_base = adp; } -- cgit v1.2.3 From f634fa941188a91dbf1dab961fe7a4509852fd6e Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 31 Dec 2008 16:29:48 +0530 Subject: x86: cpuid.c fix style problems Impact: cleanup Fixes style problems: WARNING: Use #include instead of ERROR: "foo * bar" should be "foo *bar" ERROR: trailing whitespace WARNING: usage of NR_CPUS is often wrong - consider using cpu_possible(), num_possible_cpus(), for_each_possible_cpu(), etc total: 2 errors, 2 warnings Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpuid.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 72cefd1e649b..85d28d53f5d3 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -39,10 +39,10 @@ #include #include #include +#include #include #include -#include #include static struct class *cpuid_class; @@ -82,7 +82,7 @@ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) } static ssize_t cpuid_read(struct file *file, char __user *buf, - size_t count, loff_t * ppos) + size_t count, loff_t *ppos) { char __user *tmp = buf; struct cpuid_regs cmd; @@ -117,7 +117,7 @@ static int cpuid_open(struct inode *inode, struct file *file) unsigned int cpu; struct cpuinfo_x86 *c; int ret = 0; - + lock_kernel(); cpu = iminor(file->f_path.dentry->d_inode); -- cgit v1.2.3 From 423a54058f746579aff1430877dbc82f17442b34 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 31 Dec 2008 16:42:20 +0530 Subject: x86: ldt.c fix style problems Impact: cleanup Fixes style problems: WARNING: Use #include instead of ERROR: space required before the open parenthesis '(' total: 1 errors, 1 warnings Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/ldt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index eee32b43fee3..71f1d99a635d 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -12,8 +12,8 @@ #include #include #include +#include -#include #include #include #include @@ -93,7 +93,7 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old) if (err < 0) return err; - for(i = 0; i < old->size; i++) + for (i = 0; i < old->size; i++) write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); return 0; } -- cgit v1.2.3 From dceb4521c8ed24b9fe4230e0d385cf4770260383 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 31 Dec 2008 17:35:02 +0530 Subject: x86: nmi.c fix style problems Impact: cleanup, fix style problems Fixes style problems: WARNING: Use #include instead of WARNING: Use #include instead of total: 0 errors, 2 warnings Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/nmi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 8bd1bf9622a7..45a09ccdc214 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -26,11 +26,10 @@ #include #include #include +#include #include #include -#include -#include #include #include -- cgit v1.2.3 From 103ceffb9501531f6931df6aebc11a05189201f0 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 2 Jan 2009 23:43:25 +0530 Subject: x86: mpparse.c fix style problems Impact: cleanup, fix style problems, more readable Fixes style problems: WARNING: Use #include instead of WARNING: Use #include instead of WARNING: suspect code indent for conditional statements (8, 17) WARNING: space prohibited between function name and open parenthesis '(' total: 0 errors, 5 warnings Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 45e3b69808ba..c5c5b8df1dbc 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -16,14 +16,14 @@ #include #include #include +#include +#include -#include #include #include #include #include #include -#include #include #include #include @@ -95,8 +95,8 @@ static void __init MP_bus_info(struct mpc_config_bus *m) #endif if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { - set_bit(m->mpc_busid, mp_bus_not_pci); -#if defined(CONFIG_EISA) || defined (CONFIG_MCA) + set_bit(m->mpc_busid, mp_bus_not_pci); +#if defined(CONFIG_EISA) || defined(CONFIG_MCA) mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; #endif } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { @@ -104,7 +104,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m) x86_quirks->mpc_oem_pci_bus(m); clear_bit(m->mpc_busid, mp_bus_not_pci); -#if defined(CONFIG_EISA) || defined (CONFIG_MCA) +#if defined(CONFIG_EISA) || defined(CONFIG_MCA) mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; -- cgit v1.2.3 From cfb2a494bb7dca9cf8d1632fbed14b34db051980 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Fri, 26 Dec 2008 22:41:18 +0100 Subject: m68k: fix recursive dependency in Kconfig We had a recursive dependency between MMU_MOTOROLA and MMU_SUN3 Fix it by dropping the unused dependencies on MMU_MOTOROLA. MMU_MOTOROLA is set to y only using select so any dependencies are anyway ignored. Signed-off-by: Sam Ravnborg Acked-by: Geert Uytterhoeven Cc: Roman Zippel --- arch/m68k/Kconfig | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 836fb66f080d..c825bde17cb3 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -280,7 +280,6 @@ config M68060 config MMU_MOTOROLA bool - depends on MMU && !MMU_SUN3 config MMU_SUN3 bool -- cgit v1.2.3 From 0999769e6cad9b0e5abb7c513c0c3f16821f0884 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 3 Jan 2009 15:37:14 +1030 Subject: cris: define __fls Like fls, but can't be handed 0 and returns the bit number. Signed-off-by: Rusty Russell --- arch/cris/include/asm/bitops.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/cris/include/asm/bitops.h b/arch/cris/include/asm/bitops.h index c0e62f811e09..9e69cfb7f134 100644 --- a/arch/cris/include/asm/bitops.h +++ b/arch/cris/include/asm/bitops.h @@ -148,6 +148,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr) #define ffs kernel_ffs #include +#include #include #include #include -- cgit v1.2.3 From 9ddabc2a29163e4b243d10c5e06fc5584073d7ad Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 3 Jan 2009 16:16:04 +1030 Subject: h8300: define __fls Like fls, but can't be handed 0 and returns the bit number. Signed-off-by: Rusty Russell --- arch/h8300/include/asm/bitops.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/h8300/include/asm/bitops.h b/arch/h8300/include/asm/bitops.h index cb18e3b0aa94..cb9ddf5fc54f 100644 --- a/arch/h8300/include/asm/bitops.h +++ b/arch/h8300/include/asm/bitops.h @@ -207,6 +207,7 @@ static __inline__ unsigned long __ffs(unsigned long word) #endif /* __KERNEL__ */ #include +#include #include #endif /* _H8300_BITOPS_H */ -- cgit v1.2.3 From c4fa3864281c7d88b7262cbc6cbd5c90bb59860e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Nov 2008 15:51:19 +0100 Subject: KVM: rename vtd.c to iommu.c Impact: file renamed The code in the vtd.c file can be reused for other IOMMUs as well. So rename it to make it clear that it handle more than VT-d. Signed-off-by: Joerg Roedel --- arch/ia64/kvm/Makefile | 2 +- arch/x86/kvm/Makefile | 2 +- virt/kvm/iommu.c | 217 +++++++++++++++++++++++++++++++++++++++++++++++++ virt/kvm/vtd.c | 217 ------------------------------------------------- 4 files changed, 219 insertions(+), 219 deletions(-) create mode 100644 virt/kvm/iommu.c delete mode 100644 virt/kvm/vtd.c (limited to 'arch') diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile index 76464dc312e6..cb69dfcfb1a6 100644 --- a/arch/ia64/kvm/Makefile +++ b/arch/ia64/kvm/Makefile @@ -52,7 +52,7 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ coalesced_mmio.o irq_comm.o) ifeq ($(CONFIG_DMAR),y) -common-objs += $(addprefix ../../../virt/kvm/, vtd.o) +common-objs += $(addprefix ../../../virt/kvm/, iommu.o) endif kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index c02343594b4d..00f46c2d14bd 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -8,7 +8,7 @@ ifeq ($(CONFIG_KVM_TRACE),y) common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) endif ifeq ($(CONFIG_DMAR),y) -common-objs += $(addprefix ../../../virt/kvm/, vtd.o) +common-objs += $(addprefix ../../../virt/kvm/, iommu.o) endif EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c new file mode 100644 index 000000000000..d46de9af838b --- /dev/null +++ b/virt/kvm/iommu.c @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) 2006-2008 Intel Corporation + * Copyright IBM Corporation, 2008 + * Author: Allen M. Kay + * Author: Weidong Han + * Author: Ben-Ami Yassour + */ + +#include +#include +#include +#include +#include + +static int kvm_iommu_unmap_memslots(struct kvm *kvm); +static void kvm_iommu_put_pages(struct kvm *kvm, + gfn_t base_gfn, unsigned long npages); + +int kvm_iommu_map_pages(struct kvm *kvm, + gfn_t base_gfn, unsigned long npages) +{ + gfn_t gfn = base_gfn; + pfn_t pfn; + int i, r = 0; + struct dmar_domain *domain = kvm->arch.intel_iommu_domain; + + /* check if iommu exists and in use */ + if (!domain) + return 0; + + for (i = 0; i < npages; i++) { + /* check if already mapped */ + if (intel_iommu_iova_to_phys(domain, + gfn_to_gpa(gfn))) + continue; + + pfn = gfn_to_pfn(kvm, gfn); + r = intel_iommu_map_address(domain, + gfn_to_gpa(gfn), + pfn_to_hpa(pfn), + PAGE_SIZE, + DMA_PTE_READ | DMA_PTE_WRITE); + if (r) { + printk(KERN_ERR "kvm_iommu_map_address:" + "iommu failed to map pfn=%lx\n", pfn); + goto unmap_pages; + } + gfn++; + } + return 0; + +unmap_pages: + kvm_iommu_put_pages(kvm, base_gfn, i); + return r; +} + +static int kvm_iommu_map_memslots(struct kvm *kvm) +{ + int i, r; + + down_read(&kvm->slots_lock); + for (i = 0; i < kvm->nmemslots; i++) { + r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn, + kvm->memslots[i].npages); + if (r) + break; + } + up_read(&kvm->slots_lock); + return r; +} + +int kvm_assign_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + struct pci_dev *pdev = NULL; + struct dmar_domain *domain = kvm->arch.intel_iommu_domain; + int r; + + /* check if iommu exists and in use */ + if (!domain) + return 0; + + pdev = assigned_dev->dev; + if (pdev == NULL) + return -ENODEV; + + r = intel_iommu_attach_device(domain, pdev); + if (r) { + printk(KERN_ERR "assign device %x:%x.%x failed", + pdev->bus->number, + PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn)); + return r; + } + + printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n", + assigned_dev->host_busnr, + PCI_SLOT(assigned_dev->host_devfn), + PCI_FUNC(assigned_dev->host_devfn)); + + return 0; +} + +int kvm_deassign_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + struct dmar_domain *domain = kvm->arch.intel_iommu_domain; + struct pci_dev *pdev = NULL; + + /* check if iommu exists and in use */ + if (!domain) + return 0; + + pdev = assigned_dev->dev; + if (pdev == NULL) + return -ENODEV; + + intel_iommu_detach_device(domain, pdev); + + printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n", + assigned_dev->host_busnr, + PCI_SLOT(assigned_dev->host_devfn), + PCI_FUNC(assigned_dev->host_devfn)); + + return 0; +} + +int kvm_iommu_map_guest(struct kvm *kvm) +{ + int r; + + if (!intel_iommu_found()) { + printk(KERN_ERR "%s: intel iommu not found\n", __func__); + return -ENODEV; + } + + kvm->arch.intel_iommu_domain = intel_iommu_alloc_domain(); + if (!kvm->arch.intel_iommu_domain) + return -ENOMEM; + + r = kvm_iommu_map_memslots(kvm); + if (r) + goto out_unmap; + + return 0; + +out_unmap: + kvm_iommu_unmap_memslots(kvm); + return r; +} + +static void kvm_iommu_put_pages(struct kvm *kvm, + gfn_t base_gfn, unsigned long npages) +{ + gfn_t gfn = base_gfn; + pfn_t pfn; + struct dmar_domain *domain = kvm->arch.intel_iommu_domain; + unsigned long i; + u64 phys; + + /* check if iommu exists and in use */ + if (!domain) + return; + + for (i = 0; i < npages; i++) { + phys = intel_iommu_iova_to_phys(domain, + gfn_to_gpa(gfn)); + pfn = phys >> PAGE_SHIFT; + kvm_release_pfn_clean(pfn); + gfn++; + } + + intel_iommu_unmap_address(domain, + gfn_to_gpa(base_gfn), + PAGE_SIZE * npages); +} + +static int kvm_iommu_unmap_memslots(struct kvm *kvm) +{ + int i; + down_read(&kvm->slots_lock); + for (i = 0; i < kvm->nmemslots; i++) { + kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn, + kvm->memslots[i].npages); + } + up_read(&kvm->slots_lock); + + return 0; +} + +int kvm_iommu_unmap_guest(struct kvm *kvm) +{ + struct dmar_domain *domain = kvm->arch.intel_iommu_domain; + + /* check if iommu exists and in use */ + if (!domain) + return 0; + + kvm_iommu_unmap_memslots(kvm); + intel_iommu_free_domain(domain); + return 0; +} diff --git a/virt/kvm/vtd.c b/virt/kvm/vtd.c deleted file mode 100644 index d46de9af838b..000000000000 --- a/virt/kvm/vtd.c +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Copyright (c) 2006, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Copyright (C) 2006-2008 Intel Corporation - * Copyright IBM Corporation, 2008 - * Author: Allen M. Kay - * Author: Weidong Han - * Author: Ben-Ami Yassour - */ - -#include -#include -#include -#include -#include - -static int kvm_iommu_unmap_memslots(struct kvm *kvm); -static void kvm_iommu_put_pages(struct kvm *kvm, - gfn_t base_gfn, unsigned long npages); - -int kvm_iommu_map_pages(struct kvm *kvm, - gfn_t base_gfn, unsigned long npages) -{ - gfn_t gfn = base_gfn; - pfn_t pfn; - int i, r = 0; - struct dmar_domain *domain = kvm->arch.intel_iommu_domain; - - /* check if iommu exists and in use */ - if (!domain) - return 0; - - for (i = 0; i < npages; i++) { - /* check if already mapped */ - if (intel_iommu_iova_to_phys(domain, - gfn_to_gpa(gfn))) - continue; - - pfn = gfn_to_pfn(kvm, gfn); - r = intel_iommu_map_address(domain, - gfn_to_gpa(gfn), - pfn_to_hpa(pfn), - PAGE_SIZE, - DMA_PTE_READ | DMA_PTE_WRITE); - if (r) { - printk(KERN_ERR "kvm_iommu_map_address:" - "iommu failed to map pfn=%lx\n", pfn); - goto unmap_pages; - } - gfn++; - } - return 0; - -unmap_pages: - kvm_iommu_put_pages(kvm, base_gfn, i); - return r; -} - -static int kvm_iommu_map_memslots(struct kvm *kvm) -{ - int i, r; - - down_read(&kvm->slots_lock); - for (i = 0; i < kvm->nmemslots; i++) { - r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn, - kvm->memslots[i].npages); - if (r) - break; - } - up_read(&kvm->slots_lock); - return r; -} - -int kvm_assign_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - struct pci_dev *pdev = NULL; - struct dmar_domain *domain = kvm->arch.intel_iommu_domain; - int r; - - /* check if iommu exists and in use */ - if (!domain) - return 0; - - pdev = assigned_dev->dev; - if (pdev == NULL) - return -ENODEV; - - r = intel_iommu_attach_device(domain, pdev); - if (r) { - printk(KERN_ERR "assign device %x:%x.%x failed", - pdev->bus->number, - PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn)); - return r; - } - - printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n", - assigned_dev->host_busnr, - PCI_SLOT(assigned_dev->host_devfn), - PCI_FUNC(assigned_dev->host_devfn)); - - return 0; -} - -int kvm_deassign_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - struct dmar_domain *domain = kvm->arch.intel_iommu_domain; - struct pci_dev *pdev = NULL; - - /* check if iommu exists and in use */ - if (!domain) - return 0; - - pdev = assigned_dev->dev; - if (pdev == NULL) - return -ENODEV; - - intel_iommu_detach_device(domain, pdev); - - printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n", - assigned_dev->host_busnr, - PCI_SLOT(assigned_dev->host_devfn), - PCI_FUNC(assigned_dev->host_devfn)); - - return 0; -} - -int kvm_iommu_map_guest(struct kvm *kvm) -{ - int r; - - if (!intel_iommu_found()) { - printk(KERN_ERR "%s: intel iommu not found\n", __func__); - return -ENODEV; - } - - kvm->arch.intel_iommu_domain = intel_iommu_alloc_domain(); - if (!kvm->arch.intel_iommu_domain) - return -ENOMEM; - - r = kvm_iommu_map_memslots(kvm); - if (r) - goto out_unmap; - - return 0; - -out_unmap: - kvm_iommu_unmap_memslots(kvm); - return r; -} - -static void kvm_iommu_put_pages(struct kvm *kvm, - gfn_t base_gfn, unsigned long npages) -{ - gfn_t gfn = base_gfn; - pfn_t pfn; - struct dmar_domain *domain = kvm->arch.intel_iommu_domain; - unsigned long i; - u64 phys; - - /* check if iommu exists and in use */ - if (!domain) - return; - - for (i = 0; i < npages; i++) { - phys = intel_iommu_iova_to_phys(domain, - gfn_to_gpa(gfn)); - pfn = phys >> PAGE_SHIFT; - kvm_release_pfn_clean(pfn); - gfn++; - } - - intel_iommu_unmap_address(domain, - gfn_to_gpa(base_gfn), - PAGE_SIZE * npages); -} - -static int kvm_iommu_unmap_memslots(struct kvm *kvm) -{ - int i; - down_read(&kvm->slots_lock); - for (i = 0; i < kvm->nmemslots; i++) { - kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn, - kvm->memslots[i].npages); - } - up_read(&kvm->slots_lock); - - return 0; -} - -int kvm_iommu_unmap_guest(struct kvm *kvm) -{ - struct dmar_domain *domain = kvm->arch.intel_iommu_domain; - - /* check if iommu exists and in use */ - if (!domain) - return 0; - - kvm_iommu_unmap_memslots(kvm); - intel_iommu_free_domain(domain); - return 0; -} -- cgit v1.2.3 From 1aaf118352b85bb359ce28070bcc478f659a7031 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Nov 2008 17:25:13 +0100 Subject: select IOMMU_API when DMAR and/or AMD_IOMMU is selected These two IOMMUs can implement the current version of this API. So select the API if one or both of these IOMMU drivers is selected. Signed-off-by: Joerg Roedel --- arch/ia64/Kconfig | 3 +++ arch/x86/Kconfig | 3 +++ drivers/base/Makefile | 1 + 3 files changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 7fa8f615ba6e..3d31636cbafb 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -687,3 +687,6 @@ config IRQ_PER_CPU config IOMMU_HELPER def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB) + +config IOMMU_API + def_bool (DMAR) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 249d1e0824b5..4737435b00d4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -599,6 +599,9 @@ config SWIOTLB config IOMMU_HELPER def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU) +config IOMMU_API + def_bool (AMD_IOMMU || DMAR) + config MAXSMP bool "Configure Maximum number of SMP Processors and NUMA Nodes" depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL diff --git a/drivers/base/Makefile b/drivers/base/Makefile index c66637392bbc..b5b8ba512b28 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_FW_LOADER) += firmware_class.o obj-$(CONFIG_NUMA) += node.o obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o obj-$(CONFIG_SMP) += topology.o +obj-$(CONFIG_IOMMU_API) += iommu.o ifeq ($(CONFIG_SYSFS),y) obj-$(CONFIG_MODULES) += module.o endif -- cgit v1.2.3 From 19de40a8472fa64693eab844911eec277d489f6c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Dec 2008 14:43:34 +0100 Subject: KVM: change KVM to use IOMMU API Signed-off-by: Joerg Roedel --- arch/ia64/include/asm/kvm_host.h | 2 +- arch/ia64/kvm/Makefile | 2 +- arch/ia64/kvm/kvm-ia64.c | 3 ++- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/Makefile | 2 +- arch/x86/kvm/x86.c | 3 ++- include/linux/kvm_host.h | 6 +++--- virt/kvm/iommu.c | 45 +++++++++++++++++++--------------------- virt/kvm/kvm_main.c | 2 +- 9 files changed, 33 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 0560f3fae538..348663661659 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -467,7 +467,7 @@ struct kvm_arch { struct kvm_sal_data rdv_sal_data; struct list_head assigned_dev_head; - struct dmar_domain *intel_iommu_domain; + struct iommu_domain *iommu_domain; struct hlist_head irq_ack_notifier_list; unsigned long irq_sources_bitmap; diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile index cb69dfcfb1a6..0bb99b732908 100644 --- a/arch/ia64/kvm/Makefile +++ b/arch/ia64/kvm/Makefile @@ -51,7 +51,7 @@ EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ coalesced_mmio.o irq_comm.o) -ifeq ($(CONFIG_DMAR),y) +ifeq ($(CONFIG_IOMMU_API),y) common-objs += $(addprefix ../../../virt/kvm/, iommu.o) endif diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 0f5ebd948437..4e586f6110aa 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -188,7 +189,7 @@ int kvm_dev_ioctl_check_extension(long ext) r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; case KVM_CAP_IOMMU: - r = intel_iommu_found(); + r = iommu_found(); break; default: r = 0; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 97215a458e5f..730843d1d2fb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -360,7 +360,7 @@ struct kvm_arch{ struct list_head active_mmu_pages; struct list_head assigned_dev_head; struct list_head oos_global_pages; - struct dmar_domain *intel_iommu_domain; + struct iommu_domain *iommu_domain; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 00f46c2d14bd..d3ec292f00f2 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -7,7 +7,7 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ ifeq ($(CONFIG_KVM_TRACE),y) common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) endif -ifeq ($(CONFIG_DMAR),y) +ifeq ($(CONFIG_IOMMU_API),y) common-objs += $(addprefix ../../../virt/kvm/, iommu.o) endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0e6aa8141dcd..cc17546a2406 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -989,7 +990,7 @@ int kvm_dev_ioctl_check_extension(long ext) r = !tdp_enabled; break; case KVM_CAP_IOMMU: - r = intel_iommu_found(); + r = iommu_found(); break; default: r = 0; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e62a4629e51c..ec49d0be7f52 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -328,7 +328,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian); int kvm_request_irq_source_id(struct kvm *kvm); void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); -#ifdef CONFIG_DMAR +#ifdef CONFIG_IOMMU_API int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages); int kvm_iommu_map_guest(struct kvm *kvm); @@ -337,7 +337,7 @@ int kvm_assign_device(struct kvm *kvm, struct kvm_assigned_dev_kernel *assigned_dev); int kvm_deassign_device(struct kvm *kvm, struct kvm_assigned_dev_kernel *assigned_dev); -#else /* CONFIG_DMAR */ +#else /* CONFIG_IOMMU_API */ static inline int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages) @@ -366,7 +366,7 @@ static inline int kvm_deassign_device(struct kvm *kvm, { return 0; } -#endif /* CONFIG_DMAR */ +#endif /* CONFIG_IOMMU_API */ static inline void kvm_guest_enter(void) { diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index d46de9af838b..d0bebaa5bf0a 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -25,6 +25,7 @@ #include #include #include +#include #include static int kvm_iommu_unmap_memslots(struct kvm *kvm); @@ -37,7 +38,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, gfn_t gfn = base_gfn; pfn_t pfn; int i, r = 0; - struct dmar_domain *domain = kvm->arch.intel_iommu_domain; + struct iommu_domain *domain = kvm->arch.iommu_domain; /* check if iommu exists and in use */ if (!domain) @@ -45,16 +46,15 @@ int kvm_iommu_map_pages(struct kvm *kvm, for (i = 0; i < npages; i++) { /* check if already mapped */ - if (intel_iommu_iova_to_phys(domain, - gfn_to_gpa(gfn))) + if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) continue; pfn = gfn_to_pfn(kvm, gfn); - r = intel_iommu_map_address(domain, - gfn_to_gpa(gfn), - pfn_to_hpa(pfn), - PAGE_SIZE, - DMA_PTE_READ | DMA_PTE_WRITE); + r = iommu_map_range(domain, + gfn_to_gpa(gfn), + pfn_to_hpa(pfn), + PAGE_SIZE, + IOMMU_READ | IOMMU_WRITE); if (r) { printk(KERN_ERR "kvm_iommu_map_address:" "iommu failed to map pfn=%lx\n", pfn); @@ -88,7 +88,7 @@ int kvm_assign_device(struct kvm *kvm, struct kvm_assigned_dev_kernel *assigned_dev) { struct pci_dev *pdev = NULL; - struct dmar_domain *domain = kvm->arch.intel_iommu_domain; + struct iommu_domain *domain = kvm->arch.iommu_domain; int r; /* check if iommu exists and in use */ @@ -99,7 +99,7 @@ int kvm_assign_device(struct kvm *kvm, if (pdev == NULL) return -ENODEV; - r = intel_iommu_attach_device(domain, pdev); + r = iommu_attach_device(domain, &pdev->dev); if (r) { printk(KERN_ERR "assign device %x:%x.%x failed", pdev->bus->number, @@ -119,7 +119,7 @@ int kvm_assign_device(struct kvm *kvm, int kvm_deassign_device(struct kvm *kvm, struct kvm_assigned_dev_kernel *assigned_dev) { - struct dmar_domain *domain = kvm->arch.intel_iommu_domain; + struct iommu_domain *domain = kvm->arch.iommu_domain; struct pci_dev *pdev = NULL; /* check if iommu exists and in use */ @@ -130,7 +130,7 @@ int kvm_deassign_device(struct kvm *kvm, if (pdev == NULL) return -ENODEV; - intel_iommu_detach_device(domain, pdev); + iommu_detach_device(domain, &pdev->dev); printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n", assigned_dev->host_busnr, @@ -144,13 +144,13 @@ int kvm_iommu_map_guest(struct kvm *kvm) { int r; - if (!intel_iommu_found()) { - printk(KERN_ERR "%s: intel iommu not found\n", __func__); + if (!iommu_found()) { + printk(KERN_ERR "%s: iommu not found\n", __func__); return -ENODEV; } - kvm->arch.intel_iommu_domain = intel_iommu_alloc_domain(); - if (!kvm->arch.intel_iommu_domain) + kvm->arch.iommu_domain = iommu_domain_alloc(); + if (!kvm->arch.iommu_domain) return -ENOMEM; r = kvm_iommu_map_memslots(kvm); @@ -169,7 +169,7 @@ static void kvm_iommu_put_pages(struct kvm *kvm, { gfn_t gfn = base_gfn; pfn_t pfn; - struct dmar_domain *domain = kvm->arch.intel_iommu_domain; + struct iommu_domain *domain = kvm->arch.iommu_domain; unsigned long i; u64 phys; @@ -178,16 +178,13 @@ static void kvm_iommu_put_pages(struct kvm *kvm, return; for (i = 0; i < npages; i++) { - phys = intel_iommu_iova_to_phys(domain, - gfn_to_gpa(gfn)); + phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn)); pfn = phys >> PAGE_SHIFT; kvm_release_pfn_clean(pfn); gfn++; } - intel_iommu_unmap_address(domain, - gfn_to_gpa(base_gfn), - PAGE_SIZE * npages); + iommu_unmap_range(domain, gfn_to_gpa(base_gfn), PAGE_SIZE * npages); } static int kvm_iommu_unmap_memslots(struct kvm *kvm) @@ -205,13 +202,13 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm) int kvm_iommu_unmap_guest(struct kvm *kvm) { - struct dmar_domain *domain = kvm->arch.intel_iommu_domain; + struct iommu_domain *domain = kvm->arch.iommu_domain; /* check if iommu exists and in use */ if (!domain) return 0; kvm_iommu_unmap_memslots(kvm); - intel_iommu_free_domain(domain); + iommu_domain_free(domain); return 0; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4ef0fb43d1f9..3a5a08298aab 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -504,7 +504,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, list_add(&match->list, &kvm->arch.assigned_dev_head); if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { - if (!kvm->arch.intel_iommu_domain) { + if (!kvm->arch.iommu_domain) { r = kvm_iommu_map_guest(kvm); if (r) goto out_list_del; -- cgit v1.2.3 From 38e817febe2f12bd2fbf92a1df36f41946d0c223 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Dec 2008 17:27:52 +0100 Subject: AMD IOMMU: rename iommu_map to iommu_map_page Impact: function rename The iommu_map function maps only one page. Make this clear in the function name. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 2e2da717b350..b11c855af7b4 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -338,10 +338,10 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) * supporting all features of AMD IOMMU page tables like level skipping * and full 64 bit address spaces. */ -static int iommu_map(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long phys_addr, - int prot) +static int iommu_map_page(struct protection_domain *dom, + unsigned long bus_addr, + unsigned long phys_addr, + int prot) { u64 __pte, *pte, *page; @@ -440,7 +440,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, for (addr = e->address_start; addr < e->address_end; addr += PAGE_SIZE) { - ret = iommu_map(&dma_dom->domain, addr, addr, e->prot); + ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot); if (ret) return ret; /* -- cgit v1.2.3 From 86db2e5d47bfa61a151d6ac83263f4bde4d52290 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Dec 2008 18:20:21 +0100 Subject: AMD IOMMU: make dma_ops_free_pagetable generic Impact: change code to free pagetables from protection domains The dma_ops_free_pagetable function can only free pagetables from dma_ops domains. Change that to free pagetables of pure protection domains. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index b11c855af7b4..8a0fd3d09973 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -587,12 +587,12 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, iommu_area_reserve(dom->bitmap, start_page, pages); } -static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) +static void free_pagetable(struct protection_domain *domain) { int i, j; u64 *p1, *p2, *p3; - p1 = dma_dom->domain.pt_root; + p1 = domain->pt_root; if (!p1) return; @@ -613,6 +613,8 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) } free_page((unsigned long)p1); + + domain->pt_root = NULL; } /* @@ -624,7 +626,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) if (!dom) return; - dma_ops_free_pagetable(dom); + free_pagetable(&dom->domain); kfree(dom->pte_pages); -- cgit v1.2.3 From a2acfb75792511a35586db80a38b8e4701a97730 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Dec 2008 18:28:53 +0100 Subject: AMD IOMMU: add domain id free function Impact: add code to release a domain id Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 8a0fd3d09973..0922d5fe633c 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -571,6 +571,18 @@ static u16 domain_id_alloc(void) return id; } +#ifdef CONFIG_IOMMU_API +static void domain_id_free(int id) +{ + unsigned long flags; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + if (id > 0 && id < MAX_DOMAIN_ID) + __clear_bit(id, amd_iommu_pd_alloc_bitmap); + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); +} +#endif + /* * Used to reserve address ranges in the aperture (e.g. for exclusion * ranges. -- cgit v1.2.3 From 8d201968e15f56ae2837b0d0b64d3fff098857b0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Dec 2008 20:34:41 +0100 Subject: AMD IOMMU: refactor completion wait handling into separate functions Impact: split one function into three The separate functions are required synchronize commands across all hardware IOMMUs in the system. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 68 ++++++++++++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0922d5fe633c..2280ef86651f 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -195,6 +195,46 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) return ret; } +/* + * This function waits until an IOMMU has completed a completion + * wait command + */ +static void __iommu_wait_for_completion(struct amd_iommu *iommu) +{ + int ready = 0; + unsigned status = 0; + unsigned long i = 0; + + while (!ready && (i < EXIT_LOOP_COUNT)) { + ++i; + /* wait for the bit to become one */ + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + ready = status & MMIO_STATUS_COM_WAIT_INT_MASK; + } + + /* set bit back to zero */ + status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; + writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); + + if (unlikely(i == EXIT_LOOP_COUNT)) + panic("AMD IOMMU: Completion wait loop failed\n"); +} + +/* + * This function queues a completion wait command into the command + * buffer of an IOMMU + */ +static int __iommu_completion_wait(struct amd_iommu *iommu) +{ + struct iommu_cmd cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; + CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); + + return __iommu_queue_command(iommu, &cmd); +} + /* * This function is called whenever we need to ensure that the IOMMU has * completed execution of all commands we sent. It sends a @@ -204,40 +244,22 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) */ static int iommu_completion_wait(struct amd_iommu *iommu) { - int ret = 0, ready = 0; - unsigned status = 0; - struct iommu_cmd cmd; - unsigned long flags, i = 0; - - memset(&cmd, 0, sizeof(cmd)); - cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; - CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); + int ret = 0; + unsigned long flags; spin_lock_irqsave(&iommu->lock, flags); if (!iommu->need_sync) goto out; - iommu->need_sync = 0; + ret = __iommu_completion_wait(iommu); - ret = __iommu_queue_command(iommu, &cmd); + iommu->need_sync = 0; if (ret) goto out; - while (!ready && (i < EXIT_LOOP_COUNT)) { - ++i; - /* wait for the bit to become one */ - status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); - ready = status & MMIO_STATUS_COM_WAIT_INT_MASK; - } - - /* set bit back to zero */ - status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; - writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); - - if (unlikely(i == EXIT_LOOP_COUNT)) - panic("AMD IOMMU: Completion wait loop failed\n"); + __iommu_wait_for_completion(iommu); out: spin_unlock_irqrestore(&iommu->lock, flags); -- cgit v1.2.3 From 237b6f33291394c337ae84e2d3782d5605803af2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Dec 2008 20:54:37 +0100 Subject: AMD IOMMU: move invalidation command building to a separate function Impact: refactoring of iommu_queue_inv_iommu_pages Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 2280ef86651f..fee16fbf2f33 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -286,6 +286,21 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) return ret; } +static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, + u16 domid, int pde, int s) +{ + memset(cmd, 0, sizeof(*cmd)); + address &= PAGE_MASK; + CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); + cmd->data[1] |= domid; + cmd->data[2] = lower_32_bits(address); + cmd->data[3] = upper_32_bits(address); + if (s) /* size bit - we flush more than one 4kb page */ + cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; + if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ + cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; +} + /* * Generic command send function for invalidaing TLB entries */ @@ -295,16 +310,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, struct iommu_cmd cmd; int ret; - memset(&cmd, 0, sizeof(cmd)); - address &= PAGE_MASK; - CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); - cmd.data[1] |= domid; - cmd.data[2] = lower_32_bits(address); - cmd.data[3] = upper_32_bits(address); - if (s) /* size bit - we flush more than one 4kb page */ - cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; - if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ - cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; + __iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s); ret = iommu_queue_command(iommu, &cmd); -- cgit v1.2.3 From 9e919012e33c481991e46aa4cb13d807cd47b798 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 10 Dec 2008 20:05:52 +0100 Subject: AMD IOMMU: don't remove protection domain from iommu_pd_list Impact: save unneeded logic to add and remove domains to the list The removal of a protection domain from the iommu_pd_list is not necessary. Another benefit is that we save complexity because we don't have to readd it later when the device no longer uses the domain. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index fee16fbf2f33..b7b3067630cf 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -844,7 +844,6 @@ static struct dma_ops_domain *find_protection_domain(u16 devid) list_for_each_entry(entry, &iommu_pd_list, list) { if (entry->target_dev == devid) { ret = entry; - list_del(&ret->list); break; } } -- cgit v1.2.3 From 43f4960983a309568a6c4375f081e63fb2ff24a3 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Dec 2008 21:01:12 +0100 Subject: AMD IOMMU: add iommu_flush_domain function Impact: add a function to flush a domain id on every IOMMU Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index b7b3067630cf..2b6b8e050bd8 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -352,6 +352,30 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); } +#ifdef CONFIG_IOMMU_API +/* + * This function is used to flush the IO/TLB for a given protection domain + * on every IOMMU in the system + */ +static void iommu_flush_domain(u16 domid) +{ + unsigned long flags; + struct amd_iommu *iommu; + struct iommu_cmd cmd; + + __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, + domid, 1, 1); + + list_for_each_entry(iommu, &amd_iommu_list, list) { + spin_lock_irqsave(&iommu->lock, flags); + __iommu_queue_command(iommu, &cmd); + __iommu_completion_wait(iommu); + __iommu_wait_for_completion(iommu); + spin_unlock_irqrestore(&iommu->lock, flags); + } +} +#endif + /**************************************************************************** * * The functions below are used the create the page table mappings for -- cgit v1.2.3 From 9fdb19d64c0247f23343b51fc85f438f8e7a2f3c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Dec 2008 17:46:25 +0100 Subject: AMD IOMMU: add protection domain flags Imapct: add a new struct member to 'struct protection_domain' When using protection domains for dma_ops and KVM its better to know for which subsystem it was allocated. Add a flags member to struct protection domain for that purpose. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 14 +++++++++----- arch/x86/kernel/amd_iommu.c | 1 + 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index ac302a2fa339..4862a5be899c 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -190,16 +190,20 @@ /* FIXME: move this macro to */ #define PCI_BUS(x) (((x) >> 8) & 0xff) +/* Protection domain flags */ +#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ + /* * This structure contains generic data for IOMMU protection domains * independent of their use. */ struct protection_domain { - spinlock_t lock; /* mostly used to lock the page table*/ - u16 id; /* the domain id written to the device table */ - int mode; /* paging mode (0-6 levels) */ - u64 *pt_root; /* page table root pointer */ - void *priv; /* private data */ + spinlock_t lock; /* mostly used to lock the page table*/ + u16 id; /* the domain id written to the device table */ + int mode; /* paging mode (0-6 levels) */ + u64 *pt_root; /* page table root pointer */ + unsigned long flags; /* flags to find out type of domain */ + void *priv; /* private data */ }; /* diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 2b6b8e050bd8..bb28e2cda711 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -729,6 +729,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, goto free_dma_dom; dma_dom->domain.mode = PAGE_MODE_3_LEVEL; dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); + dma_dom->domain.flags = PD_DMA_OPS_MASK; dma_dom->domain.priv = dma_dom; if (!dma_dom->domain.pt_root) goto free_dma_dom; -- cgit v1.2.3 From 5b28df6f43ac9878f310ad0cb7f11ddb262a7ac6 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Dec 2008 17:49:42 +0100 Subject: AMD IOMMU: add checks for dma_ops domain to dma_ops functions Impact: detect when a driver uses a device assigned otherwise Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index bb28e2cda711..5c465c91150e 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -791,6 +791,15 @@ free_dma_dom: return NULL; } +/* + * little helper function to check whether a given protection domain is a + * dma_ops domain + */ +static bool dma_ops_domain(struct protection_domain *domain) +{ + return domain->flags & PD_DMA_OPS_MASK; +} + /* * Find out the protection domain structure for a given PCI device. This * will give us the pointer to the page table root for example. @@ -1096,6 +1105,9 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, /* device not handled by any AMD IOMMU */ return (dma_addr_t)paddr; + if (!dma_ops_domain(domain)) + return bad_dma_address; + spin_lock_irqsave(&domain->lock, flags); addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, dma_mask); @@ -1126,6 +1138,9 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, /* device not handled by any AMD IOMMU */ return; + if (!dma_ops_domain(domain)) + return; + spin_lock_irqsave(&domain->lock, flags); __unmap_single(iommu, domain->priv, dma_addr, size, dir); @@ -1180,6 +1195,9 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, if (!iommu || !domain) return map_sg_no_iommu(dev, sglist, nelems, dir); + if (!dma_ops_domain(domain)) + return 0; + spin_lock_irqsave(&domain->lock, flags); for_each_sg(sglist, s, nelems, i) { @@ -1233,6 +1251,9 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, !get_device_resources(dev, &iommu, &domain, &devid)) return; + if (!dma_ops_domain(domain)) + return; + spin_lock_irqsave(&domain->lock, flags); for_each_sg(sglist, s, nelems, i) { @@ -1278,6 +1299,9 @@ static void *alloc_coherent(struct device *dev, size_t size, return virt_addr; } + if (!dma_ops_domain(domain)) + goto out_free; + if (!dma_mask) dma_mask = *dev->dma_mask; @@ -1286,18 +1310,20 @@ static void *alloc_coherent(struct device *dev, size_t size, *dma_addr = __map_single(dev, iommu, domain->priv, paddr, size, DMA_BIDIRECTIONAL, true, dma_mask); - if (*dma_addr == bad_dma_address) { - free_pages((unsigned long)virt_addr, get_order(size)); - virt_addr = NULL; - goto out; - } + if (*dma_addr == bad_dma_address) + goto out_free; iommu_completion_wait(iommu); -out: spin_unlock_irqrestore(&domain->lock, flags); return virt_addr; + +out_free: + + free_pages((unsigned long)virt_addr, get_order(size)); + + return NULL; } /* @@ -1319,6 +1345,9 @@ static void free_coherent(struct device *dev, size_t size, if (!iommu || !domain) goto free_mem; + if (!dma_ops_domain(domain)) + goto free_mem; + spin_lock_irqsave(&domain->lock, flags); __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); -- cgit v1.2.3 From 863c74ebd0152b21bc4b11c1447b5d1429287d37 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Dec 2008 17:56:36 +0100 Subject: AMD IOMMU: add device reference counting for protection domains Impact: know how many devices are assigned to a domain Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 1 + arch/x86/kernel/amd_iommu.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 4862a5be899c..1c769f4e6cdf 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -203,6 +203,7 @@ struct protection_domain { int mode; /* paging mode (0-6 levels) */ u64 *pt_root; /* page table root pointer */ unsigned long flags; /* flags to find out type of domain */ + unsigned dev_cnt; /* devices assigned to this domain */ void *priv; /* private data */ }; diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 5c465c91150e..8b45bc49d1c5 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -825,9 +825,10 @@ static void set_device_domain(struct amd_iommu *iommu, u16 devid) { unsigned long flags; - u64 pte_root = virt_to_phys(domain->pt_root); + domain->dev_cnt += 1; + pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; -- cgit v1.2.3 From f1179dc005ee2b0e55c3f74f3552c3e9ef852265 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 10 Dec 2008 14:39:51 +0100 Subject: AMD IOMMU: rename set_device_domain function Impact: rename set_device_domain() to attach_device() Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 8b45bc49d1c5..12e8b67e5881 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -820,9 +820,9 @@ static struct protection_domain *domain_for_device(u16 devid) * If a device is not yet associated with a domain, this function does * assigns it visible for the hardware */ -static void set_device_domain(struct amd_iommu *iommu, - struct protection_domain *domain, - u16 devid) +static void attach_device(struct amd_iommu *iommu, + struct protection_domain *domain, + u16 devid) { unsigned long flags; u64 pte_root = virt_to_phys(domain->pt_root); @@ -929,14 +929,14 @@ static int get_device_resources(struct device *dev, if (!dma_dom) dma_dom = (*iommu)->default_dom; *domain = &dma_dom->domain; - set_device_domain(*iommu, *domain, *bdf); + attach_device(*iommu, *domain, *bdf); printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " "device ", (*domain)->id); print_devid(_bdf, 1); } if (domain_for_device(_bdf) == NULL) - set_device_domain(*iommu, *domain, _bdf); + attach_device(*iommu, *domain, _bdf); return 1; } -- cgit v1.2.3 From 355bf553edb7fe21ada51f62c849180bec6da877 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 8 Dec 2008 12:02:41 +0100 Subject: AMD IOMMU: add device detach helper functions Impact: add helper functions to detach a device from a domain Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 12e8b67e5881..15456a3a18c8 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -844,6 +844,45 @@ static void attach_device(struct amd_iommu *iommu, iommu_queue_inv_dev_entry(iommu, devid); } +#ifdef CONFIG_IOMMU_API +/* + * Removes a device from a protection domain (unlocked) + */ +static void __detach_device(struct protection_domain *domain, u16 devid) +{ + + /* lock domain */ + spin_lock(&domain->lock); + + /* remove domain from the lookup table */ + amd_iommu_pd_table[devid] = NULL; + + /* remove entry from the device table seen by the hardware */ + amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; + amd_iommu_dev_table[devid].data[1] = 0; + amd_iommu_dev_table[devid].data[2] = 0; + + /* decrease reference counter */ + domain->dev_cnt -= 1; + + /* ready */ + spin_unlock(&domain->lock); +} + +/* + * Removes a device from a protection domain (with devtable_lock held) + */ +static void detach_device(struct protection_domain *domain, u16 devid) +{ + unsigned long flags; + + /* lock device table */ + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + __detach_device(domain, devid); + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); +} +#endif + /***************************************************************************** * * The next functions belong to the dma_ops mapping/unmapping code. -- cgit v1.2.3 From e275a2a0fc9e2168b15f6c7814e30b7ad58b1c7c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 10 Dec 2008 18:27:25 +0100 Subject: AMD IOMMU: add device notifier callback Impact: inform IOMMU about state change of a device in the driver core Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 62 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 15456a3a18c8..140875b3f6ef 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -47,6 +47,8 @@ struct iommu_cmd { static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, struct unity_map_entry *e); +static struct dma_ops_domain *find_protection_domain(u16 devid); + /* returns !0 if the IOMMU is caching non-present entries in its TLB */ static int iommu_has_npcache(struct amd_iommu *iommu) @@ -844,7 +846,6 @@ static void attach_device(struct amd_iommu *iommu, iommu_queue_inv_dev_entry(iommu, devid); } -#ifdef CONFIG_IOMMU_API /* * Removes a device from a protection domain (unlocked) */ @@ -881,7 +882,62 @@ static void detach_device(struct protection_domain *domain, u16 devid) __detach_device(domain, devid); write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); } -#endif + +static int device_change_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + struct pci_dev *pdev = to_pci_dev(dev); + u16 devid = calc_devid(pdev->bus->number, pdev->devfn); + struct protection_domain *domain; + struct dma_ops_domain *dma_domain; + struct amd_iommu *iommu; + + if (devid > amd_iommu_last_bdf) + goto out; + + devid = amd_iommu_alias_table[devid]; + + iommu = amd_iommu_rlookup_table[devid]; + if (iommu == NULL) + goto out; + + domain = domain_for_device(devid); + + if (domain && !dma_ops_domain(domain)) + WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound " + "to a non-dma-ops domain\n", dev_name(dev)); + + switch (action) { + case BUS_NOTIFY_BOUND_DRIVER: + if (domain) + goto out; + dma_domain = find_protection_domain(devid); + if (!dma_domain) + dma_domain = iommu->default_dom; + attach_device(iommu, &dma_domain->domain, devid); + printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " + "device %s\n", dma_domain->domain.id, dev_name(dev)); + break; + case BUS_NOTIFY_UNBIND_DRIVER: + if (!domain) + goto out; + detach_device(domain, devid); + break; + default: + goto out; + } + + iommu_queue_inv_dev_entry(iommu, devid); + iommu_completion_wait(iommu); + +out: + return 0; +} + +struct notifier_block device_nb = { + .notifier_call = device_change_notifier, +}; /***************************************************************************** * @@ -1510,6 +1566,8 @@ int __init amd_iommu_init_dma_ops(void) /* Make the driver finally visible to the drivers */ dma_ops = &amd_iommu_dma_ops; + bus_register_notifier(&pci_bus_type, &device_nb); + return 0; free_domains: -- cgit v1.2.3 From 6d98cd8043c13438e4ca8a9464893f0224198a30 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 8 Dec 2008 12:05:55 +0100 Subject: AMD IOMMU: add domain cleanup helper function Impact: add a function to remove all devices from a domain Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 140875b3f6ef..5d3f085289eb 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1579,3 +1579,31 @@ free_domains: return ret; } + +/***************************************************************************** + * + * The following functions belong to the exported interface of AMD IOMMU + * + * This interface allows access to lower level functions of the IOMMU + * like protection domain handling and assignement of devices to domains + * which is not possible with the dma_ops interface. + * + *****************************************************************************/ + +#ifdef CONFIG_IOMMU_API + +static void cleanup_domain(struct protection_domain *domain) +{ + unsigned long flags; + u16 devid; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) + if (amd_iommu_pd_table[devid] == domain) + __detach_device(domain, devid); + + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); +} + +#endif -- cgit v1.2.3 From c156e347d6d3c36b6843c3b168eda61b9a02c827 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Dec 2008 18:13:27 +0100 Subject: AMD IOMMU: add domain init function for IOMMU API Impact: add a generic function for allocation protection domains Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 5d3f085289eb..6c0bd49cee5f 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -22,6 +22,9 @@ #include #include #include +#ifdef CONFIG_IOMMU_API +#include +#endif #include #include #include @@ -1606,4 +1609,31 @@ static void cleanup_domain(struct protection_domain *domain) write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); } +static int amd_iommu_domain_init(struct iommu_domain *dom) +{ + struct protection_domain *domain; + + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + return -ENOMEM; + + spin_lock_init(&domain->lock); + domain->mode = PAGE_MODE_3_LEVEL; + domain->id = domain_id_alloc(); + if (!domain->id) + goto out_free; + domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); + if (!domain->pt_root) + goto out_free; + + dom->priv = domain; + + return 0; + +out_free: + kfree(domain); + + return -ENOMEM; +} + #endif -- cgit v1.2.3 From 98383fc301c6546af0f3a8a1d3cb8bf218f7e940 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Dec 2008 18:34:12 +0100 Subject: AMD IOMMU: add domain destroy function for IOMMU API Impact: add a generic function for releasing protection domains Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 6c0bd49cee5f..891d713d9c96 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1636,4 +1636,25 @@ out_free: return -ENOMEM; } +static void amd_iommu_domain_destroy(struct iommu_domain *dom) +{ + struct protection_domain *domain = dom->priv; + + if (!domain) + return; + + if (domain->dev_cnt > 0) + cleanup_domain(domain); + + BUG_ON(domain->dev_cnt != 0); + + free_pagetable(domain); + + domain_id_free(domain->id); + + kfree(domain); + + dom->priv = NULL; +} + #endif -- cgit v1.2.3 From 684f2888847b896faafed87ce4733501d2cc283c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 8 Dec 2008 12:07:44 +0100 Subject: AMD IOMMU: add device detach function for IOMMU API Impact: add a generic function to detach devices from protection domains Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 891d713d9c96..ef9b309e8e09 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1657,4 +1657,30 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom) dom->priv = NULL; } +static void amd_iommu_detach_device(struct iommu_domain *dom, + struct device *dev) +{ + struct protection_domain *domain = dom->priv; + struct amd_iommu *iommu; + struct pci_dev *pdev; + u16 devid; + + if (dev->bus != &pci_bus_type) + return; + + pdev = to_pci_dev(dev); + + devid = calc_devid(pdev->bus->number, pdev->devfn); + + if (devid > 0) + detach_device(domain, devid); + + iommu = amd_iommu_rlookup_table[devid]; + if (!iommu) + return; + + iommu_queue_inv_dev_entry(iommu, devid); + iommu_completion_wait(iommu); +} + #endif -- cgit v1.2.3 From 01106066a6900b518debe990ddaadf376d433bd6 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Dec 2008 19:34:11 +0100 Subject: AMD IOMMU: add device attach function for IOMMU API Impact: add a generic function to attach devices to protection domains Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index ef9b309e8e09..2f7c0b3a448b 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1683,4 +1683,39 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, iommu_completion_wait(iommu); } +static int amd_iommu_attach_device(struct iommu_domain *dom, + struct device *dev) +{ + struct protection_domain *domain = dom->priv; + struct protection_domain *old_domain; + struct amd_iommu *iommu; + struct pci_dev *pdev; + u16 devid; + + if (dev->bus != &pci_bus_type) + return -EINVAL; + + pdev = to_pci_dev(dev); + + devid = calc_devid(pdev->bus->number, pdev->devfn); + + if (devid >= amd_iommu_last_bdf || + devid != amd_iommu_alias_table[devid]) + return -EINVAL; + + iommu = amd_iommu_rlookup_table[devid]; + if (!iommu) + return -EINVAL; + + old_domain = domain_for_device(devid); + if (old_domain) + return -EBUSY; + + attach_device(iommu, domain, devid); + + iommu_completion_wait(iommu); + + return 0; +} + #endif -- cgit v1.2.3 From c6229ca649aa9b312d1f1de20af8d2603b14eead Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Dec 2008 19:48:43 +0100 Subject: AMD IOMMU: add domain map function for IOMMU API Impact: add a generic function to map pages into protection domains Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 2f7c0b3a448b..1fcedbe39a99 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1718,4 +1718,33 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, return 0; } +static int amd_iommu_map_range(struct iommu_domain *dom, + unsigned long iova, phys_addr_t paddr, + size_t size, int iommu_prot) +{ + struct protection_domain *domain = dom->priv; + unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE); + int prot = 0; + int ret; + + if (iommu_prot & IOMMU_READ) + prot |= IOMMU_PROT_IR; + if (iommu_prot & IOMMU_WRITE) + prot |= IOMMU_PROT_IW; + + iova &= PAGE_MASK; + paddr &= PAGE_MASK; + + for (i = 0; i < npages; ++i) { + ret = iommu_map_page(domain, iova, paddr, prot); + if (ret) + return ret; + + iova += PAGE_SIZE; + paddr += PAGE_SIZE; + } + + return 0; +} + #endif -- cgit v1.2.3 From eb74ff6cc0080c7f6270fdfffba65c4eff23d3ad Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Dec 2008 19:59:10 +0100 Subject: AMD IOMMU: add domain unmap function for IOMMU API Impact: add a generic function to unmap pages into protection domains Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 1fcedbe39a99..d8a0abf423ba 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -445,6 +445,30 @@ static int iommu_map_page(struct protection_domain *dom, return 0; } +#ifdef CONFIG_IOMMU_API +static void iommu_unmap_page(struct protection_domain *dom, + unsigned long bus_addr) +{ + u64 *pte; + + pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)]; + + if (!IOMMU_PTE_PRESENT(*pte)) + return; + + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; + + if (!IOMMU_PTE_PRESENT(*pte)) + return; + + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; + + *pte = 0; +} +#endif + /* * This function checks if a specific unity mapping entry is needed for * this specific IOMMU. @@ -1747,4 +1771,21 @@ static int amd_iommu_map_range(struct iommu_domain *dom, return 0; } +static void amd_iommu_unmap_range(struct iommu_domain *dom, + unsigned long iova, size_t size) +{ + + struct protection_domain *domain = dom->priv; + unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE); + + iova &= PAGE_MASK; + + for (i = 0; i < npages; ++i) { + iommu_unmap_page(domain, iova); + iova += PAGE_SIZE; + } + + iommu_flush_domain(domain->id); +} + #endif -- cgit v1.2.3 From 645c4c8d7289a718c9c828ec217f2b94e3b3e6ff Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 2 Dec 2008 20:05:50 +0100 Subject: AMD IOMMU: add domain address lookup function for IOMMU API Impact: add a generic function to lockup addresses in protection domains Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index d8a0abf423ba..b599e80051fc 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1788,4 +1788,35 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom, iommu_flush_domain(domain->id); } +static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, + unsigned long iova) +{ + struct protection_domain *domain = dom->priv; + unsigned long offset = iova & ~PAGE_MASK; + phys_addr_t paddr; + u64 *pte; + + pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)]; + + if (!IOMMU_PTE_PRESENT(*pte)) + return 0; + + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[IOMMU_PTE_L1_INDEX(iova)]; + + if (!IOMMU_PTE_PRESENT(*pte)) + return 0; + + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[IOMMU_PTE_L0_INDEX(iova)]; + + if (!IOMMU_PTE_PRESENT(*pte)) + return 0; + + paddr = *pte & IOMMU_PAGE_MASK; + paddr |= offset; + + return paddr; +} + #endif -- cgit v1.2.3 From 26961efe0dab9ca73f8fc3b6137b814252e04972 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Dec 2008 17:00:17 +0100 Subject: AMD IOMMU: register functions for the IOMMU API Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index b599e80051fc..d9b651c01186 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -41,6 +41,10 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock); static LIST_HEAD(iommu_pd_list); static DEFINE_SPINLOCK(iommu_pd_list_lock); +#ifdef CONFIG_IOMMU_API +static struct iommu_ops amd_iommu_ops; +#endif + /* * general struct to manage commands send to an IOMMU */ @@ -1593,6 +1597,10 @@ int __init amd_iommu_init_dma_ops(void) /* Make the driver finally visible to the drivers */ dma_ops = &amd_iommu_dma_ops; +#ifdef CONFIG_IOMMU_API + register_iommu(&amd_iommu_ops); +#endif + bus_register_notifier(&pci_bus_type, &device_nb); return 0; @@ -1819,4 +1827,14 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, return paddr; } +static struct iommu_ops amd_iommu_ops = { + .domain_init = amd_iommu_domain_init, + .domain_destroy = amd_iommu_domain_destroy, + .attach_dev = amd_iommu_attach_device, + .detach_dev = amd_iommu_detach_device, + .map = amd_iommu_map_range, + .unmap = amd_iommu_unmap_range, + .iova_to_phys = amd_iommu_iova_to_phys, +}; + #endif -- cgit v1.2.3 From e2dc14a2a6c9a83baaafc51f06b7e73cec2167be Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 10 Dec 2008 18:48:59 +0100 Subject: AMD IOMMU: add a domain flag for default domains Impact: adds a new protection domain flag Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 2 ++ arch/x86/kernel/amd_iommu.c | 1 + 2 files changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 1c769f4e6cdf..6adc7020ef97 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -192,6 +192,8 @@ /* Protection domain flags */ #define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ +#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops + domain for an IOMMU */ /* * This structure contains generic data for IOMMU protection domains diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index d9b651c01186..f2956546423b 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1574,6 +1574,7 @@ int __init amd_iommu_init_dma_ops(void) iommu->default_dom = dma_ops_domain_alloc(iommu, order); if (iommu->default_dom == NULL) return -ENOMEM; + iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; ret = iommu_init_unity_mappings(iommu); if (ret) goto free_domains; -- cgit v1.2.3 From 1ac4cbbc5eb56de96d264d10f464ba5222815b1b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 10 Dec 2008 19:33:26 +0100 Subject: AMD IOMMU: allocate a new protection for hotplugged devices Impact: also hotplug devices benefit from device isolation Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index f2956546423b..f2260609eadc 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -923,6 +923,8 @@ static int device_change_notifier(struct notifier_block *nb, struct protection_domain *domain; struct dma_ops_domain *dma_domain; struct amd_iommu *iommu; + int order = amd_iommu_aperture_order; + unsigned long flags; if (devid > amd_iommu_last_bdf) goto out; @@ -954,6 +956,21 @@ static int device_change_notifier(struct notifier_block *nb, if (!domain) goto out; detach_device(domain, devid); + break; + case BUS_NOTIFY_ADD_DEVICE: + /* allocate a protection domain if a device is added */ + dma_domain = find_protection_domain(devid); + if (dma_domain) + goto out; + dma_domain = dma_ops_domain_alloc(iommu, order); + if (!dma_domain) + goto out; + dma_domain->target_dev = devid; + + spin_lock_irqsave(&iommu_pd_list_lock, flags); + list_add_tail(&dma_domain->list, &iommu_pd_list); + spin_unlock_irqrestore(&iommu_pd_list_lock, flags); + break; default: goto out; -- cgit v1.2.3 From ab896722867602eb0e836717e8b857ad513800d8 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 10 Dec 2008 19:43:07 +0100 Subject: AMD IOMMU: use dev_name instead of self-build print_devid Impact: use generic dev_name instead of own function Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 12 ------------ arch/x86/kernel/amd_iommu.c | 3 +-- 2 files changed, 1 insertion(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 6adc7020ef97..ee8cfa00017d 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -389,18 +389,6 @@ extern int amd_iommu_isolate; */ extern bool amd_iommu_unmap_flush; -/* takes a PCI device id and prints it out in a readable form */ -static inline void print_devid(u16 devid, int nl) -{ - int bus = devid >> 8; - int dev = devid >> 3 & 0x1f; - int fn = devid & 0x07; - - printk("%02x:%02x.%x", bus, dev, fn); - if (nl) - printk("\n"); -} - /* takes bus and device/function and returns the device id * FIXME: should that be in generic PCI code? */ static inline u16 calc_devid(u8 bus, u8 devfn) diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index f2260609eadc..a53cf48d3be8 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1074,8 +1074,7 @@ static int get_device_resources(struct device *dev, *domain = &dma_dom->domain; attach_device(*iommu, *domain, *bdf); printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " - "device ", (*domain)->id); - print_devid(_bdf, 1); + "device %s\n", (*domain)->id, dev_name(dev)); } if (domain_for_device(_bdf) == NULL) -- cgit v1.2.3 From 0cfd7aa90be83a4d278810d231f9ef03f189b4f0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 10 Dec 2008 19:58:00 +0100 Subject: AMD IOMMU: convert iommu->need_sync to bool Impact: use bool instead of int for iommu->need_sync Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 2 +- arch/x86/kernel/amd_iommu.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index ee8cfa00017d..c4b144ece1f8 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -302,7 +302,7 @@ struct amd_iommu { bool int_enabled; /* if one, we need to send a completion wait command */ - int need_sync; + bool need_sync; /* default dma_ops domain for that IOMMU */ struct dma_ops_domain *default_dom; diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a53cf48d3be8..e410e97e5b0a 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -198,7 +198,7 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) spin_lock_irqsave(&iommu->lock, flags); ret = __iommu_queue_command(iommu, cmd); if (!ret) - iommu->need_sync = 1; + iommu->need_sync = true; spin_unlock_irqrestore(&iommu->lock, flags); return ret; @@ -263,7 +263,7 @@ static int iommu_completion_wait(struct amd_iommu *iommu) ret = __iommu_completion_wait(iommu); - iommu->need_sync = 0; + iommu->need_sync = false; if (ret) goto out; -- cgit v1.2.3 From c226f853091577e665ebc02c064af4834d8d4f28 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 12 Dec 2008 13:53:54 +0100 Subject: AMD IOMMU: convert amd_iommu_isolate to bool Impact: cleanup Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 2 +- arch/x86/kernel/amd_iommu_init.c | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index c4b144ece1f8..7abf9cf0c1fe 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -381,7 +381,7 @@ extern struct protection_domain **amd_iommu_pd_table; extern unsigned long *amd_iommu_pd_alloc_bitmap; /* will be 1 if device isolation is enabled */ -extern int amd_iommu_isolate; +extern bool amd_iommu_isolate; /* * If true, the addresses will be flushed on unmap time, not when diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c625800c55ca..47e163b4431e 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -122,7 +122,8 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings we find in ACPI */ unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ -int amd_iommu_isolate = 1; /* if 1, device isolation is enabled */ +bool amd_iommu_isolate = true; /* if true, device isolation is + enabled */ bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the @@ -1218,9 +1219,9 @@ static int __init parse_amd_iommu_options(char *str) { for (; *str; ++str) { if (strncmp(str, "isolate", 7) == 0) - amd_iommu_isolate = 1; + amd_iommu_isolate = true; if (strncmp(str, "share", 5) == 0) - amd_iommu_isolate = 0; + amd_iommu_isolate = false; if (strncmp(str, "fullflush", 9) == 0) amd_iommu_unmap_flush = true; } -- cgit v1.2.3 From edcb34da259c503a2ffd37e51a658672ba3bc7a2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 10 Dec 2008 20:01:45 +0100 Subject: AMD IOMMU: use calc_devid in prealloc_protection_domains Impact: cleanup Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index e410e97e5b0a..3011ea7a3f82 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1543,7 +1543,7 @@ void prealloc_protection_domains(void) u16 devid; while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - devid = (dev->bus->number << 8) | dev->devfn; + devid = calc_devid(dev->bus->number, dev->devfn); if (devid > amd_iommu_last_bdf) continue; devid = amd_iommu_alias_table[devid]; -- cgit v1.2.3 From a4e267c88b4acfc87ff2ab0cc8e9509878e9aaba Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 10 Dec 2008 20:04:18 +0100 Subject: AMD IOMMU: use dev_name in iommu_enable function Impact: cleanup Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 47e163b4431e..be81f6125c51 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -246,12 +246,8 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) /* Function to enable the hardware */ void __init iommu_enable(struct amd_iommu *iommu) { - printk(KERN_INFO "AMD IOMMU: Enabling IOMMU " - "at %02x:%02x.%x cap 0x%hx\n", - iommu->dev->bus->number, - PCI_SLOT(iommu->dev->devfn), - PCI_FUNC(iommu->dev->devfn), - iommu->cap_ptr); + printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n", + dev_name(&iommu->dev->dev), iommu->cap_ptr); iommu_feature_enable(iommu, CONTROL_IOMMU_EN); } -- cgit v1.2.3 From 2e117604a4e8f3f9cee4aec3373b0382159e152a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 11 Dec 2008 19:00:12 +0100 Subject: AMD IOMMU: add Kconfig entry for statistic collection code Impact: adds new Kconfig entry Signed-off-by: Joerg Roedel --- arch/x86/Kconfig | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 249d1e0824b5..f9998d276eb3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -586,6 +586,16 @@ config AMD_IOMMU your BIOS for an option to enable it or if you have an IVRS ACPI table. +config AMD_IOMMU_STATS + bool "Export AMD IOMMU statistics to debugfs" + depends on AMD_IOMMU + select DEBUG_FS + help + This option enables code in the AMD IOMMU driver to collect various + statistics about whats happening in the driver and exports that + information to userspace via debugfs. + If unsure, say N. + # need this always selected by IOMMU for the VIA workaround config SWIOTLB def_bool y if X86_64 -- cgit v1.2.3 From a9dddbe0497ab0df7ee729e8d4cb0ee2dec3e4ba Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 12 Dec 2008 12:33:06 +0100 Subject: AMD IOMMU: add necessary header defines for stats counting Impact: add defines to make iommu stats collection configurable Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 7abf9cf0c1fe..1379c5fe86d0 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -396,4 +396,30 @@ static inline u16 calc_devid(u8 bus, u8 devfn) return (((u16)bus) << 8) | devfn; } +#ifdef CONFIG_AMD_IOMMU_STATS + +struct __iommu_counter { + char *name; + struct dentry *dent; + u64 value; +}; + +#define DECLARE_STATS_COUNTER(nm) \ + static struct __iommu_counter nm = { \ + .name = #nm, \ + } + +#define INC_STATS_COUNTER(name) name.value += 1 +#define ADD_STATS_COUNTER(name, x) name.value += (x) +#define SUB_STATS_COUNTER(name, x) name.value -= (x) + +#else /* CONFIG_AMD_IOMMU_STATS */ + +#define DECLARE_STATS_COUNTER(name) +#define INC_STATS_COUNTER(name) +#define ADD_STATS_COUNTER(name, x) +#define SUB_STATS_COUNTER(name, x) + +#endif /* CONFIG_AMD_IOMMU_STATS */ + #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ -- cgit v1.2.3 From 7f26508bbb76ce86aad1130ef6b7f1a4bb7de0c2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 12 Dec 2008 13:50:21 +0100 Subject: AMD IOMMU: add init code for statistic collection Impact: create a new debugfs directory Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_types.h | 2 ++ arch/x86/kernel/amd_iommu.c | 37 ++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 1379c5fe86d0..95c8cd9d22b5 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -420,6 +420,8 @@ struct __iommu_counter { #define ADD_STATS_COUNTER(name, x) #define SUB_STATS_COUNTER(name, x) +static inline void amd_iommu_stats_init(void) { } + #endif /* CONFIG_AMD_IOMMU_STATS */ #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 3011ea7a3f82..f98f70626bc6 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_IOMMU_API @@ -57,6 +58,40 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, static struct dma_ops_domain *find_protection_domain(u16 devid); +#ifdef CONFIG_AMD_IOMMU_STATS + +/* + * Initialization code for statistics collection + */ + +static struct dentry *stats_dir; +static struct dentry *de_isolate; +static struct dentry *de_fflush; + +static void amd_iommu_stats_add(struct __iommu_counter *cnt) +{ + if (stats_dir == NULL) + return; + + cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir, + &cnt->value); +} + +static void amd_iommu_stats_init(void) +{ + stats_dir = debugfs_create_dir("amd-iommu", NULL); + if (stats_dir == NULL) + return; + + de_isolate = debugfs_create_bool("isolation", 0444, stats_dir, + (u32 *)&amd_iommu_isolate); + + de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, + (u32 *)&amd_iommu_unmap_flush); +} + +#endif + /* returns !0 if the IOMMU is caching non-present entries in its TLB */ static int iommu_has_npcache(struct amd_iommu *iommu) { @@ -1620,6 +1655,8 @@ int __init amd_iommu_init_dma_ops(void) bus_register_notifier(&pci_bus_type, &device_nb); + amd_iommu_stats_init(); + return 0; free_domains: -- cgit v1.2.3 From da49f6df726ecaaee87757e8b65a560679d32f99 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 12 Dec 2008 14:59:58 +0100 Subject: AMD IOMMU: add stats counter for completion wait events Impact: see number of completion wait events in debugfs Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index f98f70626bc6..b21435748e0d 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -64,6 +64,8 @@ static struct dma_ops_domain *find_protection_domain(u16 devid); * Initialization code for statistics collection */ +DECLARE_STATS_COUNTER(compl_wait); + static struct dentry *stats_dir; static struct dentry *de_isolate; static struct dentry *de_fflush; @@ -88,6 +90,8 @@ static void amd_iommu_stats_init(void) de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, (u32 *)&amd_iommu_unmap_flush); + + amd_iommu_stats_add(&compl_wait); } #endif @@ -249,6 +253,8 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu) unsigned status = 0; unsigned long i = 0; + INC_STATS_COUNTER(compl_wait); + while (!ready && (i < EXIT_LOOP_COUNT)) { ++i; /* wait for the bit to become one */ -- cgit v1.2.3 From 0f2a86f200bc97ae6cefc5d3ac879094b3fcde48 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 12 Dec 2008 15:05:16 +0100 Subject: AMD IOMMU: add stats counter for map_single requests Impact: see number of map_single requests in debugfs Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index b21435748e0d..ef377865eb76 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -65,6 +65,7 @@ static struct dma_ops_domain *find_protection_domain(u16 devid); */ DECLARE_STATS_COUNTER(compl_wait); +DECLARE_STATS_COUNTER(cnt_map_single); static struct dentry *stats_dir; static struct dentry *de_isolate; @@ -92,6 +93,7 @@ static void amd_iommu_stats_init(void) (u32 *)&amd_iommu_unmap_flush); amd_iommu_stats_add(&compl_wait); + amd_iommu_stats_add(&cnt_map_single); } #endif @@ -1278,6 +1280,8 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, dma_addr_t addr; u64 dma_mask; + INC_STATS_COUNTER(cnt_map_single); + if (!check_device(dev)) return bad_dma_address; -- cgit v1.2.3 From 146a6917fc30616401a090f55cff2b855ee5b2ab Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 12 Dec 2008 15:07:12 +0100 Subject: AMD IOMMU: add stats counter for unmap_single requests Impact: see number of unmap_single requests in debugfs Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index ef377865eb76..71646c81421a 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -66,6 +66,7 @@ static struct dma_ops_domain *find_protection_domain(u16 devid); DECLARE_STATS_COUNTER(compl_wait); DECLARE_STATS_COUNTER(cnt_map_single); +DECLARE_STATS_COUNTER(cnt_unmap_single); static struct dentry *stats_dir; static struct dentry *de_isolate; @@ -94,6 +95,7 @@ static void amd_iommu_stats_init(void) amd_iommu_stats_add(&compl_wait); amd_iommu_stats_add(&cnt_map_single); + amd_iommu_stats_add(&cnt_unmap_single); } #endif @@ -1321,6 +1323,8 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, struct protection_domain *domain; u16 devid; + INC_STATS_COUNTER(cnt_unmap_single); + if (!check_device(dev) || !get_device_resources(dev, &iommu, &domain, &devid)) /* device not handled by any AMD IOMMU */ -- cgit v1.2.3 From d03f067a9d0a1cc09529427af9a15e15d32ba1de Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 12 Dec 2008 15:09:48 +0100 Subject: AMD IOMMU: add stats counter for map_sg requests Impact: see number of map_sg requests in debugfs Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 71646c81421a..859ca741745b 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -67,6 +67,7 @@ static struct dma_ops_domain *find_protection_domain(u16 devid); DECLARE_STATS_COUNTER(compl_wait); DECLARE_STATS_COUNTER(cnt_map_single); DECLARE_STATS_COUNTER(cnt_unmap_single); +DECLARE_STATS_COUNTER(cnt_map_sg); static struct dentry *stats_dir; static struct dentry *de_isolate; @@ -96,6 +97,7 @@ static void amd_iommu_stats_init(void) amd_iommu_stats_add(&compl_wait); amd_iommu_stats_add(&cnt_map_single); amd_iommu_stats_add(&cnt_unmap_single); + amd_iommu_stats_add(&cnt_map_sg); } #endif @@ -1377,6 +1379,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, int mapped_elems = 0; u64 dma_mask; + INC_STATS_COUNTER(cnt_map_sg); + if (!check_device(dev)) return 0; -- cgit v1.2.3 From 55877a6bcdf0843414eecc658550c6551f5b5e1d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 12 Dec 2008 15:12:14 +0100 Subject: AMD IOMMU: add stats counter for unmap_sg requests Impact: see number of unmap_sg requests in debugfs Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 859ca741745b..49f2c8533e12 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -68,6 +68,7 @@ DECLARE_STATS_COUNTER(compl_wait); DECLARE_STATS_COUNTER(cnt_map_single); DECLARE_STATS_COUNTER(cnt_unmap_single); DECLARE_STATS_COUNTER(cnt_map_sg); +DECLARE_STATS_COUNTER(cnt_unmap_sg); static struct dentry *stats_dir; static struct dentry *de_isolate; @@ -98,6 +99,7 @@ static void amd_iommu_stats_init(void) amd_iommu_stats_add(&cnt_map_single); amd_iommu_stats_add(&cnt_unmap_single); amd_iommu_stats_add(&cnt_map_sg); + amd_iommu_stats_add(&cnt_unmap_sg); } #endif @@ -1443,6 +1445,8 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, u16 devid; int i; + INC_STATS_COUNTER(cnt_unmap_sg); + if (!check_device(dev) || !get_device_resources(dev, &iommu, &domain, &devid)) return; -- cgit v1.2.3 From c8f0fb36bffa9e21d214a2910b825567d52bfc2c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 12 Dec 2008 15:14:21 +0100 Subject: AMD IOMMU: add stats counter for alloc_coherent requests Impact: see number of alloc_coherent requests in debugfs Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 49f2c8533e12..ecc89f8857b6 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -69,6 +69,7 @@ DECLARE_STATS_COUNTER(cnt_map_single); DECLARE_STATS_COUNTER(cnt_unmap_single); DECLARE_STATS_COUNTER(cnt_map_sg); DECLARE_STATS_COUNTER(cnt_unmap_sg); +DECLARE_STATS_COUNTER(cnt_alloc_coherent); static struct dentry *stats_dir; static struct dentry *de_isolate; @@ -100,6 +101,7 @@ static void amd_iommu_stats_init(void) amd_iommu_stats_add(&cnt_unmap_single); amd_iommu_stats_add(&cnt_map_sg); amd_iommu_stats_add(&cnt_unmap_sg); + amd_iommu_stats_add(&cnt_alloc_coherent); } #endif @@ -1481,6 +1483,8 @@ static void *alloc_coherent(struct device *dev, size_t size, phys_addr_t paddr; u64 dma_mask = dev->coherent_dma_mask; + INC_STATS_COUNTER(cnt_alloc_coherent); + if (!check_device(dev)) return NULL; -- cgit v1.2.3 From 5d31ee7e08b7713596b999a42e67491bdf3665b3 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 12 Dec 2008 15:16:38 +0100 Subject: AMD IOMMU: add stats counter for free_coherent requests Impact: see number of free_coherent requests in debugfs Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index ecc89f8857b6..112412d733ab 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -70,6 +70,7 @@ DECLARE_STATS_COUNTER(cnt_unmap_single); DECLARE_STATS_COUNTER(cnt_map_sg); DECLARE_STATS_COUNTER(cnt_unmap_sg); DECLARE_STATS_COUNTER(cnt_alloc_coherent); +DECLARE_STATS_COUNTER(cnt_free_coherent); static struct dentry *stats_dir; static struct dentry *de_isolate; @@ -102,6 +103,7 @@ static void amd_iommu_stats_init(void) amd_iommu_stats_add(&cnt_map_sg); amd_iommu_stats_add(&cnt_unmap_sg); amd_iommu_stats_add(&cnt_alloc_coherent); + amd_iommu_stats_add(&cnt_free_coherent); } #endif @@ -1541,6 +1543,8 @@ static void free_coherent(struct device *dev, size_t size, struct protection_domain *domain; u16 devid; + INC_STATS_COUNTER(cnt_free_coherent); + if (!check_device(dev)) return; -- cgit v1.2.3 From c1858976f5ef05122bb671f678beaf7e1fe1dd74 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 12 Dec 2008 15:42:39 +0100 Subject: AMD IOMMU: add stats counter for cross-page request Impact: see number of requests for more than one page in debugfs Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 112412d733ab..f5455039b609 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -71,6 +71,7 @@ DECLARE_STATS_COUNTER(cnt_map_sg); DECLARE_STATS_COUNTER(cnt_unmap_sg); DECLARE_STATS_COUNTER(cnt_alloc_coherent); DECLARE_STATS_COUNTER(cnt_free_coherent); +DECLARE_STATS_COUNTER(cross_page); static struct dentry *stats_dir; static struct dentry *de_isolate; @@ -104,6 +105,7 @@ static void amd_iommu_stats_init(void) amd_iommu_stats_add(&cnt_unmap_sg); amd_iommu_stats_add(&cnt_alloc_coherent); amd_iommu_stats_add(&cnt_free_coherent); + amd_iommu_stats_add(&cross_page); } #endif @@ -1217,6 +1219,9 @@ static dma_addr_t __map_single(struct device *dev, pages = iommu_num_pages(paddr, size, PAGE_SIZE); paddr &= PAGE_MASK; + if (pages > 1) + INC_STATS_COUNTER(cross_page); + if (align) align_mask = (1UL << get_order(size)) - 1; -- cgit v1.2.3 From f57d98ae6979f7bcbf758023b4716f485385f903 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 12 Dec 2008 15:46:29 +0100 Subject: AMD IOMMU: add stats counter for single iommu domain tlb flushes Impact: see number of single iommu domain tlb flushes in debugfs Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index f5455039b609..e99022d3a394 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -72,6 +72,7 @@ DECLARE_STATS_COUNTER(cnt_unmap_sg); DECLARE_STATS_COUNTER(cnt_alloc_coherent); DECLARE_STATS_COUNTER(cnt_free_coherent); DECLARE_STATS_COUNTER(cross_page); +DECLARE_STATS_COUNTER(domain_flush_single); static struct dentry *stats_dir; static struct dentry *de_isolate; @@ -106,6 +107,7 @@ static void amd_iommu_stats_init(void) amd_iommu_stats_add(&cnt_alloc_coherent); amd_iommu_stats_add(&cnt_free_coherent); amd_iommu_stats_add(&cross_page); + amd_iommu_stats_add(&domain_flush_single); } #endif @@ -413,6 +415,8 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) { u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; + INC_STATS_COUNTER(domain_flush_single); + iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); } -- cgit v1.2.3 From 18811f55d48e5f3ee70c4744c592f940022fa592 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 12 Dec 2008 15:48:28 +0100 Subject: AMD IOMMU: add stats counter for domain tlb flushes Impact: see number of domain tlb flushes in debugfs Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index e99022d3a394..a897c7246dca 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -73,6 +73,7 @@ DECLARE_STATS_COUNTER(cnt_alloc_coherent); DECLARE_STATS_COUNTER(cnt_free_coherent); DECLARE_STATS_COUNTER(cross_page); DECLARE_STATS_COUNTER(domain_flush_single); +DECLARE_STATS_COUNTER(domain_flush_all); static struct dentry *stats_dir; static struct dentry *de_isolate; @@ -108,6 +109,7 @@ static void amd_iommu_stats_init(void) amd_iommu_stats_add(&cnt_free_coherent); amd_iommu_stats_add(&cross_page); amd_iommu_stats_add(&domain_flush_single); + amd_iommu_stats_add(&domain_flush_all); } #endif @@ -431,6 +433,8 @@ static void iommu_flush_domain(u16 domid) struct amd_iommu *iommu; struct iommu_cmd cmd; + INC_STATS_COUNTER(domain_flush_all); + __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, domid, 1, 1); -- cgit v1.2.3 From 5774f7c5fef2526bfa58eab628fbe91dce5e07b1 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 12 Dec 2008 15:57:30 +0100 Subject: AMD IOMMU: add statistics about allocated io memory Impact: see amount of allocated io memory in debugfs Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a897c7246dca..69f367b033a2 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -74,6 +74,7 @@ DECLARE_STATS_COUNTER(cnt_free_coherent); DECLARE_STATS_COUNTER(cross_page); DECLARE_STATS_COUNTER(domain_flush_single); DECLARE_STATS_COUNTER(domain_flush_all); +DECLARE_STATS_COUNTER(alloced_io_mem); static struct dentry *stats_dir; static struct dentry *de_isolate; @@ -110,6 +111,7 @@ static void amd_iommu_stats_init(void) amd_iommu_stats_add(&cross_page); amd_iommu_stats_add(&domain_flush_single); amd_iommu_stats_add(&domain_flush_all); + amd_iommu_stats_add(&alloced_io_mem); } #endif @@ -1246,6 +1248,8 @@ static dma_addr_t __map_single(struct device *dev, } address += offset; + ADD_STATS_COUNTER(alloced_io_mem, size); + if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { iommu_flush_tlb(iommu, dma_dom->domain.id); dma_dom->need_flush = false; @@ -1282,6 +1286,8 @@ static void __unmap_single(struct amd_iommu *iommu, start += PAGE_SIZE; } + SUB_STATS_COUNTER(alloced_io_mem, size); + dma_ops_free_addresses(dma_dom, dma_addr, pages); if (amd_iommu_unmap_flush || dma_dom->need_flush) { -- cgit v1.2.3 From 8ecaf8f19f0f0627d6ac6d69ed9472e7d307f35b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 12 Dec 2008 16:13:04 +0100 Subject: AMD IOMMU: add statistics about total number of map requests Impact: see total number of map requests in debugfs Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 69f367b033a2..0c504b207bf9 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -75,6 +75,7 @@ DECLARE_STATS_COUNTER(cross_page); DECLARE_STATS_COUNTER(domain_flush_single); DECLARE_STATS_COUNTER(domain_flush_all); DECLARE_STATS_COUNTER(alloced_io_mem); +DECLARE_STATS_COUNTER(total_map_requests); static struct dentry *stats_dir; static struct dentry *de_isolate; @@ -112,6 +113,7 @@ static void amd_iommu_stats_init(void) amd_iommu_stats_add(&domain_flush_single); amd_iommu_stats_add(&domain_flush_all); amd_iommu_stats_add(&alloced_io_mem); + amd_iommu_stats_add(&total_map_requests); } #endif @@ -1229,6 +1231,8 @@ static dma_addr_t __map_single(struct device *dev, pages = iommu_num_pages(paddr, size, PAGE_SIZE); paddr &= PAGE_MASK; + INC_STATS_COUNTER(total_map_requests); + if (pages > 1) INC_STATS_COUNTER(cross_page); -- cgit v1.2.3 From 0e93dd883537e628b809a2120854cd591c8935f1 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 29 Dec 2008 21:45:22 +0530 Subject: AMD IOMMU: prealloc_protection_domains should be static Impact: cleanup, reduce kernel size a bit, avoid sparse warning Fixes sparse warning: arch/x86/kernel/amd_iommu.c:1299:6: warning: symbol 'prealloc_protection_domains' was not declared. Should it be static? Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0c504b207bf9..881c68ffdf2e 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1622,7 +1622,7 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask) * we don't need to preallocate the protection domains anymore. * For now we have to. */ -void prealloc_protection_domains(void) +static void prealloc_protection_domains(void) { struct pci_dev *dev = NULL; struct dma_ops_domain *dma_dom; -- cgit v1.2.3 From 065a6d68c71af2a3bdd080fa5aa353b76eede8f5 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Sat, 3 Jan 2009 14:16:35 +0100 Subject: AMD IOMMU: remove now unnecessary #ifdefs The #ifdef's are no longer necessary when the iommu-api and the amd iommu updates are merged together. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 881c68ffdf2e..5113c080f0c4 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -426,7 +426,6 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); } -#ifdef CONFIG_IOMMU_API /* * This function is used to flush the IO/TLB for a given protection domain * on every IOMMU in the system @@ -450,7 +449,6 @@ static void iommu_flush_domain(u16 domid) spin_unlock_irqrestore(&iommu->lock, flags); } } -#endif /**************************************************************************** * @@ -516,7 +514,6 @@ static int iommu_map_page(struct protection_domain *dom, return 0; } -#ifdef CONFIG_IOMMU_API static void iommu_unmap_page(struct protection_domain *dom, unsigned long bus_addr) { @@ -538,7 +535,6 @@ static void iommu_unmap_page(struct protection_domain *dom, *pte = 0; } -#endif /* * This function checks if a specific unity mapping entry is needed for @@ -723,7 +719,6 @@ static u16 domain_id_alloc(void) return id; } -#ifdef CONFIG_IOMMU_API static void domain_id_free(int id) { unsigned long flags; @@ -733,7 +728,6 @@ static void domain_id_free(int id) __clear_bit(id, amd_iommu_pd_alloc_bitmap); write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); } -#endif /* * Used to reserve address ranges in the aperture (e.g. for exclusion @@ -1702,9 +1696,7 @@ int __init amd_iommu_init_dma_ops(void) /* Make the driver finally visible to the drivers */ dma_ops = &amd_iommu_dma_ops; -#ifdef CONFIG_IOMMU_API register_iommu(&amd_iommu_ops); -#endif bus_register_notifier(&pci_bus_type, &device_nb); @@ -1732,8 +1724,6 @@ free_domains: * *****************************************************************************/ -#ifdef CONFIG_IOMMU_API - static void cleanup_domain(struct protection_domain *domain) { unsigned long flags; @@ -1944,4 +1934,3 @@ static struct iommu_ops amd_iommu_ops = { .iova_to_phys = amd_iommu_iova_to_phys, }; -#endif -- cgit v1.2.3 From 6bdf197b04b3ae7c85785bc5a9576f1bcb0ac7c0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 3 Jan 2009 12:50:46 +0100 Subject: ia64: cpumask fix for is_affinity_mask_valid() Impact: build fix on ia64 ia64's default_affinity_write() still had old cpumask_t usage: /home/mingo/tip/kernel/irq/proc.c: In function `default_affinity_write': /home/mingo/tip/kernel/irq/proc.c:114: error: incompatible type for argument 1 of `is_affinity_mask_valid' make[3]: *** [kernel/irq/proc.o] Error 1 make[3]: *** Waiting for unfinished jobs.... update it to cpumask_var_t. Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/irq.h | 2 +- arch/ia64/kernel/irq.c | 4 ++-- kernel/irq/proc.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/ia64/include/asm/irq.h b/arch/ia64/include/asm/irq.h index 3627116fb0e2..36429a532630 100644 --- a/arch/ia64/include/asm/irq.h +++ b/arch/ia64/include/asm/irq.h @@ -27,7 +27,7 @@ irq_canonicalize (int irq) } extern void set_irq_affinity_info (unsigned int irq, int dest, int redir); -bool is_affinity_mask_valid(cpumask_t cpumask); +bool is_affinity_mask_valid(cpumask_var_t cpumask); #define is_affinity_mask_valid is_affinity_mask_valid diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index 0b6db53fedcf..95ff16cb05d8 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -112,11 +112,11 @@ void set_irq_affinity_info (unsigned int irq, int hwid, int redir) } } -bool is_affinity_mask_valid(cpumask_t cpumask) +bool is_affinity_mask_valid(cpumask_var_t cpumask) { if (ia64_platform_is("sn2")) { /* Only allow one CPU to be specified in the smp_affinity mask */ - if (cpus_weight(cpumask) != 1) + if (cpumask_weight(cpumask) != 1) return false; } return true; diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 2abd3a7716ed..aae3f742bcec 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -54,7 +54,7 @@ static ssize_t irq_affinity_proc_write(struct file *file, if (err) goto free_cpumask; - if (!is_affinity_mask_valid(*new_value)) { + if (!is_affinity_mask_valid(new_value)) { err = -EINVAL; goto free_cpumask; } -- cgit v1.2.3 From 730cf27246225d56ca1603b2f3c4fdbf882d4e51 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Wed, 31 Dec 2008 18:08:45 -0800 Subject: x86: enable cpus display of kernel_max and offlined cpus Impact: enables /sys/devices/system/cpu/{kernel_max,offline} user interface By setting total_cpus, the drivers/base/cpu.c will display the values of kernel_max (NR_CPUS-1) and the offlined cpu map. Signed-off-by: Mike Travis Acked-by: Rusty Russell Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 9e177a4077ee..f49c26bd7e2d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1298,6 +1298,8 @@ __init void prefill_possible_map(void) else possible = setup_possible_cpus; + total_cpus = max_t(int, possible, num_processors + disabled_cpus); + if (possible > CONFIG_NR_CPUS) { printk(KERN_WARNING "%d Processors exceeds NR_CPUS limit of %d\n", -- cgit v1.2.3 From 9628937d5b37169151c5f6bbd40919c6ac958a46 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Wed, 31 Dec 2008 18:08:46 -0800 Subject: x86: cleanup some remaining usages of NR_CPUS where s/b nr_cpu_ids Impact: Reduce future system panics due to cpumask operations using NR_CPUS Insure that code does not look at bits >= nr_cpu_ids as when cpumasks are allocated based on nr_cpu_ids, these extra bits will not be defined. Also some other minor updates: * change in to use cpu accessor function set_cpu_present() instead of directly accessing cpu_present_map w/cpu_clear() [arch/x86/kernel/reboot.c] * use cpumask_of() instead of &cpumask_of_cpu() [arch/x86/kernel/reboot.c] * optimize some cpu_mask_to_apicid_and functions. Signed-off-by: Mike Travis Acked-by: Rusty Russell Signed-off-by: Ingo Molnar --- arch/x86/include/asm/es7000/apic.h | 32 +++------------------------- arch/x86/include/asm/lguest.h | 2 +- arch/x86/include/asm/numaq/apic.h | 4 ++-- arch/x86/include/asm/summit/apic.h | 42 +++++++------------------------------ arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/apic.c | 4 ++-- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/cpuid.c | 2 +- arch/x86/kernel/msr.c | 2 +- arch/x86/kernel/reboot.c | 4 ++-- arch/x86/kernel/smpboot.c | 2 +- arch/x86/mach-voyager/voyager_smp.c | 7 +++---- 12 files changed, 26 insertions(+), 79 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 51ac1230294e..bc53d5ef1386 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -157,7 +157,7 @@ cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) num_bits_set = cpumask_weight(cpumask); /* Return id to all */ - if (num_bits_set == NR_CPUS) + if (num_bits_set == nr_cpu_ids) return 0xFF; /* * The cpus in the mask must all be on the apic cluster. If are not @@ -190,7 +190,7 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) num_bits_set = cpus_weight(*cpumask); /* Return id to all */ - if (num_bits_set == NR_CPUS) + if (num_bits_set == nr_cpu_ids) return cpu_to_logical_apicid(0); /* * The cpus in the mask must all be on the apic cluster. If are not @@ -218,9 +218,6 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, const struct cpumask *andmask) { - int num_bits_set; - int cpus_found = 0; - int cpu; int apicid = cpu_to_logical_apicid(0); cpumask_var_t cpumask; @@ -229,31 +226,8 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, cpumask_and(cpumask, inmask, andmask); cpumask_and(cpumask, cpumask, cpu_online_mask); + apicid = cpu_mask_to_apicid(cpumask); - num_bits_set = cpumask_weight(cpumask); - /* Return id to all */ - if (num_bits_set == NR_CPUS) - goto exit; - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. - */ - cpu = cpumask_first(cpumask); - apicid = cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpumask_test_cpu(cpu, cpumask)) { - int new_apicid = cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)){ - printk ("%s: Not a valid mask!\n", __func__); - return cpu_to_logical_apicid(0); - } - apicid = new_apicid; - cpus_found++; - } - cpu++; - } -exit: free_cpumask_var(cpumask); return apicid; } diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h index d28a507cef39..1caf57628b9c 100644 --- a/arch/x86/include/asm/lguest.h +++ b/arch/x86/include/asm/lguest.h @@ -15,7 +15,7 @@ #define SHARED_SWITCHER_PAGES \ DIV_ROUND_UP(end_switcher_text - start_switcher_text, PAGE_SIZE) /* Pages for switcher itself, then two pages per cpu */ -#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * NR_CPUS) +#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids) /* We map at -4M for ease of mapping into the guest (one PTE page). */ #define SWITCHER_ADDR 0xFFC00000 diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index c80f00d29965..bf37bc49bd8e 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h @@ -63,8 +63,8 @@ static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) extern u8 cpu_2_logical_apicid[]; static inline int cpu_to_logical_apicid(int cpu) { - if (cpu >= NR_CPUS) - return BAD_APICID; + if (cpu >= nr_cpu_ids) + return BAD_APICID; return (int)cpu_2_logical_apicid[cpu]; } diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 99327d1be49f..4bb5fb34f030 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h @@ -52,7 +52,7 @@ static inline void init_apic_ldr(void) int i; /* Create logical APIC IDs by counting CPUs already in cluster. */ - for (count = 0, i = NR_CPUS; --i >= 0; ) { + for (count = 0, i = nr_cpu_ids; --i >= 0; ) { lid = cpu_2_logical_apicid[i]; if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster) ++count; @@ -97,8 +97,8 @@ static inline int apicid_to_node(int logical_apicid) static inline int cpu_to_logical_apicid(int cpu) { #ifdef CONFIG_SMP - if (cpu >= NR_CPUS) - return BAD_APICID; + if (cpu >= nr_cpu_ids) + return BAD_APICID; return (int)cpu_2_logical_apicid[cpu]; #else return logical_smp_processor_id(); @@ -107,7 +107,7 @@ static inline int cpu_to_logical_apicid(int cpu) static inline int cpu_present_to_apicid(int mps_cpu) { - if (mps_cpu < NR_CPUS) + if (mps_cpu < nr_cpu_ids) return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); else return BAD_APICID; @@ -146,7 +146,7 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) num_bits_set = cpus_weight(*cpumask); /* Return id to all */ - if (num_bits_set == NR_CPUS) + if (num_bits_set >= nr_cpu_ids) return (int) 0xFF; /* * The cpus in the mask must all be on the apic cluster. If are not @@ -173,42 +173,16 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, const struct cpumask *andmask) { - int num_bits_set; - int cpus_found = 0; - int cpu; - int apicid = 0xFF; + int apicid = cpu_to_logical_apicid(0); cpumask_var_t cpumask; if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) - return (int) 0xFF; + return apicid; cpumask_and(cpumask, inmask, andmask); cpumask_and(cpumask, cpumask, cpu_online_mask); + apicid = cpu_mask_to_apicid(cpumask); - num_bits_set = cpumask_weight(cpumask); - /* Return id to all */ - if (num_bits_set == nr_cpu_ids) - goto exit; - /* - * The cpus in the mask must all be on the apic cluster. If are not - * on the same apicid cluster return default value of TARGET_CPUS. - */ - cpu = cpumask_first(cpumask); - apicid = cpu_to_logical_apicid(cpu); - while (cpus_found < num_bits_set) { - if (cpumask_test_cpu(cpu, cpumask)) { - int new_apicid = cpu_to_logical_apicid(cpu); - if (apicid_cluster(apicid) != - apicid_cluster(new_apicid)){ - printk ("%s: Not a valid mask!\n", __func__); - return 0xFF; - } - apicid = apicid | new_apicid; - cpus_found++; - } - cpu++; - } -exit: free_cpumask_var(cpumask); return apicid; } diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 65d0b72777ea..fd24c55e4ae2 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -598,7 +598,7 @@ EXPORT_SYMBOL(acpi_map_lsapic); int acpi_unmap_lsapic(int cpu) { per_cpu(x86_cpu_to_apicid, cpu) = -1; - cpu_clear(cpu, cpu_present_map); + set_cpu_present(cpu, false); num_processors--; return (0); diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 6b7f824db160..99589245fd8d 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -140,7 +140,7 @@ static int lapic_next_event(unsigned long delta, struct clock_event_device *evt); static void lapic_timer_setup(enum clock_event_mode mode, struct clock_event_device *evt); -static void lapic_timer_broadcast(const cpumask_t *mask); +static void lapic_timer_broadcast(const struct cpumask *mask); static void apic_pm_activate(void); /* @@ -453,7 +453,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, /* * Local APIC timer broadcast function */ -static void lapic_timer_broadcast(const cpumask_t *mask) +static void lapic_timer_broadcast(const struct cpumask *mask) { #ifdef CONFIG_SMP send_IPI_mask(mask, LOCAL_TIMER_VECTOR); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 42e0853030cb..3f95a40f718a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -355,7 +355,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); } else if (smp_num_siblings > 1) { - if (smp_num_siblings > NR_CPUS) { + if (smp_num_siblings > nr_cpu_ids) { printk(KERN_WARNING "CPU: Unsupported number of siblings %d", smp_num_siblings); smp_num_siblings = 1; diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 72cefd1e649b..62a3c23bd703 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -121,7 +121,7 @@ static int cpuid_open(struct inode *inode, struct file *file) lock_kernel(); cpu = iminor(file->f_path.dentry->d_inode); - if (cpu >= NR_CPUS || !cpu_online(cpu)) { + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { ret = -ENXIO; /* No such CPU */ goto out; } diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 82a7c7ed6d45..726266695b2c 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -136,7 +136,7 @@ static int msr_open(struct inode *inode, struct file *file) lock_kernel(); cpu = iminor(file->f_path.dentry->d_inode); - if (cpu >= NR_CPUS || !cpu_online(cpu)) { + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { ret = -ENXIO; /* No such CPU */ goto out; } diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index ba7b9a0e6063..de4a9d643bee 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -449,7 +449,7 @@ void native_machine_shutdown(void) #ifdef CONFIG_X86_32 /* See if there has been given a command line override */ - if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) && + if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) && cpu_online(reboot_cpu)) reboot_cpu_id = reboot_cpu; #endif @@ -459,7 +459,7 @@ void native_machine_shutdown(void) reboot_cpu_id = smp_processor_id(); /* Make certain I only run on the appropriate processor */ - set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id)); + set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id)); /* O.K Now that I'm on the appropriate processor, * stop all of the others. diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f49c26bd7e2d..6bd4d9b73870 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1154,7 +1154,7 @@ static void __init smp_cpu_index_default(void) for_each_possible_cpu(i) { c = &cpu_data(i); /* mark all to hotplug */ - c->cpu_index = NR_CPUS; + c->cpu_index = nr_cpu_ids; } } diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index a5bc05492b1e..9840b7ec749a 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -357,9 +357,8 @@ void __init find_smp_config(void) printk("VOYAGER SMP: Boot cpu is %d\n", boot_cpu_id); /* initialize the CPU structures (moved from smp_boot_cpus) */ - for (i = 0; i < NR_CPUS; i++) { + for (i = 0; i < nr_cpu_ids; i++) cpu_irq_affinity[i] = ~0; - } cpu_online_map = cpumask_of_cpu(boot_cpu_id); /* The boot CPU must be extended */ @@ -1227,7 +1226,7 @@ int setup_profiling_timer(unsigned int multiplier) * new values until the next timer interrupt in which they do process * accounting. */ - for (i = 0; i < NR_CPUS; ++i) + for (i = 0; i < nr_cpu_ids; ++i) per_cpu(prof_multiplier, i) = multiplier; return 0; @@ -1257,7 +1256,7 @@ void __init voyager_smp_intr_init(void) int i; /* initialize the per cpu irq mask to all disabled */ - for (i = 0; i < NR_CPUS; i++) + for (i = 0; i < nr_cpu_ids; i++) vic_irq_mask[i] = 0xFFFF; VIC_SET_GATE(VIC_CPI_LEVEL0, vic_cpi_interrupt); -- cgit v1.2.3 From ee943a82b697456f9d2ac46f1e6d230beedb4b6c Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 31 Dec 2008 18:08:47 -0800 Subject: x86: use cpumask_var_t in acpi/boot.c Impact: reduce stack size, use new API. Replace cpumask_t with cpumask_var_t. Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index fd24c55e4ae2..29dc0c89d4af 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -538,9 +538,10 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *obj; struct acpi_madt_local_apic *lapic; - cpumask_t tmp_map, new_map; + cpumask_var_t tmp_map, new_map; u8 physid; int cpu; + int retval = -ENOMEM; if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) return -EINVAL; @@ -569,23 +570,37 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) buffer.length = ACPI_ALLOCATE_BUFFER; buffer.pointer = NULL; - tmp_map = cpu_present_map; + if (!alloc_cpumask_var(&tmp_map, GFP_KERNEL)) + goto out; + + if (!alloc_cpumask_var(&new_map, GFP_KERNEL)) + goto free_tmp_map; + + cpumask_copy(tmp_map, cpu_present_mask); acpi_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED); /* * If mp_register_lapic successfully generates a new logical cpu * number, then the following will get us exactly what was mapped */ - cpus_andnot(new_map, cpu_present_map, tmp_map); - if (cpus_empty(new_map)) { + cpumask_andnot(new_map, cpu_present_mask, tmp_map); + if (cpumask_empty(new_map)) { printk ("Unable to map lapic to logical cpu number\n"); - return -EINVAL; + retval = -EINVAL; + goto free_new_map; } - cpu = first_cpu(new_map); + cpu = cpumask_first(new_map); *pcpu = cpu; - return 0; + retval = 0; + +free_new_map: + free_cpumask_var(new_map); +free_tmp_map: + free_cpumask_var(tmp_map); +out: + return retval; } /* wrapper to silence section mismatch warning */ -- cgit v1.2.3 From 2fdf66b491ac706657946442789ec644cc317e1a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 31 Dec 2008 18:08:47 -0800 Subject: cpumask: convert shared_cpu_map in acpi_processor* structs to cpumask_var_t Impact: Reduce memory usage, use new API. This is part of an effort to reduce structure sizes for machines configured with large NR_CPUS. cpumask_t gets replaced by cpumask_var_t, which is either struct cpumask[1] (small NR_CPUS) or struct cpumask * (large NR_CPUS). (Changes to powernow-k* by .) Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 27 ++++++++-- arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 9 ++++ arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 24 +++++---- drivers/acpi/processor_core.c | 14 ++++-- drivers/acpi/processor_perflib.c | 28 ++++++----- drivers/acpi/processor_throttling.c | 80 +++++++++++++++++++----------- include/acpi/processor.h | 4 +- 7 files changed, 128 insertions(+), 58 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 88ea02dcb622..d0a001093b2d 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -517,6 +517,17 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) } } +static void free_acpi_perf_data(void) +{ + unsigned int i; + + /* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */ + for_each_possible_cpu(i) + free_cpumask_var(per_cpu_ptr(acpi_perf_data, i) + ->shared_cpu_map); + free_percpu(acpi_perf_data); +} + /* * acpi_cpufreq_early_init - initialize ACPI P-States library * @@ -527,6 +538,7 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) */ static int __init acpi_cpufreq_early_init(void) { + unsigned int i; dprintk("acpi_cpufreq_early_init\n"); acpi_perf_data = alloc_percpu(struct acpi_processor_performance); @@ -534,6 +546,15 @@ static int __init acpi_cpufreq_early_init(void) dprintk("Memory allocation error for acpi_perf_data.\n"); return -ENOMEM; } + for_each_possible_cpu(i) { + if (!alloc_cpumask_var(&per_cpu_ptr(acpi_perf_data, i) + ->shared_cpu_map, GFP_KERNEL)) { + + /* Freeing a NULL pointer is OK: alloc_percpu zeroes. */ + free_acpi_perf_data(); + return -ENOMEM; + } + } /* Do initialization in ACPI core */ acpi_processor_preregister_performance(acpi_perf_data); @@ -604,9 +625,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) */ if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { - policy->cpus = perf->shared_cpu_map; + cpumask_copy(&policy->cpus, perf->shared_cpu_map); } - policy->related_cpus = perf->shared_cpu_map; + cpumask_copy(&policy->related_cpus, perf->shared_cpu_map); #ifdef CONFIG_SMP dmi_check_system(sw_any_bug_dmi_table); @@ -795,7 +816,7 @@ static int __init acpi_cpufreq_init(void) ret = cpufreq_register_driver(&acpi_cpufreq_driver); if (ret) - free_percpu(acpi_perf_data); + free_acpi_perf_data(); return ret; } diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 7c7d56b43136..1b446d79a8fd 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -310,6 +310,12 @@ static int powernow_acpi_init(void) goto err0; } + if (!alloc_cpumask_var(&acpi_processor_perf->shared_cpu_map, + GFP_KERNEL)) { + retval = -ENOMEM; + goto err05; + } + if (acpi_processor_register_performance(acpi_processor_perf, 0)) { retval = -EIO; goto err1; @@ -412,6 +418,8 @@ static int powernow_acpi_init(void) err2: acpi_processor_unregister_performance(acpi_processor_perf, 0); err1: + free_cpumask_var(acpi_processor_perf->shared_cpu_map); +err05: kfree(acpi_processor_perf); err0: printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n"); @@ -652,6 +660,7 @@ static int powernow_cpu_exit (struct cpufreq_policy *policy) { #ifdef CONFIG_X86_POWERNOW_K7_ACPI if (acpi_processor_perf) { acpi_processor_unregister_performance(acpi_processor_perf, 0); + free_cpumask_var(acpi_processor_perf->shared_cpu_map); kfree(acpi_processor_perf); } #endif diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 7f05f44b97e9..c3c9adbaa26f 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -766,7 +766,7 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { struct cpufreq_frequency_table *powernow_table; - int ret_val; + int ret_val = -ENODEV; if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { dprintk("register performance failed: bad ACPI data\n"); @@ -815,6 +815,13 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) /* notify BIOS that we exist */ acpi_processor_notify_smm(THIS_MODULE); + if (!alloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) { + printk(KERN_ERR PFX + "unable to alloc powernow_k8_data cpumask\n"); + ret_val = -ENOMEM; + goto err_out_mem; + } + return 0; err_out_mem: @@ -826,7 +833,7 @@ err_out: /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ data->acpi_data.state_count = 0; - return -ENODEV; + return ret_val; } static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) @@ -929,6 +936,7 @@ static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { if (data->acpi_data.state_count) acpi_processor_unregister_performance(&data->acpi_data, data->cpu); + free_cpumask_var(data->acpi_data.shared_cpu_map); } #else @@ -1134,7 +1142,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) data->cpu = pol->cpu; data->currpstate = HW_PSTATE_INVALID; - if (powernow_k8_cpu_init_acpi(data)) { + rc = powernow_k8_cpu_init_acpi(data); + if (rc) { /* * Use the PSB BIOS structure. This is only availabe on * an UP version, and is deprecated by AMD. @@ -1152,20 +1161,17 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) "ACPI maintainers and complain to your BIOS " "vendor.\n"); #endif - kfree(data); - return -ENODEV; + goto err_out; } if (pol->cpu != 0) { printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " "CPU other than CPU0. Complain to your BIOS " "vendor.\n"); - kfree(data); - return -ENODEV; + goto err_out; } rc = find_psb_table(data); if (rc) { - kfree(data); - return -ENODEV; + goto err_out; } } diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 34948362f41d..0cc2fd31e376 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -826,6 +826,11 @@ static int acpi_processor_add(struct acpi_device *device) if (!pr) return -ENOMEM; + if (!alloc_cpumask_var(&pr->throttling.shared_cpu_map, GFP_KERNEL)) { + kfree(pr); + return -ENOMEM; + } + pr->handle = device->handle; strcpy(acpi_device_name(device), ACPI_PROCESSOR_DEVICE_NAME); strcpy(acpi_device_class(device), ACPI_PROCESSOR_CLASS); @@ -845,10 +850,8 @@ static int acpi_processor_remove(struct acpi_device *device, int type) pr = acpi_driver_data(device); - if (pr->id >= nr_cpu_ids) { - kfree(pr); - return 0; - } + if (pr->id >= nr_cpu_ids) + goto free; if (type == ACPI_BUS_REMOVAL_EJECT) { if (acpi_processor_handle_eject(pr)) @@ -873,6 +876,9 @@ static int acpi_processor_remove(struct acpi_device *device, int type) per_cpu(processors, pr->id) = NULL; per_cpu(processor_device_array, pr->id) = NULL; + +free: + free_cpumask_var(pr->throttling.shared_cpu_map); kfree(pr); return 0; diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 0d7b772bef50..846e227592d4 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -588,12 +588,15 @@ int acpi_processor_preregister_performance( int count, count_target; int retval = 0; unsigned int i, j; - cpumask_t covered_cpus; + cpumask_var_t covered_cpus; struct acpi_processor *pr; struct acpi_psd_package *pdomain; struct acpi_processor *match_pr; struct acpi_psd_package *match_pdomain; + if (!alloc_cpumask_var(&covered_cpus, GFP_KERNEL)) + return -ENOMEM; + mutex_lock(&performance_mutex); retval = 0; @@ -617,7 +620,7 @@ int acpi_processor_preregister_performance( } pr->performance = percpu_ptr(performance, i); - cpu_set(i, pr->performance->shared_cpu_map); + cpumask_set_cpu(i, pr->performance->shared_cpu_map); if (acpi_processor_get_psd(pr)) { retval = -EINVAL; continue; @@ -650,18 +653,18 @@ int acpi_processor_preregister_performance( } } - cpus_clear(covered_cpus); + cpumask_clear(covered_cpus); for_each_possible_cpu(i) { pr = per_cpu(processors, i); if (!pr) continue; - if (cpu_isset(i, covered_cpus)) + if (cpumask_test_cpu(i, covered_cpus)) continue; pdomain = &(pr->performance->domain_info); - cpu_set(i, pr->performance->shared_cpu_map); - cpu_set(i, covered_cpus); + cpumask_set_cpu(i, pr->performance->shared_cpu_map); + cpumask_set_cpu(i, covered_cpus); if (pdomain->num_processors <= 1) continue; @@ -699,8 +702,8 @@ int acpi_processor_preregister_performance( goto err_ret; } - cpu_set(j, covered_cpus); - cpu_set(j, pr->performance->shared_cpu_map); + cpumask_set_cpu(j, covered_cpus); + cpumask_set_cpu(j, pr->performance->shared_cpu_map); count++; } @@ -718,8 +721,8 @@ int acpi_processor_preregister_performance( match_pr->performance->shared_type = pr->performance->shared_type; - match_pr->performance->shared_cpu_map = - pr->performance->shared_cpu_map; + cpumask_copy(match_pr->performance->shared_cpu_map, + pr->performance->shared_cpu_map); } } @@ -731,14 +734,15 @@ err_ret: /* Assume no coordination on any error parsing domain info */ if (retval) { - cpus_clear(pr->performance->shared_cpu_map); - cpu_set(i, pr->performance->shared_cpu_map); + cpumask_clear(pr->performance->shared_cpu_map); + cpumask_set_cpu(i, pr->performance->shared_cpu_map); pr->performance->shared_type = CPUFREQ_SHARED_TYPE_ALL; } pr->performance = NULL; /* Will be set for real in register */ } mutex_unlock(&performance_mutex); + free_cpumask_var(covered_cpus); return retval; } EXPORT_SYMBOL(acpi_processor_preregister_performance); diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index a0c38c94a8a0..d27838171f4a 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -61,11 +61,14 @@ static int acpi_processor_update_tsd_coord(void) int count, count_target; int retval = 0; unsigned int i, j; - cpumask_t covered_cpus; + cpumask_var_t covered_cpus; struct acpi_processor *pr, *match_pr; struct acpi_tsd_package *pdomain, *match_pdomain; struct acpi_processor_throttling *pthrottling, *match_pthrottling; + if (!alloc_cpumask_var(&covered_cpus, GFP_KERNEL)) + return -ENOMEM; + /* * Now that we have _TSD data from all CPUs, lets setup T-state * coordination between all CPUs. @@ -91,19 +94,19 @@ static int acpi_processor_update_tsd_coord(void) if (retval) goto err_ret; - cpus_clear(covered_cpus); + cpumask_clear(covered_cpus); for_each_possible_cpu(i) { pr = per_cpu(processors, i); if (!pr) continue; - if (cpu_isset(i, covered_cpus)) + if (cpumask_test_cpu(i, covered_cpus)) continue; pthrottling = &pr->throttling; pdomain = &(pthrottling->domain_info); - cpu_set(i, pthrottling->shared_cpu_map); - cpu_set(i, covered_cpus); + cpumask_set_cpu(i, pthrottling->shared_cpu_map); + cpumask_set_cpu(i, covered_cpus); /* * If the number of processor in the TSD domain is 1, it is * unnecessary to parse the coordination for this CPU. @@ -144,8 +147,8 @@ static int acpi_processor_update_tsd_coord(void) goto err_ret; } - cpu_set(j, covered_cpus); - cpu_set(j, pthrottling->shared_cpu_map); + cpumask_set_cpu(j, covered_cpus); + cpumask_set_cpu(j, pthrottling->shared_cpu_map); count++; } for_each_possible_cpu(j) { @@ -165,12 +168,14 @@ static int acpi_processor_update_tsd_coord(void) * If some CPUS have the same domain, they * will have the same shared_cpu_map. */ - match_pthrottling->shared_cpu_map = - pthrottling->shared_cpu_map; + cpumask_copy(match_pthrottling->shared_cpu_map, + pthrottling->shared_cpu_map); } } err_ret: + free_cpumask_var(covered_cpus); + for_each_possible_cpu(i) { pr = per_cpu(processors, i); if (!pr) @@ -182,8 +187,8 @@ err_ret: */ if (retval) { pthrottling = &(pr->throttling); - cpus_clear(pthrottling->shared_cpu_map); - cpu_set(i, pthrottling->shared_cpu_map); + cpumask_clear(pthrottling->shared_cpu_map); + cpumask_set_cpu(i, pthrottling->shared_cpu_map); pthrottling->shared_type = DOMAIN_COORD_TYPE_SW_ALL; } } @@ -567,7 +572,7 @@ static int acpi_processor_get_tsd(struct acpi_processor *pr) pthrottling = &pr->throttling; pthrottling->tsd_valid_flag = 1; pthrottling->shared_type = pdomain->coord_type; - cpu_set(pr->id, pthrottling->shared_cpu_map); + cpumask_set_cpu(pr->id, pthrottling->shared_cpu_map); /* * If the coordination type is not defined in ACPI spec, * the tsd_valid_flag will be clear and coordination type @@ -826,7 +831,7 @@ static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr) static int acpi_processor_get_throttling(struct acpi_processor *pr) { - cpumask_t saved_mask; + cpumask_var_t saved_mask; int ret; if (!pr) @@ -834,14 +839,20 @@ static int acpi_processor_get_throttling(struct acpi_processor *pr) if (!pr->flags.throttling) return -ENODEV; + + if (!alloc_cpumask_var(&saved_mask, GFP_KERNEL)) + return -ENOMEM; + /* * Migrate task to the cpu pointed by pr. */ - saved_mask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(pr->id)); + cpumask_copy(saved_mask, ¤t->cpus_allowed); + /* FIXME: use work_on_cpu() */ + set_cpus_allowed_ptr(current, cpumask_of(pr->id)); ret = pr->throttling.acpi_processor_get_throttling(pr); /* restore the previous state */ - set_cpus_allowed_ptr(current, &saved_mask); + set_cpus_allowed_ptr(current, saved_mask); + free_cpumask_var(saved_mask); return ret; } @@ -986,13 +997,13 @@ static int acpi_processor_set_throttling_ptc(struct acpi_processor *pr, int acpi_processor_set_throttling(struct acpi_processor *pr, int state) { - cpumask_t saved_mask; + cpumask_var_t saved_mask; int ret = 0; unsigned int i; struct acpi_processor *match_pr; struct acpi_processor_throttling *p_throttling; struct throttling_tstate t_state; - cpumask_t online_throttling_cpus; + cpumask_var_t online_throttling_cpus; if (!pr) return -EINVAL; @@ -1003,17 +1014,25 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) if ((state < 0) || (state > (pr->throttling.state_count - 1))) return -EINVAL; - saved_mask = current->cpus_allowed; + if (!alloc_cpumask_var(&saved_mask, GFP_KERNEL)) + return -ENOMEM; + + if (!alloc_cpumask_var(&online_throttling_cpus, GFP_KERNEL)) { + free_cpumask_var(saved_mask); + return -ENOMEM; + } + + cpumask_copy(saved_mask, ¤t->cpus_allowed); t_state.target_state = state; p_throttling = &(pr->throttling); - cpus_and(online_throttling_cpus, cpu_online_map, - p_throttling->shared_cpu_map); + cpumask_and(online_throttling_cpus, cpu_online_mask, + p_throttling->shared_cpu_map); /* * The throttling notifier will be called for every * affected cpu in order to get one proper T-state. * The notifier event is THROTTLING_PRECHANGE. */ - for_each_cpu_mask_nr(i, online_throttling_cpus) { + for_each_cpu(i, online_throttling_cpus) { t_state.cpu = i; acpi_processor_throttling_notifier(THROTTLING_PRECHANGE, &t_state); @@ -1025,7 +1044,8 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) * it can be called only for the cpu pointed by pr. */ if (p_throttling->shared_type == DOMAIN_COORD_TYPE_SW_ANY) { - set_cpus_allowed_ptr(current, &cpumask_of_cpu(pr->id)); + /* FIXME: use work_on_cpu() */ + set_cpus_allowed_ptr(current, cpumask_of(pr->id)); ret = p_throttling->acpi_processor_set_throttling(pr, t_state.target_state); } else { @@ -1034,7 +1054,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) * it is necessary to set T-state for every affected * cpus. */ - for_each_cpu_mask_nr(i, online_throttling_cpus) { + for_each_cpu(i, online_throttling_cpus) { match_pr = per_cpu(processors, i); /* * If the pointer is invalid, we will report the @@ -1056,7 +1076,8 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) continue; } t_state.cpu = i; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); + /* FIXME: use work_on_cpu() */ + set_cpus_allowed_ptr(current, cpumask_of(i)); ret = match_pr->throttling. acpi_processor_set_throttling( match_pr, t_state.target_state); @@ -1068,13 +1089,16 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) * affected cpu to update the T-states. * The notifier event is THROTTLING_POSTCHANGE */ - for_each_cpu_mask_nr(i, online_throttling_cpus) { + for_each_cpu(i, online_throttling_cpus) { t_state.cpu = i; acpi_processor_throttling_notifier(THROTTLING_POSTCHANGE, &t_state); } /* restore the previous state */ - set_cpus_allowed_ptr(current, &saved_mask); + /* FIXME: use work_on_cpu() */ + set_cpus_allowed_ptr(current, saved_mask); + free_cpumask_var(online_throttling_cpus); + free_cpumask_var(saved_mask); return ret; } @@ -1120,7 +1144,7 @@ int acpi_processor_get_throttling_info(struct acpi_processor *pr) if (acpi_processor_get_tsd(pr)) { pthrottling = &pr->throttling; pthrottling->tsd_valid_flag = 0; - cpu_set(pr->id, pthrottling->shared_cpu_map); + cpumask_set_cpu(pr->id, pthrottling->shared_cpu_map); pthrottling->shared_type = DOMAIN_COORD_TYPE_SW_ALL; } diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 3795590e152a..0574add2a1e3 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -127,7 +127,7 @@ struct acpi_processor_performance { unsigned int state_count; struct acpi_processor_px *states; struct acpi_psd_package domain_info; - cpumask_t shared_cpu_map; + cpumask_var_t shared_cpu_map; unsigned int shared_type; }; @@ -172,7 +172,7 @@ struct acpi_processor_throttling { unsigned int state_count; struct acpi_processor_tx_tss *states_tss; struct acpi_tsd_package domain_info; - cpumask_t shared_cpu_map; + cpumask_var_t shared_cpu_map; int (*acpi_processor_get_throttling) (struct acpi_processor * pr); int (*acpi_processor_set_throttling) (struct acpi_processor * pr, int state); -- cgit v1.2.3 From 80855f7361eb68205e6bc1981928629d9b02d5c9 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Wed, 31 Dec 2008 18:08:47 -0800 Subject: cpumask: use alloc_cpumask_var_node where appropriate Impact: Reduce inter-node memory traffic. Reduces inter-node memory traffic (offloading the global system bus) by allocating referenced struct cpumasks on the same node as the referring struct. Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 5 +++-- arch/x86/kernel/io_apic.c | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index d0a001093b2d..28102ad1a363 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -547,8 +547,9 @@ static int __init acpi_cpufreq_early_init(void) return -ENOMEM; } for_each_possible_cpu(i) { - if (!alloc_cpumask_var(&per_cpu_ptr(acpi_perf_data, i) - ->shared_cpu_map, GFP_KERNEL)) { + if (!alloc_cpumask_var_node( + &per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map, + GFP_KERNEL, cpu_to_node(i))) { /* Freeing a NULL pointer is OK: alloc_percpu zeroes. */ free_acpi_perf_data(); diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 3e070bb961d7..a25c3f76b8ac 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -212,11 +212,11 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu) cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); if (cfg) { - /* FIXME: needs alloc_cpumask_var_node() */ - if (!alloc_cpumask_var(&cfg->domain, GFP_ATOMIC)) { + if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) { kfree(cfg); cfg = NULL; - } else if (!alloc_cpumask_var(&cfg->old_domain, GFP_ATOMIC)) { + } else if (!alloc_cpumask_var_node(&cfg->old_domain, + GFP_ATOMIC, node)) { free_cpumask_var(cfg->domain); kfree(cfg); cfg = NULL; -- cgit v1.2.3 From ab14398abd195af91a744c320a52a1bce814dd1e Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 2 Jan 2009 21:51:32 +0300 Subject: x86: setup_per_cpu_areas() cleanup Impact: cleanup __alloc_bootmem and __alloc_bootmem_node do panic for us in case of fail so no need for additional checks here. Also lets use pr_*() macros for printing. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 49f3f709ee1f..a4b619c33106 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -153,12 +153,10 @@ void __init setup_per_cpu_areas(void) align = max_t(unsigned long, PAGE_SIZE, align); size = roundup(old_size, align); - printk(KERN_INFO - "NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", + pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); - printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", - size); + pr_info("PERCPU: Allocating %zd bytes of per cpu data\n", size); for_each_possible_cpu(cpu) { #ifndef CONFIG_NEED_MULTIPLE_NODES @@ -169,22 +167,15 @@ void __init setup_per_cpu_areas(void) if (!node_online(node) || !NODE_DATA(node)) { ptr = __alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS)); - printk(KERN_INFO - "cpu %d has no node %d or node-local memory\n", + pr_info("cpu %d has no node %d or node-local memory\n", cpu, node); - if (ptr) - printk(KERN_DEBUG - "per cpu data for cpu%d at %016lx\n", - cpu, __pa(ptr)); - } - else { + pr_debug("per cpu data for cpu%d at %016lx\n", + cpu, __pa(ptr)); + } else { ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, __pa(MAX_DMA_ADDRESS)); - if (ptr) - printk(KERN_DEBUG - "per cpu data for cpu%d on node%d " - "at %016lx\n", - cpu, node, __pa(ptr)); + pr_debug("per cpu data for cpu%d on node%d at %016lx\n", + cpu, node, __pa(ptr)); } #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; -- cgit v1.2.3 From 3bfacef412b4bc993a8992217e50f1245f2fd3a6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 3 Jan 2009 07:16:33 +0000 Subject: get rid of special-casing the /sbin/loader on alpha ... just make it a binfmt handler like #! one. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/alpha/kernel/Makefile | 2 +- arch/alpha/kernel/binfmt_loader.c | 51 +++++++++++++++++++++++++++++++++++++++ fs/exec.c | 39 ------------------------------ 3 files changed, 52 insertions(+), 40 deletions(-) create mode 100644 arch/alpha/kernel/binfmt_loader.c (limited to 'arch') diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile index ac706c1d7ada..b4697759a123 100644 --- a/arch/alpha/kernel/Makefile +++ b/arch/alpha/kernel/Makefile @@ -8,7 +8,7 @@ EXTRA_CFLAGS := -Werror -Wno-sign-compare obj-y := entry.o traps.o process.o init_task.o osf_sys.o irq.o \ irq_alpha.o signal.o setup.o ptrace.o time.o \ - alpha_ksyms.o systbls.o err_common.o io.o + alpha_ksyms.o systbls.o err_common.o io.o binfmt_loader.o obj-$(CONFIG_VGA_HOSE) += console.o obj-$(CONFIG_SMP) += smp.o diff --git a/arch/alpha/kernel/binfmt_loader.c b/arch/alpha/kernel/binfmt_loader.c new file mode 100644 index 000000000000..4a0af906b00a --- /dev/null +++ b/arch/alpha/kernel/binfmt_loader.c @@ -0,0 +1,51 @@ +#include +#include +#include +#include +#include +#include + +static int load_binary(struct linux_binprm *bprm, struct pt_regs *regs) +{ + struct exec *eh = (struct exec *)bprm->buf; + unsigned long loader; + struct file *file; + int retval; + + if (eh->fh.f_magic != 0x183 || (eh->fh.f_flags & 0x3000) != 0x3000) + return -ENOEXEC; + + if (bprm->loader) + return -ENOEXEC; + + allow_write_access(bprm->file); + fput(bprm->file); + bprm->file = NULL; + + loader = bprm->vma->vm_end - sizeof(void *); + + file = open_exec("/sbin/loader"); + retval = PTR_ERR(file); + if (IS_ERR(file)) + return retval; + + /* Remember if the application is TASO. */ + bprm->taso = eh->ah.entry < 0x100000000UL; + + bprm->file = file; + bprm->loader = loader; + retval = prepare_binprm(bprm); + if (retval < 0) + return retval; + return search_binary_handler(bprm,regs); +} + +static struct linux_binfmt loader_format = { + .load_binary = load_binary, +}; + +static int __init init_loader_binfmt(void) +{ + return register_binfmt(&loader_format); +} +arch_initcall(init_loader_binfmt); diff --git a/fs/exec.c b/fs/exec.c index dfbf7009fbe7..3ef9cf9b1871 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -57,11 +57,6 @@ #include #include "internal.h" -#ifdef __alpha__ -/* for /sbin/loader handling in search_binary_handler() */ -#include -#endif - int core_uses_pid; char core_pattern[CORENAME_MAX_SIZE] = "core"; int suid_dumpable = 0; @@ -1172,41 +1167,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) unsigned int depth = bprm->recursion_depth; int try,retval; struct linux_binfmt *fmt; -#ifdef __alpha__ - /* handle /sbin/loader.. */ - { - struct exec * eh = (struct exec *) bprm->buf; - if (!bprm->loader && eh->fh.f_magic == 0x183 && - (eh->fh.f_flags & 0x3000) == 0x3000) - { - struct file * file; - unsigned long loader; - - allow_write_access(bprm->file); - fput(bprm->file); - bprm->file = NULL; - - loader = bprm->vma->vm_end - sizeof(void *); - - file = open_exec("/sbin/loader"); - retval = PTR_ERR(file); - if (IS_ERR(file)) - return retval; - - /* Remember if the application is TASO. */ - bprm->taso = eh->ah.entry < 0x100000000UL; - - bprm->file = file; - bprm->loader = loader; - retval = prepare_binprm(bprm); - if (retval<0) - return retval; - /* should call search_binary_handler recursively here, - but it does not matter */ - } - } -#endif retval = security_bprm_check(bprm); if (retval) return retval; -- cgit v1.2.3