From 747ada36ee23225d81657e4d633ac93b8ccbea7d Mon Sep 17 00:00:00 2001 From: Olaf Dabrunz Date: Wed, 11 Jun 2008 16:35:13 +0200 Subject: pci: add PCI IDs for devices that need boot irq quirks Signed-off-by: Stefan Assmann Signed-off-by: Olaf Dabrunz Signed-off-by: Ingo Molnar --- include/linux/pci_ids.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 65953822c9cb..7f3f101e03c1 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2235,6 +2235,10 @@ #define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 #define PCI_DEVICE_ID_INTEL_PXH_1 0x032A #define PCI_DEVICE_ID_INTEL_PXHV 0x032C +#define PCI_DEVICE_ID_INTEL_80332_0 0x0330 +#define PCI_DEVICE_ID_INTEL_80332_1 0x0332 +#define PCI_DEVICE_ID_INTEL_80333_0 0x0370 +#define PCI_DEVICE_ID_INTEL_80333_1 0x0372 #define PCI_DEVICE_ID_INTEL_82375 0x0482 #define PCI_DEVICE_ID_INTEL_82424 0x0483 #define PCI_DEVICE_ID_INTEL_82378 0x0484 @@ -2307,6 +2311,7 @@ #define PCI_DEVICE_ID_INTEL_ESB_4 0x25a4 #define PCI_DEVICE_ID_INTEL_ESB_5 0x25a6 #define PCI_DEVICE_ID_INTEL_ESB_9 0x25ab +#define PCI_DEVICE_ID_INTEL_ESB_10 0x25ac #define PCI_DEVICE_ID_INTEL_82820_HB 0x2500 #define PCI_DEVICE_ID_INTEL_82820_UP_HB 0x2501 #define PCI_DEVICE_ID_INTEL_82850_HB 0x2530 -- cgit v1.2.3 From a9322f6488b432ddc1e89be88242c827c633fb63 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Wed, 11 Jun 2008 16:35:14 +0200 Subject: x86, pci: introduce pci=noioapicquirk kernel cmdline option Introduce pci=noioapicquirk kernel cmdline option to disable all boot interrupt quirks Signed-off-by: Stefan Assmann Signed-off-by: Olaf Dabrunz Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 3 +++ arch/x86/pci/common.c | 4 ++++ include/asm-x86/io_apic.h | 4 ++++ include/asm-x86/pci.h | 1 + 4 files changed, 12 insertions(+) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 795c487af8e4..1aebe9dffbaa 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1518,6 +1518,9 @@ and is between 256 and 4096 characters. It is defined in the file nomsi [MSI] If the PCI_MSI kernel config parameter is enabled, this kernel boot option can be used to disable the use of MSI interrupts system-wide. + noioapicquirk [APIC] Disable all boot interrupt quirks. + Safety option to keep boot IRQs enabled. This + should never be necessary. biosirq [X86-32] Use PCI BIOS calls to get the interrupt routing table. These calls are known to be buggy on several machines and they hang the machine diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 940185ecaeda..bc6a101ed7ec 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -22,6 +22,7 @@ unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | static int pci_bf_sort; int pci_routeirq; +int noioapicquirk; int pcibios_last_bus = -1; unsigned long pirq_table_addr; struct pci_bus *pci_root_bus; @@ -495,6 +496,9 @@ char * __devinit pcibios_setup(char *str) } else if (!strcmp(str, "skip_isa_align")) { pci_probe |= PCI_CAN_SKIP_ISA_ALIGN; return NULL; + } else if (!strcmp(str, "noioapicquirk")) { + noioapicquirk = 1; + return NULL; } return str; } diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h index 14f82bbcb5fd..8ca0110819f4 100644 --- a/include/asm-x86/io_apic.h +++ b/include/asm-x86/io_apic.h @@ -157,11 +157,15 @@ extern int sis_apic_bug; /* 1 if "noapic" boot option passed */ extern int skip_ioapic_setup; +/* 1 if "noapic" boot option passed */ +extern int noioapicquirk; + /* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */ extern int timer_through_8259; static inline void disable_ioapic_setup(void) { + noioapicquirk = 1; skip_ioapic_setup = 1; } diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h index 2db14cf17db8..30eec93a845e 100644 --- a/include/asm-x86/pci.h +++ b/include/asm-x86/pci.h @@ -19,6 +19,7 @@ struct pci_sysdata { }; extern int pci_routeirq; +extern int noioapicquirk; /* scan a bus after allocating a pci_sysdata for it */ extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, -- cgit v1.2.3 From 9197979b518573999d52d9e85bce1680682ed85c Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Wed, 11 Jun 2008 16:35:15 +0200 Subject: x86, pci: introduce pci=ioapicreroute kernel cmdline option Introduce pci=ioapicreroute kernel cmdline option to enable rerouting of boot interrupts to the primary io-apic. Signed-off-by: Stefan Assmann Signed-off-by: Olaf Dabrunz Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 4 ++++ arch/x86/pci/common.c | 5 +++++ include/asm-x86/io_apic.h | 4 ++++ include/asm-x86/pci.h | 1 + 4 files changed, 14 insertions(+) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1aebe9dffbaa..df262b3c3d6e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1521,6 +1521,10 @@ and is between 256 and 4096 characters. It is defined in the file noioapicquirk [APIC] Disable all boot interrupt quirks. Safety option to keep boot IRQs enabled. This should never be necessary. + ioapicreroute [APIC] Enable rerouting of boot IRQs to the + primary IO-APIC for bridges that cannot disable + boot IRQs. This fixes a source of spurious IRQs + when the system masks IRQs. biosirq [X86-32] Use PCI BIOS calls to get the interrupt routing table. These calls are known to be buggy on several machines and they hang the machine diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index bc6a101ed7ec..0a9eaa736d94 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -23,6 +23,7 @@ unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | static int pci_bf_sort; int pci_routeirq; int noioapicquirk; +int noioapicreroute = 1; int pcibios_last_bus = -1; unsigned long pirq_table_addr; struct pci_bus *pci_root_bus; @@ -499,6 +500,10 @@ char * __devinit pcibios_setup(char *str) } else if (!strcmp(str, "noioapicquirk")) { noioapicquirk = 1; return NULL; + } else if (!strcmp(str, "ioapicreroute")) { + if (noioapicreroute != -1) + noioapicreroute = 0; + return NULL; } return str; } diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h index 8ca0110819f4..a39670ae17df 100644 --- a/include/asm-x86/io_apic.h +++ b/include/asm-x86/io_apic.h @@ -160,12 +160,16 @@ extern int skip_ioapic_setup; /* 1 if "noapic" boot option passed */ extern int noioapicquirk; +/* -1 if "noapic" boot option passed */ +extern int noioapicreroute; + /* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */ extern int timer_through_8259; static inline void disable_ioapic_setup(void) { noioapicquirk = 1; + noioapicreroute = -1; skip_ioapic_setup = 1; } diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h index 30eec93a845e..52a29f7668ef 100644 --- a/include/asm-x86/pci.h +++ b/include/asm-x86/pci.h @@ -20,6 +20,7 @@ struct pci_sysdata { extern int pci_routeirq; extern int noioapicquirk; +extern int ioapicreroute; /* scan a bus after allocating a pci_sysdata for it */ extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, -- cgit v1.2.3 From e1d3a90846b40ad3160bf4b648d36c6badad39ac Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Wed, 11 Jun 2008 16:35:17 +0200 Subject: pci, acpi: reroute PCI interrupt to legacy boot interrupt equivalent Some chipsets (e.g. intel 6700PXH) generate a legacy INTx when the IRQ entry in the chipset's IO-APIC is masked (as, e.g. the RT kernel does during interrupt handling). On chipsets where this INTx generation cannot be disabled, we reroute the valid interrupts to their legacy equivalent to get rid of spurious interrupts that might otherwise bring down (vital) interrupt lines through spurious interrupt detection in note_interrupt(). This patch benefited from discussions with Alexander Graf, Torsten Duwe, Ihno Krumreich, Daniel Gollub, Hannes Reinecke. The conclusions we drew and the patch itself are the authors' responsibility alone. Signed-off-by: Stefan Assmann Signed-off-by: Olaf Dabrunz Signed-off-by: Ingo Molnar --- drivers/acpi/pci_irq.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++ drivers/pci/quirks.c | 28 +++++++++++++++++++++++++ include/linux/pci.h | 6 ++++++ 3 files changed, 90 insertions(+) (limited to 'include') diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index 89022a74faee..b37cb0a9826e 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -384,6 +384,27 @@ acpi_pci_free_irq(struct acpi_prt_entry *entry, return irq; } +#ifdef CONFIG_X86_IO_APIC +extern int noioapicquirk; + +static int bridge_has_boot_interrupt_variant(struct pci_bus *bus) +{ + struct pci_bus *bus_it; + + for (bus_it = bus ; bus_it ; bus_it = bus_it->parent) { + if (!bus_it->self) + return 0; + + printk(KERN_INFO "vendor=%04x device=%04x\n", bus_it->self->vendor, + bus_it->self->device); + + if (bus_it->self->irq_reroute_variant) + return bus_it->self->irq_reroute_variant; + } + return 0; +} +#endif /* CONFIG_X86_IO_APIC */ + /* * acpi_pci_irq_lookup * success: return IRQ >= 0 @@ -413,6 +434,41 @@ acpi_pci_irq_lookup(struct pci_bus *bus, } ret = func(entry, triggering, polarity, link); + +#ifdef CONFIG_X86_IO_APIC + /* + * Some chipsets (e.g. intel 6700PXH) generate a legacy INTx when the + * IRQ entry in the chipset's IO-APIC is masked (as, e.g. the RT kernel + * does during interrupt handling). When this INTx generation cannot be + * disabled, we reroute these interrupts to their legacy equivalent to + * get rid of spurious interrupts. + */ + if (!noioapicquirk) { + switch (bridge_has_boot_interrupt_variant(bus)) { + case 0: + /* no rerouting necessary */ + break; + + case INTEL_IRQ_REROUTE_VARIANT: + /* + * Remap according to INTx routing table in 6700PXH + * specs, intel order number 302628-002, section + * 2.15.2. Other chipsets (80332, ...) have the same + * mapping and are handled here as well. + */ + printk(KERN_INFO "pci irq %d -> rerouted to legacy " + "irq %d\n", ret, (ret % 4) + 16); + ret = (ret % 4) + 16; + break; + + default: + printk(KERN_INFO "not rerouting irq %d to legacy irq: " + "unknown mapping\n", ret); + break; + } + } +#endif /* CONFIG_X86_IO_APIC */ + return ret; } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index eb97564316d0..ac634ae2eb08 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1364,6 +1364,34 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x260a, quirk_intel_pcie_pm); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x260b, quirk_intel_pcie_pm); #ifdef CONFIG_X86_IO_APIC +/* + * Boot interrupts on some chipsets cannot be turned off. For these chipsets, + * remap the original interrupt in the linux kernel to the boot interrupt, so + * that a PCI device's interrupt handler is installed on the boot interrupt + * line instead. + */ +static void quirk_reroute_to_boot_interrupts_intel(struct pci_dev *dev) +{ + int i; + + if (noioapicquirk) + return; + + dev->irq_reroute_variant = INTEL_IRQ_REROUTE_VARIANT; + + printk(KERN_INFO "PCI quirk: reroute interrupts for 0x%04x:0x%04x\n", + dev->vendor, dev->device); + return; +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80333_0, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80333_1, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXH_0, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXH_1, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXHV, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80332_0, quirk_reroute_to_boot_interrupts_intel); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_80332_1, quirk_reroute_to_boot_interrupts_intel); + /* * On some chipsets we can disable the generation of legacy INTx boot * interrupts. diff --git a/include/linux/pci.h b/include/linux/pci.h index d18b1dd49fab..6755cf5ac109 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -117,6 +117,11 @@ enum pci_dev_flags { PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) 1, }; +enum pci_irq_reroute_variant { + INTEL_IRQ_REROUTE_VARIANT = 1, + MAX_IRQ_REROUTE_VARIANTS = 3 +}; + typedef unsigned short __bitwise pci_bus_flags_t; enum pci_bus_flags { PCI_BUS_FLAGS_NO_MSI = (__force pci_bus_flags_t) 1, @@ -194,6 +199,7 @@ struct pci_dev { unsigned int no_d1d2:1; /* only allow d0 or d3 */ unsigned int block_ucfg_access:1; /* userspace config space access is blocked */ unsigned int broken_parity_status:1; /* Device generates false positive parity */ + unsigned int irq_reroute_variant:2; /* device needs IRQ rerouting variant */ unsigned int msi_enabled:1; unsigned int msix_enabled:1; unsigned int is_managed:1; -- cgit v1.2.3 From 33be8333421f842789fa7e363ce4142947e094f0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 18 Jun 2008 14:47:09 +0200 Subject: x86: boot IRQ quirks and rerouting, fix fix: init/built-in.o: In function `nosmp': main.c:(.init.text+0x14): undefined reference to `noioapicquirk' main.c:(.init.text+0x1e): undefined reference to `noioapicreroute' init/built-in.o: In function `maxcpus': main.c:(.init.text+0x133): undefined reference to `noioapicquirk' main.c:(.init.text+0x13d): undefined reference to `noioapicreroute' arch/x86/kernel/built-in.o: In function `parse_noapic': io_apic_32.c:(.init.text+0x7836): undefined reference to `noioapicquirk' io_apic_32.c:(.init.text+0x7840): undefined reference to `noioapicreroute' Signed-off-by: Ingo Molnar --- include/asm-x86/io_apic.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h index a39670ae17df..721605d8f116 100644 --- a/include/asm-x86/io_apic.h +++ b/include/asm-x86/io_apic.h @@ -168,8 +168,10 @@ extern int timer_through_8259; static inline void disable_ioapic_setup(void) { +#ifdef CONFIG_PCI noioapicquirk = 1; noioapicreroute = -1; +#endif skip_ioapic_setup = 1; } -- cgit v1.2.3 From 41b9eb264c8407655db57b60b4457fe1b2ec9977 Mon Sep 17 00:00:00 2001 From: Stefan Assmann Date: Tue, 15 Jul 2008 13:48:55 +0200 Subject: x86, pci: introduce config option for pci reroute quirks (was: [PATCH 0/3] Boot IRQ quirks for Broadcom and AMD/ATI) This is against linux-2.6-tip, branch pci-ioapic-boot-irq-quirks. From: Stefan Assmann Subject: Introduce config option for pci reroute quirks The config option X86_REROUTE_FOR_BROKEN_BOOT_IRQS is introduced to enable (or disable) the redirection of the interrupt handler to the boot interrupt line by default. Depending on the existence of interrupt masking / threaded interrupt handling in the kernel (vanilla, rt, ...) and the maturity of the rerouting patch, users can enable or disable the redirection by default. This means that the reroute quirk can be applied to any kernel without changing it. Interrupt sharing could be increased if this option is enabled. However this option is vital for threaded interrupt handling, as done by the RT kernel. It should simplify the consolidation with the RT kernel. The option can be overridden by either pci=ioapicreroute or pci=noioapicreroute. Signed-off-by: Stefan Assmann Signed-off-by: Olaf Dabrunz Cc: Jesse Barnes Cc: Jon Masters Cc: Ihno Krumreich Cc: Sven Dietrich Cc: Daniel Gollub Cc: Felix Foerster Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 4 ++++ arch/x86/Kconfig | 24 ++++++++++++++++++++++++ arch/x86/pci/common.c | 8 ++++++++ drivers/pci/quirks.c | 2 +- include/asm-x86/pci.h | 2 +- 5 files changed, 38 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index f5662b7a34d1..62b6e8067a5b 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1536,6 +1536,10 @@ and is between 256 and 4096 characters. It is defined in the file primary IO-APIC for bridges that cannot disable boot IRQs. This fixes a source of spurious IRQs when the system masks IRQs. + noioapicreroute [APIC] Disable workaround that uses the + boot IRQ equivalent of an IRQ that connects to + a chipset where boot IRQs cannot be disabled. + The opposite of ioapicreroute. biosirq [X86-32] Use PCI BIOS calls to get the interrupt routing table. These calls are known to be buggy on several machines and they hang the machine diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 96e0c2ebc388..09521332636b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -665,6 +665,30 @@ config X86_VISWS_APIC def_bool y depends on X86_32 && X86_VISWS +config X86_REROUTE_FOR_BROKEN_BOOT_IRQS + bool "Reroute for broken boot IRQs" + default n + depends on X86_IO_APIC + help + This option enables a workaround that fixes a source of + spurious interrupts. This is recommended when threaded + interrupt handling is used on systems where the generation of + superfluous "boot interrupts" cannot be disabled. + + Some chipsets generate a legacy INTx "boot IRQ" when the IRQ + entry in the chipset's IO-APIC is masked (as, e.g. the RT + kernel does during interrupt handling). On chipsets where this + boot IRQ generation cannot be disabled, this workaround keeps + the original IRQ line masked so that only the equivalent "boot + IRQ" is delivered to the CPUs. The workaround also tells the + kernel to set up the IRQ handler on the boot IRQ line. In this + way only one interrupt is delivered to the kernel. Otherwise + the spurious second interrupt may cause the kernel to bring + down (vital) interrupt lines. + + Only affects "broken" chipsets. Interrupt sharing may be + increased on these systems. + config X86_MCE bool "Machine Check Exception" depends on !X86_VOYAGER diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 1485a26ddcef..bb1a01f089e2 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -24,7 +24,11 @@ unsigned int pci_early_dump_regs; static int pci_bf_sort; int pci_routeirq; int noioapicquirk; +#ifdef CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS +int noioapicreroute = 0; +#else int noioapicreroute = 1; +#endif int pcibios_last_bus = -1; unsigned long pirq_table_addr; struct pci_bus *pci_root_bus; @@ -528,6 +532,10 @@ char * __devinit pcibios_setup(char *str) if (noioapicreroute != -1) noioapicreroute = 0; return NULL; + } else if (!strcmp(str, "noioapicreroute")) { + if (noioapicreroute != -1) + noioapicreroute = 1; + return NULL; } return str; } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 0911b0c60b64..c880dd0bbfb5 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1397,7 +1397,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x260b, quirk_intel_pcie_pm); */ static void quirk_reroute_to_boot_interrupts_intel(struct pci_dev *dev) { - if (noioapicquirk) + if (noioapicquirk || noioapicreroute) return; dev->irq_reroute_variant = INTEL_IRQ_REROUTE_VARIANT; diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h index 52a29f7668ef..9584d6d5eb93 100644 --- a/include/asm-x86/pci.h +++ b/include/asm-x86/pci.h @@ -20,7 +20,7 @@ struct pci_sysdata { extern int pci_routeirq; extern int noioapicquirk; -extern int ioapicreroute; +extern int noioapicreroute; /* scan a bus after allocating a pci_sysdata for it */ extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, -- cgit v1.2.3 From bd8fbdee6562ee526f3c2582a3b373ef195015dd Mon Sep 17 00:00:00 2001 From: Rui Sousa Date: Wed, 3 Sep 2008 17:53:07 +0200 Subject: lockdep: fix compilation when CONFIG_TRACE_IRQFLAGS_SUPPORT is not set This patch fixes compilation if CONFIG_TRACE_IRQFLAGS_SUPPORT is ever disabled (which is currently not allowed by Kconfig). Alternatively we could just remove the option altogether and the associated code paths. Since the compilation error has been in the tree for at least two years and no one noticed it, I guess we don't really have the need for CONFIG_TRACE_IRQFLAGS_SUPPORT=n. Boot tested on x86 UP. Signed-off-by: Rui Sousa Signed-off-by: Ingo Molnar --- include/linux/irqflags.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 74bde13224c9..f2993512b3b5 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -52,10 +52,10 @@ # define start_critical_timings() do { } while (0) #endif -#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT - #include +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT + #define local_irq_enable() \ do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0) #define local_irq_disable() \ @@ -84,21 +84,20 @@ * The local_irq_*() APIs are equal to the raw_local_irq*() * if !TRACE_IRQFLAGS. */ -# define raw_local_irq_disable() local_irq_disable() -# define raw_local_irq_enable() local_irq_enable() -# define raw_local_irq_save(flags) \ +#define local_irq_disable() raw_local_irq_disable() +#define local_irq_enable() raw_local_irq_enable() +#define local_irq_save(flags) \ do { \ typecheck(unsigned long, flags); \ - local_irq_save(flags); \ + raw_local_irq_save(flags); \ } while (0) -# define raw_local_irq_restore(flags) \ +# define local_irq_restore(flags) \ do { \ typecheck(unsigned long, flags); \ - local_irq_restore(flags); \ + raw_local_irq_restore(flags); \ } while (0) #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ -#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT #define safe_halt() \ do { \ trace_hardirqs_on(); \ @@ -124,6 +123,5 @@ typecheck(unsigned long, flags); \ raw_irqs_disabled_flags(flags); \ }) -#endif /* CONFIG_X86 */ #endif -- cgit v1.2.3 From 8c56250f48347750c82ab18d98d647dcf99ca674 Mon Sep 17 00:00:00 2001 From: Rui Sousa Date: Thu, 4 Sep 2008 19:47:53 +0200 Subject: lockdep, UML: fix compilation when CONFIG_TRACE_IRQFLAGS_SUPPORT is not set Hiroshi Shimamoto reported: > > !TRACE_IRQFLAGS_SUPPORT mode of build for future work, it can be > > restored via a simple revert. > > Hi, it seems that this patch breaks uml build. > > kernel/printk.c: In function 'vprintk': > kernel/printk.c:674: error: implicit declaration of function > 'raw_local_irq_save' kernel/printk.c:772: error: implicit declaration of > function 'raw_local_irq_restore' With the patch bellow it compiles (make ARCH=um with a x86 host), but I'm really out of my league on this one... Reported-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- include/asm-um/system-generic.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/asm-um/system-generic.h b/include/asm-um/system-generic.h index 5bcfa35e7a22..f1ea4da34fad 100644 --- a/include/asm-um/system-generic.h +++ b/include/asm-um/system-generic.h @@ -4,15 +4,15 @@ #include "asm/arch/system.h" #undef switch_to -#undef local_irq_save -#undef local_irq_restore -#undef local_irq_disable -#undef local_irq_enable -#undef local_save_flags -#undef local_irq_restore -#undef local_irq_enable -#undef local_irq_disable -#undef local_irq_save +#undef raw_local_irq_save +#undef raw_local_irq_restore +#undef raw_local_irq_disable +#undef raw_local_irq_enable +#undef raw_local_save_flags +#undef raw_local_irq_restore +#undef raw_local_irq_enable +#undef raw_local_irq_disable +#undef raw_local_irq_save #undef irqs_disabled extern void *switch_to(void *prev, void *next, void *last); @@ -23,21 +23,21 @@ extern int get_signals(void); extern void block_signals(void); extern void unblock_signals(void); -#define local_save_flags(flags) do { typecheck(unsigned long, flags); \ +#define raw_local_save_flags(flags) do { typecheck(unsigned long, flags); \ (flags) = get_signals(); } while(0) -#define local_irq_restore(flags) do { typecheck(unsigned long, flags); \ +#define raw_local_irq_restore(flags) do { typecheck(unsigned long, flags); \ set_signals(flags); } while(0) -#define local_irq_save(flags) do { local_save_flags(flags); \ - local_irq_disable(); } while(0) +#define raw_local_irq_save(flags) do { raw_local_save_flags(flags); \ + raw_local_irq_disable(); } while(0) -#define local_irq_enable() unblock_signals() -#define local_irq_disable() block_signals() +#define raw_local_irq_enable() unblock_signals() +#define raw_local_irq_disable() block_signals() #define irqs_disabled() \ ({ \ unsigned long flags; \ - local_save_flags(flags); \ + raw_local_save_flags(flags); \ (flags == 0); \ }) -- cgit v1.2.3 From ab7476cf76e560f0efda2a631a70aabe93009025 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 15 Aug 2008 15:29:38 -0700 Subject: debug: add notifier chain debugging, v2 - unbreak ia64 (and powerpc) where function pointers dont point at code but at data (reported by Tony Luck) [ mingo@elte.hu: various cleanups ] Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 3 +++ kernel/extable.c | 16 ++++++++++++++++ kernel/notifier.c | 10 +--------- lib/vsprintf.c | 2 +- 4 files changed, 21 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2651f805ba6d..4e1366b552ae 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -187,6 +187,9 @@ extern unsigned long long memparse(char *ptr, char **retptr); extern int core_kernel_text(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); +extern int func_ptr_is_kernel_text(void *ptr); +extern void *dereference_function_descriptor(void *ptr); + struct pid; extern struct pid *session_of_pgrp(struct pid *pgrp); diff --git a/kernel/extable.c b/kernel/extable.c index a26cb2e17023..adf0cc9c02d6 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -66,3 +66,19 @@ int kernel_text_address(unsigned long addr) return 1; return module_text_address(addr) != NULL; } + +/* + * On some architectures (PPC64, IA64) function pointers + * are actually only tokens to some data that then holds the + * real function address. As a result, to find if a function + * pointer is part of the kernel text, we need to do some + * special dereferencing first. + */ +int func_ptr_is_kernel_text(void *ptr) +{ + unsigned long addr; + addr = (unsigned long) dereference_function_descriptor(ptr); + if (core_kernel_text(addr)) + return 1; + return module_text_address(addr) != NULL; +} diff --git a/kernel/notifier.c b/kernel/notifier.c index 143fdd77dbf7..0f39e398ef60 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -21,10 +21,6 @@ BLOCKING_NOTIFIER_HEAD(reboot_notifier_list); static int notifier_chain_register(struct notifier_block **nl, struct notifier_block *n) { - if (!kernel_text_address((unsigned long)n->notifier_call)) { - WARN(1, "Invalid notifier registered!"); - return 0; - } while ((*nl) != NULL) { if (n->priority > (*nl)->priority) break; @@ -38,10 +34,6 @@ static int notifier_chain_register(struct notifier_block **nl, static int notifier_chain_cond_register(struct notifier_block **nl, struct notifier_block *n) { - if (!kernel_text_address((unsigned long)n->notifier_call)) { - WARN(1, "Invalid notifier registered!"); - return 0; - } while ((*nl) != NULL) { if ((*nl) == n) return 0; @@ -92,7 +84,7 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl, next_nb = rcu_dereference(nb->next); #ifdef CONFIG_DEBUG_NOTIFIERS - if (!kernel_text_address((unsigned long)nb->notifier_call)) { + if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) { WARN(1, "Invalid notifier called!"); nb = next_nb; continue; diff --git a/lib/vsprintf.c b/lib/vsprintf.c index d8d1d1142248..f5e5ffb9942f 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -513,7 +513,7 @@ static char *string(char *buf, char *end, char *s, int field_width, int precisio return buf; } -static inline void *dereference_function_descriptor(void *ptr) +void *dereference_function_descriptor(void *ptr) { #if defined(CONFIG_IA64) || defined(CONFIG_PPC64) void *p; -- cgit v1.2.3 From 76b189e91845eab3a9d52bb97f971d312d25652d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Sep 2008 09:57:35 +0200 Subject: lockdep: add might_lock() / might_lock_read() useful to establish a lock dependency in case the actual dependency is rare or hard to trigger. Signed-off-by: Peter Zijlstra Acked-by: Nick Piggin Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 331e5f1c2d8e..0aa657aa8a1e 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -480,4 +480,22 @@ static inline void print_irqtrace_events(struct task_struct *curr) # define lock_map_release(l) do { } while (0) #endif +#ifdef CONFIG_PROVE_LOCKING +# define might_lock(lock) \ +do { \ + typecheck(struct lockdep_map *, &(lock)->dep_map); \ + lock_acquire(&(lock)->dep_map, 0, 0, 0, 2, NULL, _THIS_IP_); \ + lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ +} while (0) +# define might_lock_read(lock) \ +do { \ + typecheck(struct lockdep_map *, &(lock)->dep_map); \ + lock_acquire(&(lock)->dep_map, 0, 0, 1, 2, NULL, _THIS_IP_); \ + lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ +} while (0) +#else +# define might_lock(lock) do { } while (0) +# define might_lock_read(lock) do { } while (0) +#endif + #endif /* __LINUX_LOCKDEP_H */ -- cgit v1.2.3 From c10d38dda1774ed4540380333cabd229eff37094 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 10 Sep 2008 13:37:17 +0200 Subject: x86: some lock annotations for user copy paths copy_to/from_user and all its variants (except the atomic ones) can take a page fault and perform non-trivial work like taking mmap_sem and entering the filesyste/pagecache. Unfortunately, this often escapes lockdep because a common pattern is to use it to read in some arguments just set up from userspace, or write data back to a hot buffer. In those cases, it will be unlikely for page reclaim to get a window in to cause copy_*_user to fault. With the new might_lock primitives, add some annotations to x86. I don't know if I caught all possible faulting points (it's a bit of a maze, and I didn't really look at 32-bit). But this is a starting point. Boots and runs OK so far. Signed-off-by: Nick Piggin Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/lib/usercopy_32.c | 7 ++++++- arch/x86/lib/usercopy_64.c | 4 ++++ include/asm-x86/uaccess.h | 14 ++++++++++++++ include/asm-x86/uaccess_32.h | 10 ++++++++-- include/asm-x86/uaccess_64.h | 12 ++++++++++++ 5 files changed, 44 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 24e60944971a..8eedde2a9cac 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -33,6 +33,8 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon do { \ int __d0, __d1, __d2; \ might_sleep(); \ + if (current->mm) \ + might_lock_read(¤t->mm->mmap_sem); \ __asm__ __volatile__( \ " testl %1,%1\n" \ " jz 2f\n" \ @@ -120,6 +122,8 @@ EXPORT_SYMBOL(strncpy_from_user); do { \ int __d0; \ might_sleep(); \ + if (current->mm) \ + might_lock_read(¤t->mm->mmap_sem); \ __asm__ __volatile__( \ "0: rep; stosl\n" \ " movl %2,%0\n" \ @@ -148,7 +152,6 @@ do { \ unsigned long clear_user(void __user *to, unsigned long n) { - might_sleep(); if (access_ok(VERIFY_WRITE, to, n)) __do_clear_user(to, n); return n; @@ -191,6 +194,8 @@ long strnlen_user(const char __user *s, long n) unsigned long res, tmp; might_sleep(); + if (current->mm) + might_lock_read(¤t->mm->mmap_sem); __asm__ __volatile__( " testl %0, %0\n" diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index f4df6e7c718b..847d12945998 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -16,6 +16,8 @@ do { \ long __d0, __d1, __d2; \ might_sleep(); \ + if (current->mm) \ + might_lock_read(¤t->mm->mmap_sem); \ __asm__ __volatile__( \ " testq %1,%1\n" \ " jz 2f\n" \ @@ -65,6 +67,8 @@ unsigned long __clear_user(void __user *addr, unsigned long size) { long __d0; might_sleep(); + if (current->mm) + might_lock_read(¤t->mm->mmap_sem); /* no memory constraint because it doesn't change any memory gcc knows about */ asm volatile( diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h index 5f702d1d5218..ad29752a1713 100644 --- a/include/asm-x86/uaccess.h +++ b/include/asm-x86/uaccess.h @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include #include @@ -157,6 +159,9 @@ extern int __get_user_bad(void); int __ret_gu; \ unsigned long __val_gu; \ __chk_user_ptr(ptr); \ + might_sleep(); \ + if (current->mm) \ + might_lock_read(¤t->mm->mmap_sem); \ switch (sizeof(*(ptr))) { \ case 1: \ __get_user_x(1, __ret_gu, __val_gu, ptr); \ @@ -241,6 +246,9 @@ extern void __put_user_8(void); int __ret_pu; \ __typeof__(*(ptr)) __pu_val; \ __chk_user_ptr(ptr); \ + might_sleep(); \ + if (current->mm) \ + might_lock_read(¤t->mm->mmap_sem); \ __pu_val = x; \ switch (sizeof(*(ptr))) { \ case 1: \ @@ -265,6 +273,9 @@ extern void __put_user_8(void); #define __put_user_size(x, ptr, size, retval, errret) \ do { \ retval = 0; \ + might_sleep(); \ + if (current->mm) \ + might_lock_read(¤t->mm->mmap_sem); \ __chk_user_ptr(ptr); \ switch (size) { \ case 1: \ @@ -317,6 +328,9 @@ do { \ #define __get_user_size(x, ptr, size, retval, errret) \ do { \ retval = 0; \ + might_sleep(); \ + if (current->mm) \ + might_lock_read(¤t->mm->mmap_sem); \ __chk_user_ptr(ptr); \ switch (size) { \ case 1: \ diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h index 6fdef39a0bcb..d725e2d703f7 100644 --- a/include/asm-x86/uaccess_32.h +++ b/include/asm-x86/uaccess_32.h @@ -82,8 +82,10 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) static __always_inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) { - might_sleep(); - return __copy_to_user_inatomic(to, from, n); + might_sleep(); + if (current->mm) + might_lock_read(¤t->mm->mmap_sem); + return __copy_to_user_inatomic(to, from, n); } static __always_inline unsigned long @@ -138,6 +140,8 @@ static __always_inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { might_sleep(); + if (current->mm) + might_lock_read(¤t->mm->mmap_sem); if (__builtin_constant_p(n)) { unsigned long ret; @@ -160,6 +164,8 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to, const void __user *from, unsigned long n) { might_sleep(); + if (current->mm) + might_lock_read(¤t->mm->mmap_sem); if (__builtin_constant_p(n)) { unsigned long ret; diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h index 515d4dce96b5..40a7205fe576 100644 --- a/include/asm-x86/uaccess_64.h +++ b/include/asm-x86/uaccess_64.h @@ -28,6 +28,10 @@ static __always_inline __must_check int __copy_from_user(void *dst, const void __user *src, unsigned size) { int ret = 0; + + might_sleep(); + if (current->mm) + might_lock_read(¤t->mm->mmap_sem); if (!__builtin_constant_p(size)) return copy_user_generic(dst, (__force void *)src, size); switch (size) { @@ -70,6 +74,10 @@ static __always_inline __must_check int __copy_to_user(void __user *dst, const void *src, unsigned size) { int ret = 0; + + might_sleep(); + if (current->mm) + might_lock_read(¤t->mm->mmap_sem); if (!__builtin_constant_p(size)) return copy_user_generic((__force void *)dst, src, size); switch (size) { @@ -112,6 +120,10 @@ static __always_inline __must_check int __copy_in_user(void __user *dst, const void __user *src, unsigned size) { int ret = 0; + + might_sleep(); + if (current->mm) + might_lock_read(¤t->mm->mmap_sem); if (!__builtin_constant_p(size)) return copy_user_generic((__force void *)dst, (__force void *)src, size); -- cgit v1.2.3 From 3ee1afa308f2a38e5d1e2ad3752ad7abcf480da1 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 10 Sep 2008 13:37:17 +0200 Subject: x86: some lock annotations for user copy paths, v2 - introduce might_fault() - handle the atomic user copy paths correctly [ mingo@elte.hu: move might_sleep() outside of in_atomic(). ] Signed-off-by: Nick Piggin Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/lib/usercopy_32.c | 12 +++--------- arch/x86/lib/usercopy_64.c | 8 ++------ include/asm-x86/uaccess.h | 18 ++++-------------- include/asm-x86/uaccess_32.h | 12 +++--------- include/asm-x86/uaccess_64.h | 12 +++--------- include/linux/kernel.h | 9 +++++++++ mm/memory.c | 15 +++++++++++++++ 7 files changed, 39 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 8eedde2a9cac..7393152a252e 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -32,9 +32,7 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon #define __do_strncpy_from_user(dst, src, count, res) \ do { \ int __d0, __d1, __d2; \ - might_sleep(); \ - if (current->mm) \ - might_lock_read(¤t->mm->mmap_sem); \ + might_fault(); \ __asm__ __volatile__( \ " testl %1,%1\n" \ " jz 2f\n" \ @@ -121,9 +119,7 @@ EXPORT_SYMBOL(strncpy_from_user); #define __do_clear_user(addr,size) \ do { \ int __d0; \ - might_sleep(); \ - if (current->mm) \ - might_lock_read(¤t->mm->mmap_sem); \ + might_fault(); \ __asm__ __volatile__( \ "0: rep; stosl\n" \ " movl %2,%0\n" \ @@ -193,9 +189,7 @@ long strnlen_user(const char __user *s, long n) unsigned long mask = -__addr_ok(s); unsigned long res, tmp; - might_sleep(); - if (current->mm) - might_lock_read(¤t->mm->mmap_sem); + might_fault(); __asm__ __volatile__( " testl %0, %0\n" diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 847d12945998..64d6c84e6353 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -15,9 +15,7 @@ #define __do_strncpy_from_user(dst,src,count,res) \ do { \ long __d0, __d1, __d2; \ - might_sleep(); \ - if (current->mm) \ - might_lock_read(¤t->mm->mmap_sem); \ + might_fault(); \ __asm__ __volatile__( \ " testq %1,%1\n" \ " jz 2f\n" \ @@ -66,9 +64,7 @@ EXPORT_SYMBOL(strncpy_from_user); unsigned long __clear_user(void __user *addr, unsigned long size) { long __d0; - might_sleep(); - if (current->mm) - might_lock_read(¤t->mm->mmap_sem); + might_fault(); /* no memory constraint because it doesn't change any memory gcc knows about */ asm volatile( diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h index ad29752a1713..39f8420c75d9 100644 --- a/include/asm-x86/uaccess.h +++ b/include/asm-x86/uaccess.h @@ -8,8 +8,6 @@ #include #include #include -#include -#include #include #include @@ -159,9 +157,7 @@ extern int __get_user_bad(void); int __ret_gu; \ unsigned long __val_gu; \ __chk_user_ptr(ptr); \ - might_sleep(); \ - if (current->mm) \ - might_lock_read(¤t->mm->mmap_sem); \ + might_fault(); \ switch (sizeof(*(ptr))) { \ case 1: \ __get_user_x(1, __ret_gu, __val_gu, ptr); \ @@ -246,9 +242,7 @@ extern void __put_user_8(void); int __ret_pu; \ __typeof__(*(ptr)) __pu_val; \ __chk_user_ptr(ptr); \ - might_sleep(); \ - if (current->mm) \ - might_lock_read(¤t->mm->mmap_sem); \ + might_fault(); \ __pu_val = x; \ switch (sizeof(*(ptr))) { \ case 1: \ @@ -273,9 +267,7 @@ extern void __put_user_8(void); #define __put_user_size(x, ptr, size, retval, errret) \ do { \ retval = 0; \ - might_sleep(); \ - if (current->mm) \ - might_lock_read(¤t->mm->mmap_sem); \ + might_fault(); \ __chk_user_ptr(ptr); \ switch (size) { \ case 1: \ @@ -328,9 +320,7 @@ do { \ #define __get_user_size(x, ptr, size, retval, errret) \ do { \ retval = 0; \ - might_sleep(); \ - if (current->mm) \ - might_lock_read(¤t->mm->mmap_sem); \ + might_fault(); \ __chk_user_ptr(ptr); \ switch (size) { \ case 1: \ diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h index d725e2d703f7..d10e842ec3ee 100644 --- a/include/asm-x86/uaccess_32.h +++ b/include/asm-x86/uaccess_32.h @@ -82,9 +82,7 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) static __always_inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) { - might_sleep(); - if (current->mm) - might_lock_read(¤t->mm->mmap_sem); + might_fault(); return __copy_to_user_inatomic(to, from, n); } @@ -139,9 +137,7 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) static __always_inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { - might_sleep(); - if (current->mm) - might_lock_read(¤t->mm->mmap_sem); + might_fault(); if (__builtin_constant_p(n)) { unsigned long ret; @@ -163,9 +159,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) static __always_inline unsigned long __copy_from_user_nocache(void *to, const void __user *from, unsigned long n) { - might_sleep(); - if (current->mm) - might_lock_read(¤t->mm->mmap_sem); + might_fault(); if (__builtin_constant_p(n)) { unsigned long ret; diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h index 40a7205fe576..13fd56fbc3ab 100644 --- a/include/asm-x86/uaccess_64.h +++ b/include/asm-x86/uaccess_64.h @@ -29,9 +29,7 @@ int __copy_from_user(void *dst, const void __user *src, unsigned size) { int ret = 0; - might_sleep(); - if (current->mm) - might_lock_read(¤t->mm->mmap_sem); + might_fault(); if (!__builtin_constant_p(size)) return copy_user_generic(dst, (__force void *)src, size); switch (size) { @@ -75,9 +73,7 @@ int __copy_to_user(void __user *dst, const void *src, unsigned size) { int ret = 0; - might_sleep(); - if (current->mm) - might_lock_read(¤t->mm->mmap_sem); + might_fault(); if (!__builtin_constant_p(size)) return copy_user_generic((__force void *)dst, src, size); switch (size) { @@ -121,9 +117,7 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) { int ret = 0; - might_sleep(); - if (current->mm) - might_lock_read(¤t->mm->mmap_sem); + might_fault(); if (!__builtin_constant_p(size)) return copy_user_generic((__force void *)dst, (__force void *)src, size); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2651f805ba6d..e580ec095765 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -140,6 +140,15 @@ extern int _cond_resched(void); (__x < 0) ? -__x : __x; \ }) +#ifdef CONFIG_PROVE_LOCKING +void might_fault(void); +#else +static inline void might_fault(void) +{ + might_sleep(); +} +#endif + extern struct atomic_notifier_head panic_notifier_list; extern long (*panic_blink)(long time); NORET_TYPE void panic(const char * fmt, ...) diff --git a/mm/memory.c b/mm/memory.c index 1002f473f497..b8fdf4e5e65b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3016,3 +3016,18 @@ void print_vma_addr(char *prefix, unsigned long ip) } up_read(¤t->mm->mmap_sem); } + +#ifdef CONFIG_PROVE_LOCKING +void might_fault(void) +{ + might_sleep(); + /* + * it would be nicer only to annotate paths which are not under + * pagefault_disable, however that requires a larger audit and + * providing helpers like get_user_atomic. + */ + if (!in_atomic() && current->mm) + might_lock_read(¤t->mm->mmap_sem); +} +EXPORT_SYMBOL(might_fault); +#endif -- cgit v1.2.3 From 1d18ef489509314506328b9e464dd47c24c1d68f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Sep 2008 20:53:21 +0200 Subject: x86: some lock annotations for user copy paths, v3 - add annotation back to clear_user() - change probe_kernel_address() to _inatomic*() method Signed-off-by: Ingo Molnar --- arch/x86/lib/usercopy_32.c | 1 + include/asm-x86/uaccess.h | 2 -- include/linux/uaccess.h | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 7393152a252e..fab5faba1d3e 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -148,6 +148,7 @@ do { \ unsigned long clear_user(void __user *to, unsigned long n) { + might_fault(); if (access_ok(VERIFY_WRITE, to, n)) __do_clear_user(to, n); return n; diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h index 39f8420c75d9..dc8edb5c4659 100644 --- a/include/asm-x86/uaccess.h +++ b/include/asm-x86/uaccess.h @@ -267,7 +267,6 @@ extern void __put_user_8(void); #define __put_user_size(x, ptr, size, retval, errret) \ do { \ retval = 0; \ - might_fault(); \ __chk_user_ptr(ptr); \ switch (size) { \ case 1: \ @@ -320,7 +319,6 @@ do { \ #define __get_user_size(x, ptr, size, retval, errret) \ do { \ retval = 0; \ - might_fault(); \ __chk_user_ptr(ptr); \ switch (size) { \ case 1: \ diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index fec6decfb983..2062293e57e6 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -78,7 +78,7 @@ static inline unsigned long __copy_from_user_nocache(void *to, \ set_fs(KERNEL_DS); \ pagefault_disable(); \ - ret = __get_user(retval, (__force typeof(retval) __user *)(addr)); \ + ret = __copy_from_user_inatomic((__force typeof(retval) __user *)(addr), &(retval), sizeof(retval)); \ pagefault_enable(); \ set_fs(old_fs); \ ret; \ -- cgit v1.2.3 From 53b9d87f41a3d8838210ad7cdef02d814817ce85 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 11 Sep 2008 17:02:58 -0700 Subject: lock debug: sit tight when we are already in a panic in: > http://bugzilla.kernel.org/show_bug.cgi?id=11543 The panic code called the kexec code which called mutex_trylock() which called spin_lock_mutex() which then stupidly went and blurted a load of debug stuff because of in_interrupt(). Keep the lock debug code from escallating an already crappy situation. Signed-off-by: Ingo Molnar --- include/linux/debug_locks.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 4aaa4afb1cb9..096476f1fb35 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -17,7 +17,7 @@ extern int debug_locks_off(void); ({ \ int __ret = 0; \ \ - if (unlikely(c)) { \ + if (!oops_in_progress && unlikely(c)) { \ if (debug_locks_off() && !debug_locks_silent) \ WARN_ON(1); \ __ret = 1; \ -- cgit v1.2.3 From 30742d5c2277c325fb0e9d2d817d55a19995fe8f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Sep 2008 14:43:39 +0200 Subject: Revert "lockdep: fix compilation when CONFIG_TRACE_IRQFLAGS_SUPPORT is not set" This reverts commit bd8fbdee6562ee526f3c2582a3b373ef195015dd. This broke the powerpc build - more fixes are needed before we can undo this revert. --- include/linux/irqflags.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index f2993512b3b5..74bde13224c9 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -52,10 +52,10 @@ # define start_critical_timings() do { } while (0) #endif -#include - #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT +#include + #define local_irq_enable() \ do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0) #define local_irq_disable() \ @@ -84,20 +84,21 @@ * The local_irq_*() APIs are equal to the raw_local_irq*() * if !TRACE_IRQFLAGS. */ -#define local_irq_disable() raw_local_irq_disable() -#define local_irq_enable() raw_local_irq_enable() -#define local_irq_save(flags) \ +# define raw_local_irq_disable() local_irq_disable() +# define raw_local_irq_enable() local_irq_enable() +# define raw_local_irq_save(flags) \ do { \ typecheck(unsigned long, flags); \ - raw_local_irq_save(flags); \ + local_irq_save(flags); \ } while (0) -# define local_irq_restore(flags) \ +# define raw_local_irq_restore(flags) \ do { \ typecheck(unsigned long, flags); \ - raw_local_irq_restore(flags); \ + local_irq_restore(flags); \ } while (0) #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ +#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT #define safe_halt() \ do { \ trace_hardirqs_on(); \ @@ -123,5 +124,6 @@ typecheck(unsigned long, flags); \ raw_irqs_disabled_flags(flags); \ }) +#endif /* CONFIG_X86 */ #endif -- cgit v1.2.3 From fb71e45338453698bd7460f7e8f171ea0304d218 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Mon, 15 Sep 2008 18:04:26 -0700 Subject: uaccess: fix parameters inversion for __copy_from_user_inatomic() The following patch changes to use __copy_from_user_inatomic(), but the passing parameters incorrect: x86: some lock annotations for user copy paths, v3 This fixes the netfilter crash reported by Steven Noonan. Reported-by: Steven Noonan Signed-off-by: Hiroshi Shimamoto Tested-by: Steven Noonan Signed-off-by: Ingo Molnar --- include/linux/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 2062293e57e6..6b58367d145e 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -78,7 +78,7 @@ static inline unsigned long __copy_from_user_nocache(void *to, \ set_fs(KERNEL_DS); \ pagefault_disable(); \ - ret = __copy_from_user_inatomic((__force typeof(retval) __user *)(addr), &(retval), sizeof(retval)); \ + ret = __copy_from_user_inatomic(&(retval), (__force typeof(retval) __user *)(addr), sizeof(retval)); \ pagefault_enable(); \ set_fs(old_fs); \ ret; \ -- cgit v1.2.3 From 38d47c1b7075bd7ec3881141bb3629da58f88dab Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 26 Sep 2008 19:32:20 +0200 Subject: futex: rely on get_user_pages() for shared futexes On the way of getting rid of the mmap_sem requirement for shared futexes, start by relying on get_user_pages(). Signed-off-by: Peter Zijlstra Acked-by: Nick Piggin Signed-off-by: Ingo Molnar --- include/linux/futex.h | 2 + kernel/futex.c | 162 +++++++++++++++++++++++++------------------------- 2 files changed, 82 insertions(+), 82 deletions(-) (limited to 'include') diff --git a/include/linux/futex.h b/include/linux/futex.h index 586ab56a3ec3..8f627b9ae2b1 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -164,6 +164,8 @@ union futex_key { } both; }; +#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } } + #ifdef CONFIG_FUTEX extern void exit_robust_list(struct task_struct *curr); extern void exit_pi_state_list(struct task_struct *curr); diff --git a/kernel/futex.c b/kernel/futex.c index 7d1136e97c14..a4c39fa0a7a3 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -161,6 +161,45 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2) && key1->both.offset == key2->both.offset); } +/* + * Take a reference to the resource addressed by a key. + * Can be called while holding spinlocks. + * + */ +static void get_futex_key_refs(union futex_key *key) +{ + if (!key->both.ptr) + return; + + switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { + case FUT_OFF_INODE: + atomic_inc(&key->shared.inode->i_count); + break; + case FUT_OFF_MMSHARED: + atomic_inc(&key->private.mm->mm_count); + break; + } +} + +/* + * Drop a reference to the resource addressed by a key. + * The hash bucket spinlock must not be held. + */ +static void drop_futex_key_refs(union futex_key *key) +{ + if (!key->both.ptr) + return; + + switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { + case FUT_OFF_INODE: + iput(key->shared.inode); + break; + case FUT_OFF_MMSHARED: + mmdrop(key->private.mm); + break; + } +} + /** * get_futex_key - Get parameters which are the keys for a futex. * @uaddr: virtual address of the futex @@ -184,7 +223,6 @@ static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared, { unsigned long address = (unsigned long)uaddr; struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; struct page *page; int err; @@ -210,98 +248,47 @@ static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared, key->private.address = address; return 0; } - /* - * The futex is hashed differently depending on whether - * it's in a shared or private mapping. So check vma first. - */ - vma = find_extend_vma(mm, address); - if (unlikely(!vma)) - return -EFAULT; - /* - * Permissions. - */ - if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ)) - return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES; +again: + err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL); + if (err < 0) + return err; + + lock_page(page); + if (!page->mapping) { + unlock_page(page); + put_page(page); + goto again; + } /* * Private mappings are handled in a simple way. * * NOTE: When userspace waits on a MAP_SHARED mapping, even if * it's a read-only handle, it's expected that futexes attach to - * the object not the particular process. Therefore we use - * VM_MAYSHARE here, not VM_SHARED which is restricted to shared - * mappings of _writable_ handles. + * the object not the particular process. */ - if (likely(!(vma->vm_flags & VM_MAYSHARE))) { - key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */ + if (PageAnon(page)) { + key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ key->private.mm = mm; key->private.address = address; - return 0; - } - - /* - * Linear file mappings are also simple. - */ - key->shared.inode = vma->vm_file->f_path.dentry->d_inode; - key->both.offset |= FUT_OFF_INODE; /* inode-based key. */ - if (likely(!(vma->vm_flags & VM_NONLINEAR))) { - key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT) - + vma->vm_pgoff); - return 0; + } else { + key->both.offset |= FUT_OFF_INODE; /* inode-based key */ + key->shared.inode = page->mapping->host; + key->shared.pgoff = page->index; } - /* - * We could walk the page table to read the non-linear - * pte, and get the page index without fetching the page - * from swap. But that's a lot of code to duplicate here - * for a rare case, so we simply fetch the page. - */ - err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL); - if (err >= 0) { - key->shared.pgoff = - page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); - put_page(page); - return 0; - } - return err; -} + get_futex_key_refs(key); -/* - * Take a reference to the resource addressed by a key. - * Can be called while holding spinlocks. - * - */ -static void get_futex_key_refs(union futex_key *key) -{ - if (key->both.ptr == NULL) - return; - switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { - case FUT_OFF_INODE: - atomic_inc(&key->shared.inode->i_count); - break; - case FUT_OFF_MMSHARED: - atomic_inc(&key->private.mm->mm_count); - break; - } + unlock_page(page); + put_page(page); + return 0; } -/* - * Drop a reference to the resource addressed by a key. - * The hash bucket spinlock must not be held. - */ -static void drop_futex_key_refs(union futex_key *key) +static inline +void put_futex_key(struct rw_semaphore *fshared, union futex_key *key) { - if (!key->both.ptr) - return; - switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { - case FUT_OFF_INODE: - iput(key->shared.inode); - break; - case FUT_OFF_MMSHARED: - mmdrop(key->private.mm); - break; - } + drop_futex_key_refs(key); } static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) @@ -385,6 +372,7 @@ static int refill_pi_state_cache(void) /* pi_mutex gets initialized later */ pi_state->owner = NULL; atomic_set(&pi_state->refcount, 1); + pi_state->key = FUTEX_KEY_INIT; current->pi_state_cache = pi_state; @@ -462,7 +450,7 @@ void exit_pi_state_list(struct task_struct *curr) struct list_head *next, *head = &curr->pi_state_list; struct futex_pi_state *pi_state; struct futex_hash_bucket *hb; - union futex_key key; + union futex_key key = FUTEX_KEY_INIT; if (!futex_cmpxchg_enabled) return; @@ -725,7 +713,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared, struct futex_hash_bucket *hb; struct futex_q *this, *next; struct plist_head *head; - union futex_key key; + union futex_key key = FUTEX_KEY_INIT; int ret; if (!bitset) @@ -760,6 +748,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared, spin_unlock(&hb->lock); out: + put_futex_key(fshared, &key); futex_unlock_mm(fshared); return ret; } @@ -773,7 +762,7 @@ futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared, u32 __user *uaddr2, int nr_wake, int nr_wake2, int op) { - union futex_key key1, key2; + union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; struct futex_hash_bucket *hb1, *hb2; struct plist_head *head; struct futex_q *this, *next; @@ -873,6 +862,8 @@ retry: if (hb1 != hb2) spin_unlock(&hb2->lock); out: + put_futex_key(fshared, &key2); + put_futex_key(fshared, &key1); futex_unlock_mm(fshared); return ret; @@ -886,7 +877,7 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared, u32 __user *uaddr2, int nr_wake, int nr_requeue, u32 *cmpval) { - union futex_key key1, key2; + union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; struct futex_hash_bucket *hb1, *hb2; struct plist_head *head1; struct futex_q *this, *next; @@ -974,6 +965,8 @@ out_unlock: drop_futex_key_refs(&key1); out: + put_futex_key(fshared, &key2); + put_futex_key(fshared, &key1); futex_unlock_mm(fshared); return ret; } @@ -1220,6 +1213,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, retry: futex_lock_mm(fshared); + q.key = FUTEX_KEY_INIT; ret = get_futex_key(uaddr, fshared, &q.key); if (unlikely(ret != 0)) goto out_release_sem; @@ -1360,6 +1354,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, queue_unlock(&q, hb); out_release_sem: + put_futex_key(fshared, &q.key); futex_unlock_mm(fshared); return ret; } @@ -1411,6 +1406,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, retry: futex_lock_mm(fshared); + q.key = FUTEX_KEY_INIT; ret = get_futex_key(uaddr, fshared, &q.key); if (unlikely(ret != 0)) goto out_release_sem; @@ -1625,6 +1621,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, queue_unlock(&q, hb); out_release_sem: + put_futex_key(fshared, &q.key); futex_unlock_mm(fshared); if (to) destroy_hrtimer_on_stack(&to->timer); @@ -1671,7 +1668,7 @@ static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared) struct futex_q *this, *next; u32 uval; struct plist_head *head; - union futex_key key; + union futex_key key = FUTEX_KEY_INIT; int ret, attempt = 0; retry: @@ -1744,6 +1741,7 @@ retry_unlocked: out_unlock: spin_unlock(&hb->lock); out: + put_futex_key(fshared, &key); futex_unlock_mm(fshared); return ret; -- cgit v1.2.3 From 6b9331165e9827e055389e22d1cbdb5fe3cff835 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 16 Oct 2008 11:00:07 +0100 Subject: ALSA: ASoC: Remove snd_soc_dapm_connect_input() This was marked as deprecated in 2.6.27 and all users except for playpaq_wm8510 fixed in that release. Signed-off-by: Mark Brown Signed-off-by: Takashi Iwai --- include/sound/soc-dapm.h | 2 -- sound/soc/soc-dapm.c | 22 ---------------------- 2 files changed, 24 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index ca699a3017f3..7ee2f70ca42e 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -221,8 +221,6 @@ int snd_soc_dapm_new_controls(struct snd_soc_codec *codec, int num); /* dapm path setup */ -int __deprecated snd_soc_dapm_connect_input(struct snd_soc_codec *codec, - const char *sink_name, const char *control_name, const char *src_name); int snd_soc_dapm_new_widgets(struct snd_soc_codec *codec); void snd_soc_dapm_free(struct snd_soc_device *socdev); int snd_soc_dapm_add_routes(struct snd_soc_codec *codec, diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index efbd0b37810a..4060fc54bbb1 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -1006,28 +1006,6 @@ err: return ret; } -/** - * snd_soc_dapm_connect_input - connect dapm widgets - * @codec: audio codec - * @sink: name of target widget - * @control: mixer control name - * @source: name of source name - * - * Connects 2 dapm widgets together via a named audio path. The sink is - * the widget receiving the audio signal, whilst the source is the sender - * of the audio signal. - * - * This function has been deprecated in favour of snd_soc_dapm_add_routes(). - * - * Returns 0 for success else error. - */ -int snd_soc_dapm_connect_input(struct snd_soc_codec *codec, const char *sink, - const char *control, const char *source) -{ - return snd_soc_dapm_add_route(codec, sink, control, source); -} -EXPORT_SYMBOL_GPL(snd_soc_dapm_connect_input); - /** * snd_soc_dapm_add_routes - Add routes between DAPM widgets * @codec: codec -- cgit v1.2.3 From c7e78cff6b7518212247fb20b1dc6411540dc9af Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Oct 2008 23:17:09 +0200 Subject: lockstat: contend with points We currently only provide points that have to wait on contention, also lists the points we have to wait for. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- Documentation/lockstat.txt | 50 +++++++++++++++++++++++++++++----------------- include/linux/lockdep.h | 13 ++++++++---- kernel/lockdep.c | 33 +++++++++++++++++++----------- kernel/lockdep_proc.c | 21 +++++++++++++++++-- kernel/mutex.c | 2 +- 5 files changed, 82 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt index 02f36f5c64fe..9cb9138f7a79 100644 --- a/Documentation/lockstat.txt +++ b/Documentation/lockstat.txt @@ -71,34 +71,48 @@ Look at the current lock statistics: # less /proc/lock_stat -01 lock_stat version 0.2 +01 lock_stat version 0.3 02 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 03 class name con-bounces contentions waittime-min waittime-max waittime-total acq-bounces acquisitions holdtime-min holdtime-max holdtime-total 04 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 05 -06 &inode->i_data.tree_lock-W: 15 21657 0.18 1093295.30 11547131054.85 58 10415 0.16 87.51 6387.60 -07 &inode->i_data.tree_lock-R: 0 0 0.00 0.00 0.00 23302 231198 0.25 8.45 98023.38 -08 -------------------------- -09 &inode->i_data.tree_lock 0 [] add_to_page_cache+0x5f/0x190 -10 -11 ............................................................................................................................................................................................... -12 -13 dcache_lock: 1037 1161 0.38 45.32 774.51 6611 243371 0.15 306.48 77387.24 -14 ----------- -15 dcache_lock 180 [] sys_getcwd+0x11e/0x230 -16 dcache_lock 165 [] d_alloc+0x15a/0x210 -17 dcache_lock 33 [] _atomic_dec_and_lock+0x4d/0x70 -18 dcache_lock 1 [] shrink_dcache_parent+0x18/0x130 +06 &mm->mmap_sem-W: 233 538 18446744073708 22924.27 607243.51 1342 45806 1.71 8595.89 1180582.34 +07 &mm->mmap_sem-R: 205 587 18446744073708 28403.36 731975.00 1940 412426 0.58 187825.45 6307502.88 +08 --------------- +09 &mm->mmap_sem 487 [] do_page_fault+0x466/0x928 +10 &mm->mmap_sem 179 [] sys_mprotect+0xcd/0x21d +11 &mm->mmap_sem 279 [] sys_mmap+0x75/0xce +12 &mm->mmap_sem 76 [] sys_munmap+0x32/0x59 +13 --------------- +14 &mm->mmap_sem 270 [] sys_mmap+0x75/0xce +15 &mm->mmap_sem 431 [] do_page_fault+0x466/0x928 +16 &mm->mmap_sem 138 [] sys_munmap+0x32/0x59 +17 &mm->mmap_sem 145 [] sys_mprotect+0xcd/0x21d +18 +19 ............................................................................................................................................................................................... +20 +21 dcache_lock: 621 623 0.52 118.26 1053.02 6745 91930 0.29 316.29 118423.41 +22 ----------- +23 dcache_lock 179 [] _atomic_dec_and_lock+0x34/0x54 +24 dcache_lock 113 [] d_alloc+0x19a/0x1eb +25 dcache_lock 99 [] d_rehash+0x1b/0x44 +26 dcache_lock 104 [] d_instantiate+0x36/0x8a +27 ----------- +28 dcache_lock 192 [] _atomic_dec_and_lock+0x34/0x54 +29 dcache_lock 98 [] d_rehash+0x1b/0x44 +30 dcache_lock 72 [] d_alloc+0x19a/0x1eb +31 dcache_lock 112 [] d_instantiate+0x36/0x8a This excerpt shows the first two lock class statistics. Line 01 shows the output version - each time the format changes this will be updated. Line 02-04 -show the header with column descriptions. Lines 05-10 and 13-18 show the actual +show the header with column descriptions. Lines 05-18 and 20-31 show the actual statistics. These statistics come in two parts; the actual stats separated by a -short separator (line 08, 14) from the contention points. +short separator (line 08, 13) from the contention points. -The first lock (05-10) is a read/write lock, and shows two lines above the +The first lock (05-18) is a read/write lock, and shows two lines above the short separator. The contention points don't match the column descriptors, -they have two: contentions and [] symbol. +they have two: contentions and [] symbol. The second set of contention +points are the points we're contending with. The integer part of the time values is in us. diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 0aa657aa8a1e..fc9f8e88123b 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -73,6 +73,8 @@ struct lock_class_key { struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; }; +#define LOCKSTAT_POINTS 4 + /* * The lock-class itself: */ @@ -119,7 +121,8 @@ struct lock_class { int name_version; #ifdef CONFIG_LOCK_STAT - unsigned long contention_point[4]; + unsigned long contention_point[LOCKSTAT_POINTS]; + unsigned long contending_point[LOCKSTAT_POINTS]; #endif }; @@ -144,6 +147,7 @@ enum bounce_type { struct lock_class_stats { unsigned long contention_point[4]; + unsigned long contending_point[4]; struct lock_time read_waittime; struct lock_time write_waittime; struct lock_time read_holdtime; @@ -165,6 +169,7 @@ struct lockdep_map { const char *name; #ifdef CONFIG_LOCK_STAT int cpu; + unsigned long ip; #endif }; @@ -355,7 +360,7 @@ struct lock_class_key { }; #ifdef CONFIG_LOCK_STAT extern void lock_contended(struct lockdep_map *lock, unsigned long ip); -extern void lock_acquired(struct lockdep_map *lock); +extern void lock_acquired(struct lockdep_map *lock, unsigned long ip); #define LOCK_CONTENDED(_lock, try, lock) \ do { \ @@ -363,13 +368,13 @@ do { \ lock_contended(&(_lock)->dep_map, _RET_IP_); \ lock(_lock); \ } \ - lock_acquired(&(_lock)->dep_map); \ + lock_acquired(&(_lock)->dep_map, _RET_IP_); \ } while (0) #else /* CONFIG_LOCK_STAT */ #define lock_contended(lockdep_map, ip) do {} while (0) -#define lock_acquired(lockdep_map) do {} while (0) +#define lock_acquired(lockdep_map, ip) do {} while (0) #define LOCK_CONTENDED(_lock, try, lock) \ lock(_lock) diff --git a/kernel/lockdep.c b/kernel/lockdep.c index dbda475b13bd..234a9dccb4be 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -136,16 +136,16 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock) #ifdef CONFIG_LOCK_STAT static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats); -static int lock_contention_point(struct lock_class *class, unsigned long ip) +static int lock_point(unsigned long points[], unsigned long ip) { int i; - for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { - if (class->contention_point[i] == 0) { - class->contention_point[i] = ip; + for (i = 0; i < LOCKSTAT_POINTS; i++) { + if (points[i] == 0) { + points[i] = ip; break; } - if (class->contention_point[i] == ip) + if (points[i] == ip) break; } @@ -185,6 +185,9 @@ struct lock_class_stats lock_stats(struct lock_class *class) for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++) stats.contention_point[i] += pcs->contention_point[i]; + for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++) + stats.contending_point[i] += pcs->contending_point[i]; + lock_time_add(&pcs->read_waittime, &stats.read_waittime); lock_time_add(&pcs->write_waittime, &stats.write_waittime); @@ -209,6 +212,7 @@ void clear_lock_stats(struct lock_class *class) memset(cpu_stats, 0, sizeof(struct lock_class_stats)); } memset(class->contention_point, 0, sizeof(class->contention_point)); + memset(class->contending_point, 0, sizeof(class->contending_point)); } static struct lock_class_stats *get_lock_stats(struct lock_class *class) @@ -3001,7 +3005,7 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) struct held_lock *hlock, *prev_hlock; struct lock_class_stats *stats; unsigned int depth; - int i, point; + int i, contention_point, contending_point; depth = curr->lockdep_depth; if (DEBUG_LOCKS_WARN_ON(!depth)) @@ -3025,18 +3029,22 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) found_it: hlock->waittime_stamp = sched_clock(); - point = lock_contention_point(hlock_class(hlock), ip); + contention_point = lock_point(hlock_class(hlock)->contention_point, ip); + contending_point = lock_point(hlock_class(hlock)->contending_point, + lock->ip); stats = get_lock_stats(hlock_class(hlock)); - if (point < ARRAY_SIZE(stats->contention_point)) - stats->contention_point[point]++; + if (contention_point < LOCKSTAT_POINTS) + stats->contention_point[contention_point]++; + if (contending_point < LOCKSTAT_POINTS) + stats->contending_point[contending_point]++; if (lock->cpu != smp_processor_id()) stats->bounces[bounce_contended + !!hlock->read]++; put_lock_stats(stats); } static void -__lock_acquired(struct lockdep_map *lock) +__lock_acquired(struct lockdep_map *lock, unsigned long ip) { struct task_struct *curr = current; struct held_lock *hlock, *prev_hlock; @@ -3085,6 +3093,7 @@ found_it: put_lock_stats(stats); lock->cpu = cpu; + lock->ip = ip; } void lock_contended(struct lockdep_map *lock, unsigned long ip) @@ -3106,7 +3115,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip) } EXPORT_SYMBOL_GPL(lock_contended); -void lock_acquired(struct lockdep_map *lock) +void lock_acquired(struct lockdep_map *lock, unsigned long ip) { unsigned long flags; @@ -3119,7 +3128,7 @@ void lock_acquired(struct lockdep_map *lock) raw_local_irq_save(flags); check_flags(flags); current->lockdep_recursion = 1; - __lock_acquired(lock); + __lock_acquired(lock, ip); current->lockdep_recursion = 0; raw_local_irq_restore(flags); } diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index 8d3a6eba8d5a..13716b813896 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c @@ -557,7 +557,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) if (stats->read_holdtime.nr) namelen += 2; - for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { + for (i = 0; i < LOCKSTAT_POINTS; i++) { char sym[KSYM_SYMBOL_LEN]; char ip[32]; @@ -574,6 +574,23 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) stats->contention_point[i], ip, sym); } + for (i = 0; i < LOCKSTAT_POINTS; i++) { + char sym[KSYM_SYMBOL_LEN]; + char ip[32]; + + if (class->contending_point[i] == 0) + break; + + if (!i) + seq_line(m, '-', 40-namelen, namelen); + + sprint_symbol(sym, class->contending_point[i]); + snprintf(ip, sizeof(ip), "[<%p>]", + (void *)class->contending_point[i]); + seq_printf(m, "%40s %14lu %29s %s\n", name, + stats->contending_point[i], + ip, sym); + } if (i) { seq_puts(m, "\n"); seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1)); @@ -583,7 +600,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) static void seq_header(struct seq_file *m) { - seq_printf(m, "lock_stat version 0.2\n"); + seq_printf(m, "lock_stat version 0.3\n"); seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s " "%14s %14s\n", diff --git a/kernel/mutex.c b/kernel/mutex.c index 12c779dc65d4..39a3816b68d9 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -184,7 +184,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, } done: - lock_acquired(&lock->dep_map); + lock_acquired(&lock->dep_map, ip); /* got the lock - rejoice! */ mutex_remove_waiter(lock, &waiter, task_thread_info(task)); debug_mutex_set_owner(lock, task_thread_info(task)); -- cgit v1.2.3 From bbaf5e97337287479eb78dbc3822d9560bbfd2e2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 24 Oct 2008 18:16:50 +0200 Subject: ALSA: Add hrtimer backend for ALSA timer interface Added the hrtimer backend for ALSA timer interface. It can be used for the sequencer timer source. Signed-off-by: Takashi Iwai --- include/sound/asound.h | 1 + sound/core/Kconfig | 21 ++++++++ sound/core/Makefile | 2 + sound/core/hrtimer.c | 140 +++++++++++++++++++++++++++++++++++++++++++++++++ sound/core/seq/seq.c | 4 +- 5 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 sound/core/hrtimer.c (limited to 'include') diff --git a/include/sound/asound.h b/include/sound/asound.h index 2c4dc908a54a..1c02ed1d7c4a 100644 --- a/include/sound/asound.h +++ b/include/sound/asound.h @@ -575,6 +575,7 @@ enum { #define SNDRV_TIMER_GLOBAL_SYSTEM 0 #define SNDRV_TIMER_GLOBAL_RTC 1 #define SNDRV_TIMER_GLOBAL_HPET 2 +#define SNDRV_TIMER_GLOBAL_HRTIMER 3 /* info flags */ #define SNDRV_TIMER_FLG_SLAVE (1<<0) /* cannot be controlled */ diff --git a/sound/core/Kconfig b/sound/core/Kconfig index 66348c92f88d..406a45010985 100644 --- a/sound/core/Kconfig +++ b/sound/core/Kconfig @@ -95,6 +95,26 @@ config SND_SEQUENCER_OSS this will be compiled as a module. The module will be called snd-seq-oss. +config SND_HRTIMER + tristate "HR-timer backend support" + depends on HIGH_RES_TIMERS + select SND_TIMER + help + Say Y here to enable HR-timer backend for ALSA timer. ALSA uses + the hrtimer as a precise timing source. The ALSA sequencer code + also can use this timing source. + + To compile this driver as a module, choose M here: the module + will be called snd-hrtimer. + +config SND_SEQ_HRTIMER_DEFAULT + bool "Use HR-timer as default sequencer timer" + depends on SND_HRTIMER && SND_SEQUENCER + default y + help + Say Y here to use the HR-timer backend as the default sequencer + timer. + config SND_RTCTIMER tristate "RTC Timer support" depends on RTC @@ -114,6 +134,7 @@ config SND_RTCTIMER config SND_SEQ_RTCTIMER_DEFAULT bool "Use RTC as default sequencer timer" depends on SND_RTCTIMER && SND_SEQUENCER + dpeends on !SND_SEQ_HRTIMER_DEFAULT default y help Say Y here to use the RTC timer as the default sequencer diff --git a/sound/core/Makefile b/sound/core/Makefile index d57125a5687d..4229052e7b91 100644 --- a/sound/core/Makefile +++ b/sound/core/Makefile @@ -17,12 +17,14 @@ snd-page-alloc-$(CONFIG_HAS_DMA) += sgbuf.o snd-rawmidi-objs := rawmidi.o snd-timer-objs := timer.o +snd-hrtimer-objs := hrtimer.o snd-rtctimer-objs := rtctimer.o snd-hwdep-objs := hwdep.o obj-$(CONFIG_SND) += snd.o obj-$(CONFIG_SND_HWDEP) += snd-hwdep.o obj-$(CONFIG_SND_TIMER) += snd-timer.o +obj-$(CONFIG_SND_HRTIMER) += snd-hrtimer.o obj-$(CONFIG_SND_RTCTIMER) += snd-rtctimer.o obj-$(CONFIG_SND_PCM) += snd-pcm.o snd-page-alloc.o obj-$(CONFIG_SND_RAWMIDI) += snd-rawmidi.o diff --git a/sound/core/hrtimer.c b/sound/core/hrtimer.c new file mode 100644 index 000000000000..b712d7f211ff --- /dev/null +++ b/sound/core/hrtimer.c @@ -0,0 +1,140 @@ +/* + */ + +#include +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Takashi Iwai "); +MODULE_DESCRIPTION("ALSA hrtimer backend"); +MODULE_LICENSE("GPL"); + +MODULE_ALIAS("snd-timer-" __stringify(SNDRV_TIMER_GLOBAL_HRTIMER)); + +#define NANO_SEC 1000000000UL /* 10^9 in sec */ +static unsigned int resolution; + +struct snd_hrtimer { + struct snd_timer *timer; + struct hrtimer hrt; +}; + +static enum hrtimer_restart snd_hrtimer_callback(struct hrtimer *hrt) +{ + struct snd_hrtimer *stime = container_of(hrt, struct snd_hrtimer, hrt); + struct snd_timer *t = stime->timer; + hrtimer_forward_now(hrt, ktime_set(0, t->sticks * resolution)); + snd_timer_interrupt(stime->timer, t->sticks); + return HRTIMER_RESTART; +} + +static int snd_hrtimer_open(struct snd_timer *t) +{ + struct snd_hrtimer *stime; + + stime = kmalloc(sizeof(*stime), GFP_KERNEL); + if (!stime) + return -ENOMEM; + hrtimer_init(&stime->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + stime->timer = t; + stime->hrt.cb_mode = HRTIMER_CB_SOFTIRQ; + stime->hrt.function = snd_hrtimer_callback; + t->private_data = stime; + return 0; +} + +static int snd_hrtimer_close(struct snd_timer *t) +{ + struct snd_hrtimer *stime = t->private_data; + + if (stime) { + hrtimer_cancel(&stime->hrt); + kfree(stime); + t->private_data = NULL; + } + return 0; +} + +static int snd_hrtimer_start(struct snd_timer *t) +{ + struct snd_hrtimer *stime = t->private_data; + + hrtimer_start(&stime->hrt, ktime_set(0, t->sticks * resolution), + HRTIMER_MODE_REL); + return 0; +} + +static int snd_hrtimer_stop(struct snd_timer *t) +{ + struct snd_hrtimer *stime = t->private_data; + + hrtimer_cancel(&stime->hrt); + return 0; +} + +static struct snd_timer_hardware hrtimer_hw = { + .flags = (SNDRV_TIMER_HW_AUTO | + /*SNDRV_TIMER_HW_FIRST |*/ + SNDRV_TIMER_HW_TASKLET), + .open = snd_hrtimer_open, + .close = snd_hrtimer_close, + .start = snd_hrtimer_start, + .stop = snd_hrtimer_stop, +}; + +/* + * entry functions + */ + +static struct snd_timer *mytimer; + +static int __init snd_hrtimer_init(void) +{ + struct snd_timer *timer; + struct timespec tp; + int err; + + hrtimer_get_res(CLOCK_MONOTONIC, &tp); + if (tp.tv_sec > 0 || !tp.tv_nsec) { + snd_printk(KERN_ERR + "snd-hrtimer: Invalid resolution %u.%09u", + (unsigned)tp.tv_sec, (unsigned)tp.tv_nsec); + return -EINVAL; + } + resolution = tp.tv_nsec; + + /* Create a new timer and set up the fields */ + err = snd_timer_global_new("hrtimer", SNDRV_TIMER_GLOBAL_HRTIMER, + &timer); + if (err < 0) + return err; + + timer->module = THIS_MODULE; + strcpy(timer->name, "HR timer"); + timer->hw = hrtimer_hw; + timer->hw.resolution = resolution; + timer->hw.ticks = NANO_SEC / resolution; + + err = snd_timer_global_register(timer); + if (err < 0) { + snd_timer_global_free(timer); + return err; + } + mytimer = timer; /* remember this */ + + return 0; +} + +static void __exit snd_hrtimer_exit(void) +{ + if (mytimer) { + snd_timer_global_free(mytimer); + mytimer = NULL; + } +} + +module_init(snd_hrtimer_init); +module_exit(snd_hrtimer_exit); diff --git a/sound/core/seq/seq.c b/sound/core/seq/seq.c index ee0f8405ab35..bf09a5ad1865 100644 --- a/sound/core/seq/seq.c +++ b/sound/core/seq/seq.c @@ -43,7 +43,9 @@ int seq_default_timer_class = SNDRV_TIMER_CLASS_GLOBAL; int seq_default_timer_sclass = SNDRV_TIMER_SCLASS_NONE; int seq_default_timer_card = -1; int seq_default_timer_device = -#ifdef CONFIG_SND_SEQ_RTCTIMER_DEFAULT +#ifdef CONFIG_SND_SEQ_HRTIMER_DEFAULT + SNDRV_TIMER_GLOBAL_HRTIMER +#elif defined(CONFIG_SND_SEQ_RTCTIMER_DEFAULT) SNDRV_TIMER_GLOBAL_RTC #else SNDRV_TIMER_GLOBAL_SYSTEM -- cgit v1.2.3 From a53ccab3ccac9e8676a683df9822a2daec83ef54 Mon Sep 17 00:00:00 2001 From: Matthew Ranostay Date: Sat, 25 Oct 2008 01:05:04 -0400 Subject: ALSA: jack: lineout support to jack abstraction layer This patch introduces support for reporting SW_LINEOUT_INSERT detection events via the jack abstraction layer. Also adds a SND_JACK_LINEOUT define to the input system header. Signed-off-by: Matthew Ranostay Cc: Dmitry Torokhov Acked-by: Mark Brown Signed-off-by: Takashi Iwai --- include/linux/input.h | 1 + include/sound/jack.h | 1 + sound/core/jack.c | 6 ++++++ 3 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/input.h b/include/linux/input.h index a5802c9c81a4..7323d2ff5151 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -644,6 +644,7 @@ struct input_absinfo { #define SW_RADIO SW_RFKILL_ALL /* deprecated */ #define SW_MICROPHONE_INSERT 0x04 /* set = inserted */ #define SW_DOCK 0x05 /* set = plugged into dock */ +#define SW_LINEOUT_INSERT 0x06 /* set = inserted */ #define SW_MAX 0x0f #define SW_CNT (SW_MAX+1) diff --git a/include/sound/jack.h b/include/sound/jack.h index b1b2b8b59adb..7cb25f4b50bb 100644 --- a/include/sound/jack.h +++ b/include/sound/jack.h @@ -35,6 +35,7 @@ enum snd_jack_types { SND_JACK_HEADPHONE = 0x0001, SND_JACK_MICROPHONE = 0x0002, SND_JACK_HEADSET = SND_JACK_HEADPHONE | SND_JACK_MICROPHONE, + SND_JACK_LINEOUT = 0x0004, }; struct snd_jack { diff --git a/sound/core/jack.c b/sound/core/jack.c index 8133a2b173a5..c4bb9bad3133 100644 --- a/sound/core/jack.c +++ b/sound/core/jack.c @@ -102,6 +102,9 @@ int snd_jack_new(struct snd_card *card, const char *id, int type, if (type & SND_JACK_HEADPHONE) input_set_capability(jack->input_dev, EV_SW, SW_HEADPHONE_INSERT); + if (type & SND_JACK_LINEOUT) + input_set_capability(jack->input_dev, EV_SW, + SW_LINEOUT_INSERT); if (type & SND_JACK_MICROPHONE) input_set_capability(jack->input_dev, EV_SW, SW_MICROPHONE_INSERT); @@ -150,6 +153,9 @@ void snd_jack_report(struct snd_jack *jack, int status) if (jack->type & SND_JACK_HEADPHONE) input_report_switch(jack->input_dev, SW_HEADPHONE_INSERT, status & SND_JACK_HEADPHONE); + if (jack->type & SND_JACK_LINEOUT) + input_report_switch(jack->input_dev, SW_LINEOUT_INSERT, + status & SND_JACK_LINEOUT); if (jack->type & SND_JACK_MICROPHONE) input_report_switch(jack->input_dev, SW_MICROPHONE_INSERT, status & SND_JACK_MICROPHONE); -- cgit v1.2.3 From 505e371da195fad20cb8aaf45407a2849774d6d0 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 15 Oct 2008 14:56:42 +0800 Subject: markers: remove exported symbol marker_probe_cb_noarg() marker_probe_cb_noarg() should not be seen by outer code. this patch remove it. Signed-off-by: Lai Jiangshan Acked-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/marker.h | 2 -- kernel/marker.c | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/marker.h b/include/linux/marker.h index 889196c7fbb1..4cf45472d9f5 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -136,8 +136,6 @@ extern marker_probe_func __mark_empty_function; extern void marker_probe_cb(const struct marker *mdata, void *call_private, ...); -extern void marker_probe_cb_noarg(const struct marker *mdata, - void *call_private, ...); /* * Connect a probe to a marker. diff --git a/kernel/marker.c b/kernel/marker.c index 75a1a17cd78a..23192646555f 100644 --- a/kernel/marker.c +++ b/kernel/marker.c @@ -158,7 +158,7 @@ EXPORT_SYMBOL_GPL(marker_probe_cb); * * Should be connected to markers "MARK_NOARGS". */ -void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...) +static void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...) { va_list args; /* not initialized */ char ptype; @@ -198,7 +198,6 @@ void marker_probe_cb_noarg(const struct marker *mdata, void *call_private, ...) } rcu_read_unlock_sched(); } -EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); static void free_old_closure(struct rcu_head *head) { -- cgit v1.2.3 From 944ac4259e39801c843a915c3da8194ac9af0440 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 23 Oct 2008 19:26:08 -0400 Subject: ftrace: ftrace dump on oops control Impact: add (default-off) dump-trace-on-oops flag Currently, ftrace is set up to dump its contents to the console if the kernel panics or oops. This can be annoying if you have trace data in the buffers and you experience an oops, but the trace data is old or static. Usually when you want ftrace to dump its contents is when you are debugging your system and you have set up ftrace to trace the events leading to an oops. This patch adds a control variable called "ftrace_dump_on_oops" that will enable the ftrace dump to console on oops. This variable is default off but a developer can enable it either through the kernel command line by adding "ftrace_dump_on_oops" or at run time by setting (or disabling) /proc/sys/kernel/ftrace_dump_on_oops. v2: Replaced /** with /* as Randy explained that kernel-doc does not yet handle variables. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 ++ kernel/sysctl.c | 10 ++++++++++ kernel/trace/trace.c | 29 ++++++++++++++++++++++++++--- 3 files changed, 38 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index a3d46151be19..9623b7b9e5a5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -165,6 +165,8 @@ static inline void __ftrace_enabled_restore(int enabled) #endif #ifdef CONFIG_TRACING +extern int ftrace_dump_on_oops; + extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index a13bd4dfaeb1..84754f5801e6 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -484,6 +484,16 @@ static struct ctl_table kern_table[] = { .proc_handler = &ftrace_enable_sysctl, }, #endif +#ifdef CONFIG_TRACING + { + .ctl_name = CTL_UNNUMBERED, + .procname = "ftrace_dump_on_opps", + .data = &ftrace_dump_on_oops, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif #ifdef CONFIG_MODULES { .ctl_name = KERN_MODPROBE, diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d345d649d073..47f46cbdd860 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -63,6 +63,28 @@ static cpumask_t __read_mostly tracing_buffer_mask; static int tracing_disabled = 1; +/* + * ftrace_dump_on_oops - variable to dump ftrace buffer on oops + * + * If there is an oops (or kernel panic) and the ftrace_dump_on_oops + * is set, then ftrace_dump is called. This will output the contents + * of the ftrace buffers to the console. This is very useful for + * capturing traces that lead to crashes and outputing it to a + * serial console. + * + * It is default off, but you can enable it with either specifying + * "ftrace_dump_on_oops" in the kernel command line, or setting + * /proc/sys/kernel/ftrace_dump_on_oops to true. + */ +int ftrace_dump_on_oops; + +static int __init set_ftrace_dump_on_oops(char *str) +{ + ftrace_dump_on_oops = 1; + return 1; +} +__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); + long ns2usecs(cycle_t nsec) { @@ -3021,7 +3043,8 @@ EXPORT_SYMBOL_GPL(__ftrace_printk); static int trace_panic_handler(struct notifier_block *this, unsigned long event, void *unused) { - ftrace_dump(); + if (ftrace_dump_on_oops) + ftrace_dump(); return NOTIFY_OK; } @@ -3037,7 +3060,8 @@ static int trace_die_handler(struct notifier_block *self, { switch (val) { case DIE_OOPS: - ftrace_dump(); + if (ftrace_dump_on_oops) + ftrace_dump(); break; default: break; @@ -3078,7 +3102,6 @@ trace_printk_seq(struct trace_seq *s) trace_seq_reset(s); } - void ftrace_dump(void) { static DEFINE_SPINLOCK(ftrace_dump_lock); -- cgit v1.2.3 From 1080d709fb9d8cd4392f93476ee46a9d6ea05a5b Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 27 Oct 2008 12:28:25 -0700 Subject: net: implement emergency route cache rebulds when gc_elasticity is exceeded This is a patch to provide on demand route cache rebuilding. Currently, our route cache is rebulid periodically regardless of need. This introduced unneeded periodic latency. This patch offers a better approach. Using code provided by Eric Dumazet, we compute the standard deviation of the average hash bucket chain length while running rt_check_expire. Should any given chain length grow to larger that average plus 4 standard deviations, we trigger an emergency hash table rebuild for that net namespace. This allows for the common case in which chains are well behaved and do not grow unevenly to not incur any latency at all, while those systems (which may be being maliciously attacked), only rebuild when the attack is detected. This patch take 2 other factors into account: 1) chains with multiple entries that differ by attributes that do not affect the hash value are only counted once, so as not to unduly bias system to rebuilding if features like QOS are heavily used 2) if rebuilding crosses a certain threshold (which is adjustable via the added sysctl in this patch), route caching is disabled entirely for that net namespace, since constant rebuilding is less efficient that no caching at all Tested successfully by me. Signed-off-by: Neil Horman Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 6 ++ include/net/netns/ipv4.h | 2 + net/ipv4/route.c | 132 ++++++++++++++++++++++++++++++++- net/ipv4/sysctl_net_ipv4.c | 12 +++ 4 files changed, 150 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index d84932650fd3..c7712787933c 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -27,6 +27,12 @@ min_adv_mss - INTEGER The advertised MSS depends on the first hop route MTU, but will never be lower than this setting. +rt_cache_rebuild_count - INTEGER + The per net-namespace route cache emergency rebuild threshold. + Any net-namespace having its route cache rebuilt due to + a hash bucket chain being too long more than this many times + will have its route caching disabled + IP Fragmentation: ipfrag_high_thresh - INTEGER diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index ece1c926b5d1..977f482d97a9 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -49,6 +49,8 @@ struct netns_ipv4 { int sysctl_icmp_ratelimit; int sysctl_icmp_ratemask; int sysctl_icmp_errors_use_inbound_ifaddr; + int sysctl_rt_cache_rebuild_count; + int current_rt_cache_rebuild_count; struct timer_list rt_secret_timer; atomic_t rt_genid; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2ea6dcc3e2cc..21ce7e1b2284 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -129,6 +129,7 @@ static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ; static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20; static int ip_rt_min_advmss __read_mostly = 256; static int ip_rt_secret_interval __read_mostly = 10 * 60 * HZ; +static int rt_chain_length_max __read_mostly = 20; static void rt_worker_func(struct work_struct *work); static DECLARE_DELAYED_WORK(expires_work, rt_worker_func); @@ -145,6 +146,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu); static int rt_garbage_collect(struct dst_ops *ops); +static void rt_emergency_hash_rebuild(struct net *net); static struct dst_ops ipv4_dst_ops = { @@ -201,6 +203,7 @@ const __u8 ip_tos2prio[16] = { struct rt_hash_bucket { struct rtable *chain; }; + #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ defined(CONFIG_PROVE_LOCKING) /* @@ -674,6 +677,20 @@ static inline u32 rt_score(struct rtable *rt) return score; } +static inline bool rt_caching(const struct net *net) +{ + return net->ipv4.current_rt_cache_rebuild_count <= + net->ipv4.sysctl_rt_cache_rebuild_count; +} + +static inline bool compare_hash_inputs(const struct flowi *fl1, + const struct flowi *fl2) +{ + return (__force u32)(((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | + (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr) | + (fl1->iif ^ fl2->iif)) == 0); +} + static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | @@ -753,11 +770,24 @@ static void rt_do_flush(int process_context) } } +/* + * While freeing expired entries, we compute average chain length + * and standard deviation, using fixed-point arithmetic. + * This to have an estimation of rt_chain_length_max + * rt_chain_length_max = max(elasticity, AVG + 4*SD) + * We use 3 bits for frational part, and 29 (or 61) for magnitude. + */ + +#define FRACT_BITS 3 +#define ONE (1UL << FRACT_BITS) + static void rt_check_expire(void) { static unsigned int rover; unsigned int i = rover, goal; struct rtable *rth, **rthp; + unsigned long length = 0, samples = 0; + unsigned long sum = 0, sum2 = 0; u64 mult; mult = ((u64)ip_rt_gc_interval) << rt_hash_log; @@ -766,6 +796,7 @@ static void rt_check_expire(void) goal = (unsigned int)mult; if (goal > rt_hash_mask) goal = rt_hash_mask + 1; + length = 0; for (; goal > 0; goal--) { unsigned long tmo = ip_rt_gc_timeout; @@ -775,6 +806,8 @@ static void rt_check_expire(void) if (need_resched()) cond_resched(); + samples++; + if (*rthp == NULL) continue; spin_lock_bh(rt_hash_lock_addr(i)); @@ -789,11 +822,29 @@ static void rt_check_expire(void) if (time_before_eq(jiffies, rth->u.dst.expires)) { tmo >>= 1; rthp = &rth->u.dst.rt_next; + /* + * Only bump our length if the hash + * inputs on entries n and n+1 are not + * the same, we only count entries on + * a chain with equal hash inputs once + * so that entries for different QOS + * levels, and other non-hash input + * attributes don't unfairly skew + * the length computation + */ + if ((*rthp == NULL) || + !compare_hash_inputs(&(*rthp)->fl, + &rth->fl)) + length += ONE; continue; } } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) { tmo >>= 1; rthp = &rth->u.dst.rt_next; + if ((*rthp == NULL) || + !compare_hash_inputs(&(*rthp)->fl, + &rth->fl)) + length += ONE; continue; } @@ -802,6 +853,15 @@ static void rt_check_expire(void) rt_free(rth); } spin_unlock_bh(rt_hash_lock_addr(i)); + sum += length; + sum2 += length*length; + } + if (samples) { + unsigned long avg = sum / samples; + unsigned long sd = int_sqrt(sum2 / samples - avg*avg); + rt_chain_length_max = max_t(unsigned long, + ip_rt_gc_elasticity, + (avg + 4*sd) >> FRACT_BITS); } rover = i; } @@ -851,6 +911,26 @@ static void rt_secret_rebuild(unsigned long __net) mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval); } +static void rt_secret_rebuild_oneshot(struct net *net) +{ + del_timer_sync(&net->ipv4.rt_secret_timer); + rt_cache_invalidate(net); + if (ip_rt_secret_interval) { + net->ipv4.rt_secret_timer.expires += ip_rt_secret_interval; + add_timer(&net->ipv4.rt_secret_timer); + } +} + +static void rt_emergency_hash_rebuild(struct net *net) +{ + if (net_ratelimit()) { + printk(KERN_WARNING "Route hash chain too long!\n"); + printk(KERN_WARNING "Adjust your secret_interval!\n"); + } + + rt_secret_rebuild_oneshot(net); +} + /* Short description of GC goals. @@ -989,6 +1069,7 @@ out: return 0; static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp) { struct rtable *rth, **rthp; + struct rtable *rthi; unsigned long now; struct rtable *cand, **candp; u32 min_score; @@ -1002,7 +1083,13 @@ restart: candp = NULL; now = jiffies; + if (!rt_caching(dev_net(rt->u.dst.dev))) { + rt_drop(rt); + return 0; + } + rthp = &rt_hash_table[hash].chain; + rthi = NULL; spin_lock_bh(rt_hash_lock_addr(hash)); while ((rth = *rthp) != NULL) { @@ -1048,6 +1135,17 @@ restart: chain_length++; rthp = &rth->u.dst.rt_next; + + /* + * check to see if the next entry in the chain + * contains the same hash input values as rt. If it does + * This is where we will insert into the list, instead of + * at the head. This groups entries that differ by aspects not + * relvant to the hash function together, which we use to adjust + * our chain length + */ + if (*rthp && compare_hash_inputs(&(*rthp)->fl, &rt->fl)) + rthi = rth; } if (cand) { @@ -1061,6 +1159,16 @@ restart: *candp = cand->u.dst.rt_next; rt_free(cand); } + } else { + if (chain_length > rt_chain_length_max) { + struct net *net = dev_net(rt->u.dst.dev); + int num = ++net->ipv4.current_rt_cache_rebuild_count; + if (!rt_caching(dev_net(rt->u.dst.dev))) { + printk(KERN_WARNING "%s: %d rebuilds is over limit, route caching disabled\n", + rt->u.dst.dev->name, num); + } + rt_emergency_hash_rebuild(dev_net(rt->u.dst.dev)); + } } /* Try to bind route to arp only if it is output @@ -1098,7 +1206,11 @@ restart: } } - rt->u.dst.rt_next = rt_hash_table[hash].chain; + if (rthi) + rt->u.dst.rt_next = rthi->u.dst.rt_next; + else + rt->u.dst.rt_next = rt_hash_table[hash].chain; + #if RT_CACHE_DEBUG >= 2 if (rt->u.dst.rt_next) { struct rtable *trt; @@ -1114,7 +1226,11 @@ restart: * previous writes to rt are comitted to memory * before making rt visible to other CPUS. */ - rcu_assign_pointer(rt_hash_table[hash].chain, rt); + if (rthi) + rcu_assign_pointer(rthi->u.dst.rt_next, rt); + else + rcu_assign_pointer(rt_hash_table[hash].chain, rt); + spin_unlock_bh(rt_hash_lock_addr(hash)); *rp = rt; return 0; @@ -1217,6 +1333,9 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, || ipv4_is_zeronet(new_gw)) goto reject_redirect; + if (!rt_caching(net)) + goto reject_redirect; + if (!IN_DEV_SHARED_MEDIA(in_dev)) { if (!inet_addr_onlink(in_dev, new_gw, old_gw)) goto reject_redirect; @@ -2130,6 +2249,10 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, struct net *net; net = dev_net(dev); + + if (!rt_caching(net)) + goto skip_cache; + tos &= IPTOS_RT_MASK; hash = rt_hash(daddr, saddr, iif, rt_genid(net)); @@ -2154,6 +2277,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, } rcu_read_unlock(); +skip_cache: /* Multicast recognition logic is moved from route cache to here. The problem was that too many Ethernet cards have broken/missing hardware multicast filters :-( As result the host on multicasting @@ -2539,6 +2663,9 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, unsigned hash; struct rtable *rth; + if (!rt_caching(net)) + goto slow_output; + hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net)); rcu_read_lock_bh(); @@ -2563,6 +2690,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp, } rcu_read_unlock_bh(); +slow_output: return ip_route_output_slow(net, rp, flp); } diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1bb10df8ce7d..0cc8d31f9ac0 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -795,6 +795,14 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = &proc_dointvec }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "rt_cache_rebuild_count", + .data = &init_net.ipv4.sysctl_rt_cache_rebuild_count, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, { } }; @@ -827,8 +835,12 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) &net->ipv4.sysctl_icmp_ratelimit; table[5].data = &net->ipv4.sysctl_icmp_ratemask; + table[6].data = + &net->ipv4.sysctl_rt_cache_rebuild_count; } + net->ipv4.sysctl_rt_cache_rebuild_count = 4; + net->ipv4.ipv4_hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table); if (net->ipv4.ipv4_hdr == NULL) -- cgit v1.2.3 From b057efd4d226fcc3a92b0dc6d8ea8e8185ecb260 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 28 Oct 2008 11:59:11 -0700 Subject: netlink: constify struct nlattr * arg to parsing functions Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netlink.h | 22 +++++++++++----------- net/netlink/attr.c | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index 3643bbb8e585..46b7764f1774 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -233,7 +233,7 @@ extern int nla_parse(struct nlattr *tb[], int maxtype, extern struct nlattr * nla_find(struct nlattr *head, int len, int attrtype); extern size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize); -extern int nla_memcpy(void *dest, struct nlattr *src, int count); +extern int nla_memcpy(void *dest, const struct nlattr *src, int count); extern int nla_memcmp(const struct nlattr *nla, const void *data, size_t size); extern int nla_strcmp(const struct nlattr *nla, const char *str); @@ -741,7 +741,7 @@ static inline struct nlattr *nla_find_nested(struct nlattr *nla, int attrtype) * See nla_parse() */ static inline int nla_parse_nested(struct nlattr *tb[], int maxtype, - struct nlattr *nla, + const struct nlattr *nla, const struct nla_policy *policy) { return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy); @@ -875,7 +875,7 @@ static inline int nla_put_msecs(struct sk_buff *skb, int attrtype, * nla_get_u32 - return payload of u32 attribute * @nla: u32 netlink attribute */ -static inline u32 nla_get_u32(struct nlattr *nla) +static inline u32 nla_get_u32(const struct nlattr *nla) { return *(u32 *) nla_data(nla); } @@ -884,7 +884,7 @@ static inline u32 nla_get_u32(struct nlattr *nla) * nla_get_be32 - return payload of __be32 attribute * @nla: __be32 netlink attribute */ -static inline __be32 nla_get_be32(struct nlattr *nla) +static inline __be32 nla_get_be32(const struct nlattr *nla) { return *(__be32 *) nla_data(nla); } @@ -893,7 +893,7 @@ static inline __be32 nla_get_be32(struct nlattr *nla) * nla_get_u16 - return payload of u16 attribute * @nla: u16 netlink attribute */ -static inline u16 nla_get_u16(struct nlattr *nla) +static inline u16 nla_get_u16(const struct nlattr *nla) { return *(u16 *) nla_data(nla); } @@ -902,7 +902,7 @@ static inline u16 nla_get_u16(struct nlattr *nla) * nla_get_be16 - return payload of __be16 attribute * @nla: __be16 netlink attribute */ -static inline __be16 nla_get_be16(struct nlattr *nla) +static inline __be16 nla_get_be16(const struct nlattr *nla) { return *(__be16 *) nla_data(nla); } @@ -911,7 +911,7 @@ static inline __be16 nla_get_be16(struct nlattr *nla) * nla_get_le16 - return payload of __le16 attribute * @nla: __le16 netlink attribute */ -static inline __le16 nla_get_le16(struct nlattr *nla) +static inline __le16 nla_get_le16(const struct nlattr *nla) { return *(__le16 *) nla_data(nla); } @@ -920,7 +920,7 @@ static inline __le16 nla_get_le16(struct nlattr *nla) * nla_get_u8 - return payload of u8 attribute * @nla: u8 netlink attribute */ -static inline u8 nla_get_u8(struct nlattr *nla) +static inline u8 nla_get_u8(const struct nlattr *nla) { return *(u8 *) nla_data(nla); } @@ -929,7 +929,7 @@ static inline u8 nla_get_u8(struct nlattr *nla) * nla_get_u64 - return payload of u64 attribute * @nla: u64 netlink attribute */ -static inline u64 nla_get_u64(struct nlattr *nla) +static inline u64 nla_get_u64(const struct nlattr *nla) { u64 tmp; @@ -942,7 +942,7 @@ static inline u64 nla_get_u64(struct nlattr *nla) * nla_get_flag - return payload of flag attribute * @nla: flag netlink attribute */ -static inline int nla_get_flag(struct nlattr *nla) +static inline int nla_get_flag(const struct nlattr *nla) { return !!nla; } @@ -953,7 +953,7 @@ static inline int nla_get_flag(struct nlattr *nla) * * Returns the number of milliseconds in jiffies. */ -static inline unsigned long nla_get_msecs(struct nlattr *nla) +static inline unsigned long nla_get_msecs(const struct nlattr *nla) { u64 msecs = nla_get_u64(nla); diff --git a/net/netlink/attr.c b/net/netlink/attr.c index 2d106cfe1d27..c83fea7da9a8 100644 --- a/net/netlink/attr.c +++ b/net/netlink/attr.c @@ -233,7 +233,7 @@ size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize) * * Returns the number of bytes copied. */ -int nla_memcpy(void *dest, struct nlattr *src, int count) +int nla_memcpy(void *dest, const struct nlattr *src, int count) { int minlen = min_t(int, count, nla_len(src)); -- cgit v1.2.3 From def8b4faff5ca349beafbbfeb2c51f3602a6ef3a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 28 Oct 2008 13:24:06 -0700 Subject: net: reduce structures when XFRM=n ifdef out * struct sk_buff::sp (pointer) * struct dst_entry::xfrm (pointer) * struct sock::sk_policy (2 pointers) Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/skbuff.h | 15 ++++++++++++++- include/net/dst.h | 3 ++- include/net/sock.h | 2 ++ include/net/xfrm.h | 4 ++++ net/core/skbuff.c | 2 +- net/ipv4/icmp.c | 3 ++- net/ipv4/ip_forward.c | 2 +- net/ipv4/route.c | 2 ++ net/ipv6/icmp.c | 3 ++- net/ipv6/ip6_output.c | 2 +- security/selinux/hooks.c | 4 ++-- 11 files changed, 33 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2725f4e5a9bf..487e34507b41 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -269,8 +269,9 @@ struct sk_buff { struct dst_entry *dst; struct rtable *rtable; }; +#ifdef CONFIG_XFRM struct sec_path *sp; - +#endif /* * This is the control buffer. It is free to use for every * layer. Please put your private variables there. If you @@ -1864,6 +1865,18 @@ static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_bu to->queue_mapping = from->queue_mapping; } +#ifdef CONFIG_XFRM +static inline struct sec_path *skb_sec_path(struct sk_buff *skb) +{ + return skb->sp; +} +#else +static inline struct sec_path *skb_sec_path(struct sk_buff *skb) +{ + return NULL; +} +#endif + static inline int skb_is_gso(const struct sk_buff *skb) { return skb_shinfo(skb)->gso_size; diff --git a/include/net/dst.h b/include/net/dst.h index 8a8b71e5f3f1..f96c4ba4dd32 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -59,8 +59,9 @@ struct dst_entry struct neighbour *neighbour; struct hh_cache *hh; +#ifdef CONFIG_XFRM struct xfrm_state *xfrm; - +#endif int (*input)(struct sk_buff*); int (*output)(struct sk_buff*); diff --git a/include/net/sock.h b/include/net/sock.h index ada50c04d09f..d6b750a25078 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -229,7 +229,9 @@ struct sock { } sk_backlog; wait_queue_head_t *sk_sleep; struct dst_entry *sk_dst_cache; +#ifdef CONFIG_XFRM struct xfrm_policy *sk_policy[2]; +#endif rwlock_t sk_dst_lock; atomic_t sk_rmem_alloc; atomic_t sk_wmem_alloc; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 11c890ad8ebb..f2c5ba28a428 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -882,6 +882,7 @@ struct xfrm_dst u32 path_cookie; }; +#ifdef CONFIG_XFRM static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) { dst_release(xdst->route); @@ -894,6 +895,7 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst) xdst->partner = NULL; #endif } +#endif extern void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev); @@ -1536,9 +1538,11 @@ static inline void xfrm_states_delete(struct xfrm_state **states, int n) } #endif +#ifdef CONFIG_XFRM static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb) { return skb->sp->xvec[skb->sp->len - 1]; } +#endif #endif /* _NET_XFRM_H */ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4e22e3a35359..cdfe473181af 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -489,7 +489,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->network_header = old->network_header; new->mac_header = old->mac_header; new->dst = dst_clone(old->dst); -#ifdef CONFIG_INET +#ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); #endif memcpy(new->cb, old->cb, sizeof(old->cb)); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 72b2de76f1cd..e9d6ea0b49ca 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -976,9 +976,10 @@ int icmp_rcv(struct sk_buff *skb) struct net *net = dev_net(rt->u.dst.dev); if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { + struct sec_path *sp = skb_sec_path(skb); int nh; - if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags & + if (!(sp && sp->xvec[sp->len - 1]->props.flags & XFRM_STATE_ICMP)) goto drop; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 450016b89a18..df3fe50bbf0d 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -106,7 +106,7 @@ int ip_forward(struct sk_buff *skb) * We now generate an ICMP HOST REDIRECT giving the route * we calculated. */ - if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb->sp) + if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr && !skb_sec_path(skb)) ip_rt_send_redirect(skb); skb->priority = rt_tos2priority(iph->tos); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 21ce7e1b2284..ffb2c5705432 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1399,7 +1399,9 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, rt->u.dst.path = &rt->u.dst; rt->u.dst.neighbour = NULL; rt->u.dst.hh = NULL; +#ifdef CONFIG_XFRM rt->u.dst.xfrm = NULL; +#endif rt->rt_genid = rt_genid(net); rt->rt_flags |= RTCF_REDIRECTED; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 9b7d19ae5ced..508a713ac045 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -646,9 +646,10 @@ static int icmpv6_rcv(struct sk_buff *skb) int type; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { + struct sec_path *sp = skb_sec_path(skb); int nh; - if (!(skb->sp && skb->sp->xvec[skb->sp->len - 1]->props.flags & + if (!(sp && sp->xvec[sp->len - 1]->props.flags & XFRM_STATE_ICMP)) goto drop_no_count; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index c77db0b95e26..7d92fd97cfb9 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -490,7 +490,7 @@ int ip6_forward(struct sk_buff *skb) We don't send redirects to frames decapsulated from IPsec. */ if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 && - !skb->sp) { + !skb_sec_path(skb)) { struct in6_addr *target = NULL; struct rt6_info *rt; struct neighbour *n = dst->neighbour; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 3e3fde7c1d2b..aedf02b1345a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4626,7 +4626,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex, * as fast and as clean as possible. */ if (selinux_compat_net || !selinux_policycap_netpeer) return selinux_ip_postroute_compat(skb, ifindex, family); - +#ifdef CONFIG_XFRM /* If skb->dst->xfrm is non-NULL then the packet is undergoing an IPsec * packet transformation so allow the packet to pass without any checks * since we'll have another chance to perform access control checks @@ -4635,7 +4635,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex, * is NULL, in this case go ahead and apply access control. */ if (skb->dst != NULL && skb->dst->xfrm != NULL) return NF_ACCEPT; - +#endif secmark_active = selinux_secmark_enabled(); peerlbl_active = netlbl_enabled() || selinux_xfrm_enabled(); if (!secmark_active && !peerlbl_active) -- cgit v1.2.3 From 3a2dfbe8acb154905fdc2fd03ec56df42e6c4cc4 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Tue, 28 Oct 2008 16:01:07 -0700 Subject: xfrm: Notify changes in UDP encapsulation via netlink Add new_mapping() implementation to the netlink xfrm_mgr to notify address/port changes detected in UDP encapsulated ESP packets. Signed-off-by: Martin Willi Signed-off-by: David S. Miller --- include/linux/xfrm.h | 14 ++++++++++++++ net/xfrm/xfrm_user.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) (limited to 'include') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 4bc1e6b86cb2..52f3abd453a1 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -199,6 +199,9 @@ enum { #define XFRM_MSG_NEWSPDINFO XFRM_MSG_NEWSPDINFO XFRM_MSG_GETSPDINFO, #define XFRM_MSG_GETSPDINFO XFRM_MSG_GETSPDINFO + + XFRM_MSG_MAPPING, +#define XFRM_MSG_MAPPING XFRM_MSG_MAPPING __XFRM_MSG_MAX }; #define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1) @@ -438,6 +441,15 @@ struct xfrm_user_migrate { __u16 new_family; }; +struct xfrm_user_mapping { + struct xfrm_usersa_id id; + __u32 reqid; + xfrm_address_t old_saddr; + xfrm_address_t new_saddr; + __be16 old_sport; + __be16 new_sport; +}; + #ifndef __KERNEL__ /* backwards compatibility for userspace */ #define XFRMGRP_ACQUIRE 1 @@ -464,6 +476,8 @@ enum xfrm_nlgroups { #define XFRMNLGRP_REPORT XFRMNLGRP_REPORT XFRMNLGRP_MIGRATE, #define XFRMNLGRP_MIGRATE XFRMNLGRP_MIGRATE + XFRMNLGRP_MAPPING, +#define XFRMNLGRP_MAPPING XFRMNLGRP_MAPPING __XFRMNLGRP_MAX }; #define XFRMNLGRP_MAX (__XFRMNLGRP_MAX - 1) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 4a8a1abb59ee..76cf56d5d834 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2503,6 +2503,57 @@ static int xfrm_send_report(u8 proto, struct xfrm_selector *sel, return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); } +static inline size_t xfrm_mapping_msgsize(void) +{ + return NLMSG_ALIGN(sizeof(struct xfrm_user_mapping)); +} + +static int build_mapping(struct sk_buff *skb, struct xfrm_state *x, + xfrm_address_t *new_saddr, __be16 new_sport) +{ + struct xfrm_user_mapping *um; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_MAPPING, sizeof(*um), 0); + if (nlh == NULL) + return -EMSGSIZE; + + um = nlmsg_data(nlh); + + memcpy(&um->id.daddr, &x->id.daddr, sizeof(um->id.daddr)); + um->id.spi = x->id.spi; + um->id.family = x->props.family; + um->id.proto = x->id.proto; + memcpy(&um->new_saddr, new_saddr, sizeof(um->new_saddr)); + memcpy(&um->old_saddr, &x->props.saddr, sizeof(um->old_saddr)); + um->new_sport = new_sport; + um->old_sport = x->encap->encap_sport; + um->reqid = x->props.reqid; + + return nlmsg_end(skb, nlh); +} + +static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, + __be16 sport) +{ + struct sk_buff *skb; + + if (x->id.proto != IPPROTO_ESP) + return -EINVAL; + + if (!x->encap) + return -EINVAL; + + skb = nlmsg_new(xfrm_mapping_msgsize(), GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + if (build_mapping(skb, x, ipaddr, sport) < 0) + BUG(); + + return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC); +} + static struct xfrm_mgr netlink_mgr = { .id = "netlink", .notify = xfrm_send_state_notify, @@ -2511,6 +2562,7 @@ static struct xfrm_mgr netlink_mgr = { .notify_policy = xfrm_send_policy_notify, .report = xfrm_send_report, .migrate = xfrm_send_migrate, + .new_mapping = xfrm_send_mapping, }; static int __init xfrm_user_init(void) -- cgit v1.2.3 From 0c6ce78abf6e228d44c3840edb8a4ae0c1299825 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 28 Oct 2008 16:09:23 -0700 Subject: net: replace uses of NIP6_FMT with %p6 Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- include/linux/sunrpc/svc_xprt.h | 4 +-- include/net/ip_vs.h | 4 +-- include/net/netfilter/nf_conntrack_tuple.h | 6 ++--- include/net/sctp/sctp.h | 4 +-- net/ipv4/tcp_input.c | 4 +-- net/ipv4/tcp_timer.c | 4 +-- net/ipv6/addrlabel.c | 34 +++++++++----------------- net/ipv6/ah6.c | 4 +-- net/ipv6/esp6.c | 4 +-- net/ipv6/exthdrs.c | 2 +- net/ipv6/icmp.c | 4 +-- net/ipv6/ip6mr.c | 5 ++-- net/ipv6/ipcomp6.c | 4 +-- net/ipv6/ndisc.c | 7 ++---- net/ipv6/netfilter/ip6t_LOG.c | 2 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 5 ++-- net/ipv6/tcp_ipv6.c | 8 +++--- 17 files changed, 43 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 6fd7b016517f..42e01c93c7ea 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -145,8 +145,8 @@ static inline char *__svc_print_addr(struct sockaddr *addr, break; case AF_INET6: - snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u", - NIP6(((struct sockaddr_in6 *) addr)->sin6_addr), + snprintf(buf, len, "%p6, port=%u", + &((struct sockaddr_in6 *)addr)->sin6_addr, ntohs(((struct sockaddr_in6 *) addr)->sin6_port)); break; diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index fe9fcf73c85e..6a6692067092 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -87,8 +87,8 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, int len; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) - len = snprintf(&buf[*idx], buf_len - *idx, "[" NIP6_FMT "]", - NIP6(addr->in6)) + 1; + len = snprintf(&buf[*idx], buf_len - *idx, "[%p6]", + &addr->in6) + 1; else #endif len = snprintf(&buf[*idx], buf_len - *idx, NIPQUAD_FMT, diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index a6874ba22d54..303efaf68d08 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -122,10 +122,10 @@ static inline void nf_ct_dump_tuple_ip(const struct nf_conntrack_tuple *t) static inline void nf_ct_dump_tuple_ipv6(const struct nf_conntrack_tuple *t) { #ifdef DEBUG - printk("tuple %p: %u " NIP6_FMT " %hu -> " NIP6_FMT " %hu\n", + printk("tuple %p: %u %p6 %hu -> %p6 %hu\n", t, t->dst.protonum, - NIP6(*(struct in6_addr *)t->src.u3.all), ntohs(t->src.u.all), - NIP6(*(struct in6_addr *)t->dst.u3.all), ntohs(t->dst.u.all)); + t->src.u3.all, ntohs(t->src.u.all), + t->dst.u3.all, ntohs(t->dst.u.all)); #endif } diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index ed71b110edf7..a84c3976e1b0 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -285,9 +285,9 @@ extern int sctp_debug_flag; if (sctp_debug_flag) { \ if (saddr->sa.sa_family == AF_INET6) { \ printk(KERN_DEBUG \ - lead NIP6_FMT trail, \ + lead "%p6" trail, \ leadparm, \ - NIP6(saddr->v6.sin6_addr), \ + &saddr->v6.sin6_addr, \ otherparms); \ } else { \ printk(KERN_DEBUG \ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d77c0d29e239..191c06bb0f67 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2346,9 +2346,9 @@ static void DBGUNDO(struct sock *sk, const char *msg) #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); - printk(KERN_DEBUG "Undo %s " NIP6_FMT "/%u c%u l%u ss%u/%u p%u\n", + printk(KERN_DEBUG "Undo %s %p6/%u c%u l%u ss%u/%u p%u\n", msg, - NIP6(np->daddr), ntohs(inet->dport), + &np->daddr, ntohs(inet->dport), tp->snd_cwnd, tcp_left_out(tp), tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 6b6dff1164b9..4e6ee5205237 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -306,8 +306,8 @@ static void tcp_retransmit_timer(struct sock *sk) #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); - LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer " NIP6_FMT ":%u/%u shrinks window %u:%u. Repaired.\n", - NIP6(np->daddr), ntohs(inet->dport), + LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer %p6:%u/%u shrinks window %u:%u. Repaired.\n", + &np->daddr, ntohs(inet->dport), inet->num, tp->snd_una, tp->snd_nxt); } #endif diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 08909039d87b..d28036659d27 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -186,10 +186,8 @@ u32 ipv6_addr_label(struct net *net, label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; rcu_read_unlock(); - ADDRLABEL(KERN_DEBUG "%s(addr=" NIP6_FMT ", type=%d, ifindex=%d) => %08x\n", - __func__, - NIP6(*addr), type, ifindex, - label); + ADDRLABEL(KERN_DEBUG "%s(addr=%p6, type=%d, ifindex=%d) => %08x\n", + __func__, addr, type, ifindex, label); return label; } @@ -203,11 +201,8 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, struct ip6addrlbl_entry *newp; int addrtype; - ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u)\n", - __func__, - NIP6(*prefix), prefixlen, - ifindex, - (unsigned int)label); + ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d, label=%u)\n", + __func__, prefix, prefixlen, ifindex, (unsigned int)label); addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); @@ -294,12 +289,9 @@ static int ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp; int ret = 0; - ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", - __func__, - NIP6(*prefix), prefixlen, - ifindex, - (unsigned int)label, - replace); + ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", + __func__, prefix, prefixlen, ifindex, (unsigned int)label, + replace); newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label); if (IS_ERR(newp)) @@ -321,10 +313,8 @@ static int __ip6addrlbl_del(struct net *net, struct hlist_node *pos, *n; int ret = -ESRCH; - ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n", - __func__, - NIP6(*prefix), prefixlen, - ifindex); + ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d)\n", + __func__, prefix, prefixlen, ifindex); hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { if (p->prefixlen == prefixlen && @@ -347,10 +337,8 @@ static int ip6addrlbl_del(struct net *net, struct in6_addr prefix_buf; int ret; - ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n", - __func__, - NIP6(*prefix), prefixlen, - ifindex); + ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d)\n", + __func__, prefix, prefixlen, ifindex); ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); spin_lock(&ip6addrlbl_table.lock); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 2ff0c8233e47..9bc43f2527ce 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -419,8 +419,8 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!x) return; - NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/" NIP6_FMT "\n", - ntohl(ah->spi), NIP6(iph->daddr)); + NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%p6\n", + ntohl(ah->spi), &iph->daddr); xfrm_state_put(x); } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index b181b08fb761..ec4be188c341 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -367,8 +367,8 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) return; - printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/" NIP6_FMT "\n", - ntohl(esph->spi), NIP6(iph->daddr)); + printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%p6\n", + ntohl(esph->spi), &iph->daddr); xfrm_state_put(x); } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 6bfffec2371c..a89051468359 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -219,7 +219,7 @@ static int ipv6_dest_hao(struct sk_buff *skb, int optoff) if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) { LIMIT_NETDEBUG( - KERN_DEBUG "hao is not an unicast addr: " NIP6_FMT "\n", NIP6(hao->addr)); + KERN_DEBUG "hao is not an unicast addr: %p6\n", &hao->addr); goto discard; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 508a713ac045..b3fa38e40dc4 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -681,8 +681,8 @@ static int icmpv6_rcv(struct sk_buff *skb) skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, 0)); if (__skb_checksum_complete(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [" NIP6_FMT " > " NIP6_FMT "]\n", - NIP6(*saddr), NIP6(*daddr)); + LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%p6 > %p6]\n", + saddr, daddr); goto discard_it; } } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 182f8a177e7f..798e6a29dd83 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -297,9 +297,8 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) const struct mfc6_cache *mfc = v; const struct ipmr_mfc_iter *it = seq->private; - seq_printf(seq, - NIP6_FMT " " NIP6_FMT " %-3d %8ld %8ld %8ld", - NIP6(mfc->mf6c_mcastgrp), NIP6(mfc->mf6c_origin), + seq_printf(seq, "%p6 %p6 %-3d %8ld %8ld %8ld", + &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, mfc->mf6c_parent, mfc->mfc_un.res.pkt, mfc->mfc_un.res.bytes, diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 4545e4306862..9566d8f73140 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -67,8 +67,8 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!x) return; - printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/" NIP6_FMT "\n", - spi, NIP6(iph->daddr)); + printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%p6\n", + spi, &iph->daddr); xfrm_state_put(x); } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 172438320eec..191bb0722a7c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -647,11 +647,8 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) if ((probes -= neigh->parms->ucast_probes) < 0) { if (!(neigh->nud_state & NUD_VALID)) { - ND_PRINTK1(KERN_DEBUG - "%s(): trying to ucast probe in NUD_INVALID: " - NIP6_FMT "\n", - __func__, - NIP6(*target)); + ND_PRINTK1(KERN_DEBUG "%s(): trying to ucast probe in NUD_INVALID: %p6\n", + __func__, target); } ndisc_send_ns(dev, neigh, target, target, saddr); } else if ((probes -= neigh->parms->app_probes) < 0) { diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index caa441d09567..a61ce3010007 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -61,7 +61,7 @@ static void dump_packet(const struct nf_loginfo *info, } /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */ - printk("SRC=" NIP6_FMT " DST=" NIP6_FMT " ", NIP6(ih->saddr), NIP6(ih->daddr)); + printk("SRC=%p6 DST=%p6 ", &ih->saddr, &ih->daddr); /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ printk("LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index e91db16611d9..b165a273c6c0 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -56,9 +56,8 @@ static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple, static int ipv6_print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple) { - return seq_printf(s, "src=" NIP6_FMT " dst=" NIP6_FMT " ", - NIP6(*((struct in6_addr *)tuple->src.u3.ip6)), - NIP6(*((struct in6_addr *)tuple->dst.u3.ip6))); + return seq_printf(s, "src=%p6 dst=%p6 ", + tuple->src.u3.ip6, tuple->dst.u3.ip6); } /* diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b6b356b7912a..483550cfdf33 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -872,12 +872,10 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb) if (genhash || memcmp(hash_location, newhash, 16) != 0) { if (net_ratelimit()) { - printk(KERN_INFO "MD5 Hash %s for " - "(" NIP6_FMT ", %u)->" - "(" NIP6_FMT ", %u)\n", + printk(KERN_INFO "MD5 Hash %s for (%p6, %u)->(%p6, %u)\n", genhash ? "failed" : "mismatch", - NIP6(ip6h->saddr), ntohs(th->source), - NIP6(ip6h->daddr), ntohs(th->dest)); + &ip6h->saddr, ntohs(th->source), + &ip6h->daddr, ntohs(th->dest)); } return 1; } -- cgit v1.2.3 From b189db5d299c6824780af5590564ff608adb3dea Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 28 Oct 2008 22:38:52 -0700 Subject: net: remove NIP6(), NIP6_FMT, NIP6_SEQFMT and final users Open code NIP6_FMT in the one call inside sscanf and one user of NIP6() that could use %p6 in the netfilter code. Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- include/linux/kernel.h | 12 ------------ net/bridge/netfilter/ebt_log.c | 6 ++---- net/sunrpc/svcauth_unix.c | 2 +- 3 files changed, 3 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 396a350b87a6..77777c460099 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -357,18 +357,6 @@ static inline char *pack_hex_byte(char *buf, u8 byte) ((unsigned char *)&addr)[3] #define NIPQUAD_FMT "%u.%u.%u.%u" -#define NIP6(addr) \ - ntohs((addr).s6_addr16[0]), \ - ntohs((addr).s6_addr16[1]), \ - ntohs((addr).s6_addr16[2]), \ - ntohs((addr).s6_addr16[3]), \ - ntohs((addr).s6_addr16[4]), \ - ntohs((addr).s6_addr16[5]), \ - ntohs((addr).s6_addr16[6]), \ - ntohs((addr).s6_addr16[7]) -#define NIP6_FMT "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x" -#define NIP6_SEQFMT "%04x%04x%04x%04x%04x%04x%04x%04x" - #if defined(__LITTLE_ENDIAN) #define HIPQUAD(addr) \ ((unsigned char *)&addr)[3], \ diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 3d33c608906a..3654f3ebdb8f 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -133,10 +133,8 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum, printk(" INCOMPLETE IPv6 header"); goto out; } - printk(" IPv6 SRC=%x:%x:%x:%x:%x:%x:%x:%x " - "IPv6 DST=%x:%x:%x:%x:%x:%x:%x:%x, IPv6 " - "priority=0x%01X, Next Header=%d", NIP6(ih->saddr), - NIP6(ih->daddr), ih->priority, ih->nexthdr); + printk(" IPv6 SRC=%p6 IPv6 DST=%p6, IPv6 priority=0x%01X, Next Header=%d", + &ih->saddr, &ih->daddr, ih->priority, ih->nexthdr); nexthdr = ih->nexthdr; offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr); if (offset_ph == -1) diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 9a6d0cfd4ce3..eb640c1a1bc9 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -214,7 +214,7 @@ static int ip_map_parse(struct cache_detail *cd, addr.s6_addr32[2] = htonl(0xffff); addr.s6_addr32[3] = htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4); - } else if (sscanf(buf, NIP6_FMT "%c", + } else if (sscanf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x%c", &b1, &b2, &b3, &b4, &b5, &b6, &b7, &b8, &c) == 8) { addr.s6_addr16[0] = htons(b1); addr.s6_addr16[1] = htons(b2); -- cgit v1.2.3 From 645ca708f936b2fbeb79e52d7823e3eb2c0905f8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Oct 2008 01:41:45 -0700 Subject: udp: introduce struct udp_table and multiple spinlocks UDP sockets are hashed in a 128 slots hash table. This hash table is protected by *one* rwlock. This rwlock is readlocked each time an incoming UDP message is handled. This rwlock is writelocked each time a socket must be inserted in hash table (bind time), or deleted from this table (close time) This is not scalable on SMP machines : 1) Even in read mode, lock() and unlock() are atomic operations and must dirty a contended cache line, shared by all cpus. 2) A writer might be starved if many readers are 'in flight'. This can happen on a machine with some NIC receiving many UDP messages. User process can be delayed a long time at socket creation/dismantle time. This patch prepares RCU migration, by introducing 'struct udp_table and struct udp_hslot', and using one spinlock per chain, to reduce contention on central rwlock. Introducing one spinlock per chain reduces latencies, for port randomization on heavily loaded UDP servers. This also speedup bindings to specific ports. udp_lib_unhash() was uninlined, becoming to big. Some cleanups were done to ease review of following patch (RCUification of UDP Unicast lookups) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- include/net/udp.h | 25 +++--- include/net/udplite.h | 2 +- net/ipv4/udp.c | 207 ++++++++++++++++++++++++++++++-------------------- net/ipv4/udp_impl.h | 4 +- net/ipv4/udplite.c | 13 ++-- net/ipv6/udp.c | 112 +++++++++++++++------------ net/ipv6/udp_impl.h | 4 +- net/ipv6/udplite.c | 8 +- 9 files changed, 214 insertions(+), 163 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index d6b750a25078..d200dfbe1ef6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -599,7 +599,7 @@ struct proto { union { struct inet_hashinfo *hashinfo; - struct hlist_head *udp_hash; + struct udp_table *udp_table; struct raw_hashinfo *raw_hash; } h; diff --git a/include/net/udp.h b/include/net/udp.h index 1e205095ea68..df2bfe545374 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -50,8 +50,15 @@ struct udp_skb_cb { }; #define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb)) -extern struct hlist_head udp_hash[UDP_HTABLE_SIZE]; -extern rwlock_t udp_hash_lock; +struct udp_hslot { + struct hlist_head head; + spinlock_t lock; +} __attribute__((aligned(2 * sizeof(long)))); +struct udp_table { + struct udp_hslot hash[UDP_HTABLE_SIZE]; +}; +extern struct udp_table udp_table; +extern void udp_table_init(struct udp_table *); /* Note: this must match 'valbool' in sock_setsockopt */ @@ -110,15 +117,7 @@ static inline void udp_lib_hash(struct sock *sk) BUG(); } -static inline void udp_lib_unhash(struct sock *sk) -{ - write_lock_bh(&udp_hash_lock); - if (sk_del_node_init(sk)) { - inet_sk(sk)->num = 0; - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - } - write_unlock_bh(&udp_hash_lock); -} +extern void udp_lib_unhash(struct sock *sk); static inline void udp_lib_close(struct sock *sk, long timeout) { @@ -187,7 +186,7 @@ extern struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, struct udp_seq_afinfo { char *name; sa_family_t family; - struct hlist_head *hashtable; + struct udp_table *udp_table; struct file_operations seq_fops; struct seq_operations seq_ops; }; @@ -196,7 +195,7 @@ struct udp_iter_state { struct seq_net_private p; sa_family_t family; int bucket; - struct hlist_head *hashtable; + struct udp_table *udp_table; }; #ifdef CONFIG_PROC_FS diff --git a/include/net/udplite.h b/include/net/udplite.h index b76b2e377af4..afdffe607b24 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -11,7 +11,7 @@ #define UDPLITE_RECV_CSCOV 11 /* receiver partial coverage (threshold ) */ extern struct proto udplite_prot; -extern struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; +extern struct udp_table udplite_table; /* * Checksum computation is all in software, hence simpler getfrag. diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2095abc3caba..2a6c491f97d7 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -104,12 +104,8 @@ #include #include "udp_impl.h" -/* - * Snmp MIB for the UDP layer - */ - -struct hlist_head udp_hash[UDP_HTABLE_SIZE]; -DEFINE_RWLOCK(udp_hash_lock); +struct udp_table udp_table; +EXPORT_SYMBOL(udp_table); int sysctl_udp_mem[3] __read_mostly; int sysctl_udp_rmem_min __read_mostly; @@ -123,7 +119,7 @@ atomic_t udp_memory_allocated; EXPORT_SYMBOL(udp_memory_allocated); static int udp_lib_lport_inuse(struct net *net, __u16 num, - const struct hlist_head udptable[], + const struct udp_hslot *hslot, struct sock *sk, int (*saddr_comp)(const struct sock *sk1, const struct sock *sk2)) @@ -131,7 +127,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, struct sock *sk2; struct hlist_node *node; - sk_for_each(sk2, node, &udptable[udp_hashfn(net, num)]) + sk_for_each(sk2, node, &hslot->head) if (net_eq(sock_net(sk2), net) && sk2 != sk && sk2->sk_hash == num && @@ -154,12 +150,11 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, int (*saddr_comp)(const struct sock *sk1, const struct sock *sk2 ) ) { - struct hlist_head *udptable = sk->sk_prot->h.udp_hash; + struct udp_hslot *hslot; + struct udp_table *udptable = sk->sk_prot->h.udp_table; int error = 1; struct net *net = sock_net(sk); - write_lock_bh(&udp_hash_lock); - if (!snum) { int low, high, remaining; unsigned rand; @@ -171,26 +166,34 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, rand = net_random(); snum = first = rand % remaining + low; rand |= 1; - while (udp_lib_lport_inuse(net, snum, udptable, sk, - saddr_comp)) { + for (;;) { + hslot = &udptable->hash[udp_hashfn(net, snum)]; + spin_lock_bh(&hslot->lock); + if (!udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) + break; + spin_unlock_bh(&hslot->lock); do { snum = snum + rand; } while (snum < low || snum > high); if (snum == first) goto fail; } - } else if (udp_lib_lport_inuse(net, snum, udptable, sk, saddr_comp)) - goto fail; - + } else { + hslot = &udptable->hash[udp_hashfn(net, snum)]; + spin_lock_bh(&hslot->lock); + if (udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) + goto fail_unlock; + } inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { - sk_add_node(sk, &udptable[udp_hashfn(net, snum)]); + sk_add_node(sk, &hslot->head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } error = 0; +fail_unlock: + spin_unlock_bh(&hslot->lock); fail: - write_unlock_bh(&udp_hash_lock); return error; } @@ -208,63 +211,73 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal); } +static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, + unsigned short hnum, + __be16 sport, __be32 daddr, __be16 dport, int dif) +{ + int score = -1; + + if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && + !ipv6_only_sock(sk)) { + struct inet_sock *inet = inet_sk(sk); + + score = (sk->sk_family == PF_INET ? 1 : 0); + if (inet->rcv_saddr) { + if (inet->rcv_saddr != daddr) + return -1; + score += 2; + } + if (inet->daddr) { + if (inet->daddr != saddr) + return -1; + score += 2; + } + if (inet->dport) { + if (inet->dport != sport) + return -1; + score += 2; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score += 2; + } + } + return score; +} + /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, - int dif, struct hlist_head udptable[]) + int dif, struct udp_table *udptable) { struct sock *sk, *result = NULL; struct hlist_node *node; unsigned short hnum = ntohs(dport); - int badness = -1; - - read_lock(&udp_hash_lock); - sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) { - struct inet_sock *inet = inet_sk(sk); - - if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && - !ipv6_only_sock(sk)) { - int score = (sk->sk_family == PF_INET ? 1 : 0); - if (inet->rcv_saddr) { - if (inet->rcv_saddr != daddr) - continue; - score+=2; - } - if (inet->daddr) { - if (inet->daddr != saddr) - continue; - score+=2; - } - if (inet->dport) { - if (inet->dport != sport) - continue; - score+=2; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score+=2; - } - if (score == 9) { - result = sk; - break; - } else if (score > badness) { - result = sk; - badness = score; - } + unsigned int hash = udp_hashfn(net, hnum); + struct udp_hslot *hslot = &udptable->hash[hash]; + int score, badness = -1; + + spin_lock(&hslot->lock); + sk_for_each(sk, node, &hslot->head) { + score = compute_score(sk, net, saddr, hnum, sport, + daddr, dport, dif); + if (score > badness) { + result = sk; + badness = score; } } if (result) sock_hold(result); - read_unlock(&udp_hash_lock); + spin_unlock(&hslot->lock); return result; } static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, - struct hlist_head udptable[]) + struct udp_table *udptable) { struct sock *sk; const struct iphdr *iph = ip_hdr(skb); @@ -280,7 +293,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif) { - return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, udp_hash); + return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table); } EXPORT_SYMBOL_GPL(udp4_lib_lookup); @@ -323,7 +336,7 @@ found: * to find the appropriate port. */ -void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) +void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) { struct inet_sock *inet; struct iphdr *iph = (struct iphdr*)skb->data; @@ -392,7 +405,7 @@ out: void udp_err(struct sk_buff *skb, u32 info) { - __udp4_lib_err(skb, info, udp_hash); + __udp4_lib_err(skb, info, &udp_table); } /* @@ -933,6 +946,21 @@ int udp_disconnect(struct sock *sk, int flags) return 0; } +void udp_lib_unhash(struct sock *sk) +{ + struct udp_table *udptable = sk->sk_prot->h.udp_table; + unsigned int hash = udp_hashfn(sock_net(sk), sk->sk_hash); + struct udp_hslot *hslot = &udptable->hash[hash]; + + spin_lock(&hslot->lock); + if (sk_del_node_init(sk)) { + inet_sk(sk)->num = 0; + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + } + spin_unlock(&hslot->lock); +} +EXPORT_SYMBOL(udp_lib_unhash); + static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { int is_udplite = IS_UDPLITE(sk); @@ -1071,13 +1099,14 @@ drop: static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct udphdr *uh, __be32 saddr, __be32 daddr, - struct hlist_head udptable[]) + struct udp_table *udptable) { struct sock *sk; + struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))]; int dif; - read_lock(&udp_hash_lock); - sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]); + spin_lock(&hslot->lock); + sk = sk_head(&hslot->head); dif = skb->dev->ifindex; sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); if (sk) { @@ -1102,7 +1131,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, } while (sknext); } else kfree_skb(skb); - read_unlock(&udp_hash_lock); + spin_unlock(&hslot->lock); return 0; } @@ -1148,7 +1177,7 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, * All we need to do is get the socket, and then do a checksum. */ -int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], +int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { struct sock *sk; @@ -1246,7 +1275,7 @@ drop: int udp_rcv(struct sk_buff *skb) { - return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP); + return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP); } void udp_destroy_sock(struct sock *sk) @@ -1488,7 +1517,7 @@ struct proto udp_prot = { .sysctl_wmem = &sysctl_udp_wmem_min, .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp_sock), - .h.udp_hash = udp_hash, + .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, @@ -1498,20 +1527,23 @@ struct proto udp_prot = { /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS -static struct sock *udp_get_first(struct seq_file *seq) +static struct sock *udp_get_first(struct seq_file *seq, int start) { struct sock *sk; struct udp_iter_state *state = seq->private; struct net *net = seq_file_net(seq); - for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { + for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { struct hlist_node *node; - sk_for_each(sk, node, state->hashtable + state->bucket) { + struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; + spin_lock_bh(&hslot->lock); + sk_for_each(sk, node, &hslot->head) { if (!net_eq(sock_net(sk), net)) continue; if (sk->sk_family == state->family) goto found; } + spin_unlock_bh(&hslot->lock); } sk = NULL; found: @@ -1525,20 +1557,18 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) do { sk = sk_next(sk); -try_again: - ; } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); - if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { - sk = sk_head(state->hashtable + state->bucket); - goto try_again; + if (!sk) { + spin_unlock(&state->udp_table->hash[state->bucket].lock); + return udp_get_first(seq, state->bucket + 1); } return sk; } static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) { - struct sock *sk = udp_get_first(seq); + struct sock *sk = udp_get_first(seq, 0); if (sk) while (pos && (sk = udp_get_next(seq, sk)) != NULL) @@ -1547,9 +1577,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) } static void *udp_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(udp_hash_lock) { - read_lock(&udp_hash_lock); return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } @@ -1567,9 +1595,11 @@ static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void udp_seq_stop(struct seq_file *seq, void *v) - __releases(udp_hash_lock) { - read_unlock(&udp_hash_lock); + struct udp_iter_state *state = seq->private; + + if (state->bucket < UDP_HTABLE_SIZE) + spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); } static int udp_seq_open(struct inode *inode, struct file *file) @@ -1585,7 +1615,7 @@ static int udp_seq_open(struct inode *inode, struct file *file) s = ((struct seq_file *)file->private_data)->private; s->family = afinfo->family; - s->hashtable = afinfo->hashtable; + s->udp_table = afinfo->udp_table; return err; } @@ -1657,7 +1687,7 @@ int udp4_seq_show(struct seq_file *seq, void *v) static struct udp_seq_afinfo udp4_seq_afinfo = { .name = "udp", .family = AF_INET, - .hashtable = udp_hash, + .udp_table = &udp_table, .seq_fops = { .owner = THIS_MODULE, }, @@ -1692,10 +1722,21 @@ void udp4_proc_exit(void) } #endif /* CONFIG_PROC_FS */ +void __init udp_table_init(struct udp_table *table) +{ + int i; + + for (i = 0; i < UDP_HTABLE_SIZE; i++) { + INIT_HLIST_HEAD(&table->hash[i].head); + spin_lock_init(&table->hash[i].lock); + } +} + void __init udp_init(void) { unsigned long limit; + udp_table_init(&udp_table); /* Set the pressure threshold up by the same strategy of TCP. It is a * fraction of global memory that is up to 1/2 at 256 MB, decreasing * toward zero with the amount of memory, with a floor of 128 pages. @@ -1712,8 +1753,6 @@ void __init udp_init(void) } EXPORT_SYMBOL(udp_disconnect); -EXPORT_SYMBOL(udp_hash); -EXPORT_SYMBOL(udp_hash_lock); EXPORT_SYMBOL(udp_ioctl); EXPORT_SYMBOL(udp_prot); EXPORT_SYMBOL(udp_sendmsg); diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 2e9bad2fa1bc..9f4a6165f722 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -5,8 +5,8 @@ #include #include -extern int __udp4_lib_rcv(struct sk_buff *, struct hlist_head [], int ); -extern void __udp4_lib_err(struct sk_buff *, u32, struct hlist_head []); +extern int __udp4_lib_rcv(struct sk_buff *, struct udp_table *, int ); +extern void __udp4_lib_err(struct sk_buff *, u32, struct udp_table *); extern int udp_v4_get_port(struct sock *sk, unsigned short snum); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 3c807964da96..d8ea8e5f5ea3 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -12,16 +12,17 @@ */ #include "udp_impl.h" -struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; +struct udp_table udplite_table; +EXPORT_SYMBOL(udplite_table); static int udplite_rcv(struct sk_buff *skb) { - return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); + return __udp4_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); } static void udplite_err(struct sk_buff *skb, u32 info) { - __udp4_lib_err(skb, info, udplite_hash); + __udp4_lib_err(skb, info, &udplite_table); } static struct net_protocol udplite_protocol = { @@ -50,7 +51,7 @@ struct proto udplite_prot = { .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, .obj_size = sizeof(struct udp_sock), - .h.udp_hash = udplite_hash, + .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, @@ -71,7 +72,7 @@ static struct inet_protosw udplite4_protosw = { static struct udp_seq_afinfo udplite4_seq_afinfo = { .name = "udplite", .family = AF_INET, - .hashtable = udplite_hash, + .udp_table = &udplite_table, .seq_fops = { .owner = THIS_MODULE, }, @@ -108,6 +109,7 @@ static inline int udplite4_proc_init(void) void __init udplite4_register(void) { + udp_table_init(&udplite_table); if (proto_register(&udplite_prot, 1)) goto out_register_err; @@ -126,5 +128,4 @@ out_register_err: printk(KERN_CRIT "%s: Cannot add UDP-Lite protocol.\n", __func__); } -EXPORT_SYMBOL(udplite_hash); EXPORT_SYMBOL(udplite_prot); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index e51da8c092fa..ccee7244ca0f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -54,62 +54,73 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal); } +static inline int compute_score(struct sock *sk, struct net *net, + unsigned short hnum, + struct in6_addr *saddr, __be16 sport, + struct in6_addr *daddr, __be16 dport, + int dif) +{ + int score = -1; + + if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && + sk->sk_family == PF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + struct inet_sock *inet = inet_sk(sk); + + score = 0; + if (inet->dport) { + if (inet->dport != sport) + return -1; + score++; + } + if (!ipv6_addr_any(&np->rcv_saddr)) { + if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + return -1; + score++; + } + if (!ipv6_addr_any(&np->daddr)) { + if (!ipv6_addr_equal(&np->daddr, saddr)) + return -1; + score++; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score++; + } + } + return score; +} + static struct sock *__udp6_lib_lookup(struct net *net, struct in6_addr *saddr, __be16 sport, struct in6_addr *daddr, __be16 dport, - int dif, struct hlist_head udptable[]) + int dif, struct udp_table *udptable) { struct sock *sk, *result = NULL; struct hlist_node *node; unsigned short hnum = ntohs(dport); - int badness = -1; - - read_lock(&udp_hash_lock); - sk_for_each(sk, node, &udptable[udp_hashfn(net, hnum)]) { - struct inet_sock *inet = inet_sk(sk); - - if (net_eq(sock_net(sk), net) && sk->sk_hash == hnum && - sk->sk_family == PF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); - int score = 0; - if (inet->dport) { - if (inet->dport != sport) - continue; - score++; - } - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) - continue; - score++; - } - if (!ipv6_addr_any(&np->daddr)) { - if (!ipv6_addr_equal(&np->daddr, saddr)) - continue; - score++; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score++; - } - if (score == 4) { - result = sk; - break; - } else if (score > badness) { - result = sk; - badness = score; - } + unsigned int hash = udp_hashfn(net, hnum); + struct udp_hslot *hslot = &udptable->hash[hash]; + int score, badness = -1; + + spin_lock(&hslot->lock); + sk_for_each(sk, node, &hslot->head) { + score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); + if (score > badness) { + result = sk; + badness = score; } } if (result) sock_hold(result); - read_unlock(&udp_hash_lock); + spin_unlock(&hslot->lock); return result; } static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, - struct hlist_head udptable[]) + struct udp_table *udptable) { struct sock *sk; struct ipv6hdr *iph = ipv6_hdr(skb); @@ -239,7 +250,7 @@ csum_copy_err: void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info, - struct hlist_head udptable[] ) + struct udp_table *udptable) { struct ipv6_pinfo *np; struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; @@ -275,7 +286,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info ) { - __udp6_lib_err(skb, opt, type, code, offset, info, udp_hash); + __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); } int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) @@ -374,14 +385,15 @@ static struct sock *udp_v6_mcast_next(struct sock *sk, */ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct in6_addr *saddr, struct in6_addr *daddr, - struct hlist_head udptable[]) + struct udp_table *udptable) { struct sock *sk, *sk2; const struct udphdr *uh = udp_hdr(skb); + struct udp_hslot *hslot = &udptable->hash[udp_hashfn(net, ntohs(uh->dest))]; int dif; - read_lock(&udp_hash_lock); - sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]); + spin_lock(&hslot->lock); + sk = sk_head(&hslot->head); dif = inet6_iif(skb); sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); if (!sk) { @@ -409,7 +421,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, sk_add_backlog(sk, skb); bh_unlock_sock(sk); out: - read_unlock(&udp_hash_lock); + spin_unlock(&hslot->lock); return 0; } @@ -447,7 +459,7 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, return 0; } -int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], +int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { struct sock *sk; @@ -544,7 +556,7 @@ discard: static __inline__ int udpv6_rcv(struct sk_buff *skb) { - return __udp6_lib_rcv(skb, udp_hash, IPPROTO_UDP); + return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP); } /* @@ -1008,7 +1020,7 @@ int udp6_seq_show(struct seq_file *seq, void *v) static struct udp_seq_afinfo udp6_seq_afinfo = { .name = "udp6", .family = AF_INET6, - .hashtable = udp_hash, + .udp_table = &udp_table, .seq_fops = { .owner = THIS_MODULE, }, @@ -1050,7 +1062,7 @@ struct proto udpv6_prot = { .sysctl_wmem = &sysctl_udp_wmem_min, .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp6_sock), - .h.udp_hash = udp_hash, + .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 92dd7da766d8..23779208c334 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -7,9 +7,9 @@ #include #include -extern int __udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int ); +extern int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int ); extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, - int , int , int , __be32 , struct hlist_head []); + int , int , int , __be32 , struct udp_table *); extern int udp_v6_get_port(struct sock *sk, unsigned short snum); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 3cd1a1ac3d6c..f1e892a99e05 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -15,14 +15,14 @@ static int udplitev6_rcv(struct sk_buff *skb) { - return __udp6_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); + return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); } static void udplitev6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __be32 info) { - __udp6_lib_err(skb, opt, type, code, offset, info, udplite_hash); + __udp6_lib_err(skb, opt, type, code, offset, info, &udplite_table); } static struct inet6_protocol udplitev6_protocol = { @@ -49,7 +49,7 @@ struct proto udplitev6_prot = { .unhash = udp_lib_unhash, .get_port = udp_v6_get_port, .obj_size = sizeof(struct udp6_sock), - .h.udp_hash = udplite_hash, + .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, @@ -95,7 +95,7 @@ void udplitev6_exit(void) static struct udp_seq_afinfo udplite6_seq_afinfo = { .name = "udplite6", .family = AF_INET6, - .hashtable = udplite_hash, + .udp_table = &udplite_table, .seq_fops = { .owner = THIS_MODULE, }, -- cgit v1.2.3 From 271b72c7fa82c2c7a795bc16896149933110672d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Oct 2008 02:11:14 -0700 Subject: udp: RCU handling for Unicast packets. Goals are : 1) Optimizing handling of incoming Unicast UDP frames, so that no memory writes should happen in the fast path. Note: Multicasts and broadcasts still will need to take a lock, because doing a full lockless lookup in this case is difficult. 2) No expensive operations in the socket bind/unhash phases : - No expensive synchronize_rcu() calls. - No added rcu_head in socket structure, increasing memory needs, but more important, forcing us to use call_rcu() calls, that have the bad property of making sockets structure cold. (rcu grace period between socket freeing and its potential reuse make this socket being cold in CPU cache). David did a previous patch using call_rcu() and noticed a 20% impact on TCP connection rates. Quoting Cristopher Lameter : "Right. That results in cacheline cooldown. You'd want to recycle the object as they are cache hot on a per cpu basis. That is screwed up by the delayed regular rcu processing. We have seen multiple regressions due to cacheline cooldown. The only choice in cacheline hot sensitive areas is to deal with the complexity that comes with SLAB_DESTROY_BY_RCU or give up on RCU." - Because udp sockets are allocated from dedicated kmem_cache, use of SLAB_DESTROY_BY_RCU can help here. Theory of operation : --------------------- As the lookup is lockfree (using rcu_read_lock()/rcu_read_unlock()), special attention must be taken by readers and writers. Use of SLAB_DESTROY_BY_RCU is tricky too, because a socket can be freed, reused, inserted in a different chain or in worst case in the same chain while readers could do lookups in the same time. In order to avoid loops, a reader must check each socket found in a chain really belongs to the chain the reader was traversing. If it finds a mismatch, lookup must start again at the begining. This *restart* loop is the reason we had to use rdlock for the multicast case, because we dont want to send same message several times to the same socket. We use RCU only for fast path. Thus, /proc/net/udp still takes spinlocks. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 37 ++++++++++++++++++++++++++++++++++++- net/core/sock.c | 3 ++- net/ipv4/udp.c | 35 ++++++++++++++++++++++++++--------- net/ipv4/udplite.c | 1 + net/ipv6/udp.c | 31 ++++++++++++++++++++++++------- net/ipv6/udplite.c | 1 + 6 files changed, 90 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index d200dfbe1ef6..0bea25db5471 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -363,6 +363,27 @@ static __inline__ int sk_del_node_init(struct sock *sk) return rc; } +static __inline__ int __sk_del_node_init_rcu(struct sock *sk) +{ + if (sk_hashed(sk)) { + hlist_del_init_rcu(&sk->sk_node); + return 1; + } + return 0; +} + +static __inline__ int sk_del_node_init_rcu(struct sock *sk) +{ + int rc = __sk_del_node_init_rcu(sk); + + if (rc) { + /* paranoid for a while -acme */ + WARN_ON(atomic_read(&sk->sk_refcnt) == 1); + __sock_put(sk); + } + return rc; +} + static __inline__ void __sk_add_node(struct sock *sk, struct hlist_head *list) { hlist_add_head(&sk->sk_node, list); @@ -374,6 +395,17 @@ static __inline__ void sk_add_node(struct sock *sk, struct hlist_head *list) __sk_add_node(sk, list); } +static __inline__ void __sk_add_node_rcu(struct sock *sk, struct hlist_head *list) +{ + hlist_add_head_rcu(&sk->sk_node, list); +} + +static __inline__ void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) +{ + sock_hold(sk); + __sk_add_node_rcu(sk, list); +} + static __inline__ void __sk_del_bind_node(struct sock *sk) { __hlist_del(&sk->sk_bind_node); @@ -387,6 +419,8 @@ static __inline__ void sk_add_bind_node(struct sock *sk, #define sk_for_each(__sk, node, list) \ hlist_for_each_entry(__sk, node, list, sk_node) +#define sk_for_each_rcu(__sk, node, list) \ + hlist_for_each_entry_rcu(__sk, node, list, sk_node) #define sk_for_each_from(__sk, node) \ if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ hlist_for_each_entry_from(__sk, node, sk_node) @@ -589,8 +623,9 @@ struct proto { int *sysctl_rmem; int max_header; - struct kmem_cache *slab; + struct kmem_cache *slab; unsigned int obj_size; + int slab_flags; atomic_t *orphan_count; diff --git a/net/core/sock.c b/net/core/sock.c index 5e2a3132a8c9..ded1eb5d2fd4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2042,7 +2042,8 @@ int proto_register(struct proto *prot, int alloc_slab) if (alloc_slab) { prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, - SLAB_HWCACHE_ALIGN, NULL); + SLAB_HWCACHE_ALIGN | prot->slab_flags, + NULL); if (prot->slab == NULL) { printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2a6c491f97d7..0ea974bf7962 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -187,7 +187,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { - sk_add_node(sk, &hslot->head); + sk_add_node_rcu(sk, &hslot->head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } error = 0; @@ -253,15 +253,24 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif, struct udp_table *udptable) { - struct sock *sk, *result = NULL; + struct sock *sk, *result; struct hlist_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; - int score, badness = -1; + int score, badness; - spin_lock(&hslot->lock); - sk_for_each(sk, node, &hslot->head) { + rcu_read_lock(); +begin: + result = NULL; + badness = -1; + sk_for_each_rcu(sk, node, &hslot->head) { + /* + * lockless reader, and SLAB_DESTROY_BY_RCU items: + * We must check this item was not moved to another chain + */ + if (udp_hashfn(net, sk->sk_hash) != hash) + goto begin; score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { @@ -269,9 +278,16 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, badness = score; } } - if (result) - sock_hold(result); - spin_unlock(&hslot->lock); + if (result) { + if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + result = NULL; + else if (unlikely(compute_score(result, net, saddr, hnum, sport, + daddr, dport, dif) < badness)) { + sock_put(result); + goto begin; + } + } + rcu_read_unlock(); return result; } @@ -953,7 +969,7 @@ void udp_lib_unhash(struct sock *sk) struct udp_hslot *hslot = &udptable->hash[hash]; spin_lock(&hslot->lock); - if (sk_del_node_init(sk)) { + if (sk_del_node_init_rcu(sk)) { inet_sk(sk)->num = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } @@ -1517,6 +1533,7 @@ struct proto udp_prot = { .sysctl_wmem = &sysctl_udp_wmem_min, .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udp_setsockopt, diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index d8ea8e5f5ea3..c784891cb7e5 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -51,6 +51,7 @@ struct proto udplite_prot = { .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, .obj_size = sizeof(struct udp_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udp_setsockopt, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index ccee7244ca0f..1d9790e43dfc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -97,24 +97,40 @@ static struct sock *__udp6_lib_lookup(struct net *net, struct in6_addr *daddr, __be16 dport, int dif, struct udp_table *udptable) { - struct sock *sk, *result = NULL; + struct sock *sk, *result; struct hlist_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; - int score, badness = -1; + int score, badness; - spin_lock(&hslot->lock); - sk_for_each(sk, node, &hslot->head) { + rcu_read_lock(); +begin: + result = NULL; + badness = -1; + sk_for_each_rcu(sk, node, &hslot->head) { + /* + * lockless reader, and SLAB_DESTROY_BY_RCU items: + * We must check this item was not moved to another chain + */ + if (udp_hashfn(net, sk->sk_hash) != hash) + goto begin; score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; } } - if (result) - sock_hold(result); - spin_unlock(&hslot->lock); + if (result) { + if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + result = NULL; + else if (unlikely(compute_score(result, net, hnum, saddr, sport, + daddr, dport, dif) < badness)) { + sock_put(result); + goto begin; + } + } + rcu_read_unlock(); return result; } @@ -1062,6 +1078,7 @@ struct proto udpv6_prot = { .sysctl_wmem = &sysctl_udp_wmem_min, .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp6_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index f1e892a99e05..ba162a824585 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -49,6 +49,7 @@ struct proto udplitev6_prot = { .unhash = udp_lib_unhash, .get_port = udp_v6_get_port, .obj_size = sizeof(struct udp6_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udplite_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udpv6_setsockopt, -- cgit v1.2.3 From 9cbbb3ac628227ec5b65fc043539949db606cd17 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Wed, 29 Oct 2008 13:41:35 +0100 Subject: ALSA: Release v1.0.18 Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai --- include/sound/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/version.h b/include/sound/version.h index 4aafeda88634..eae315573563 100644 --- a/include/sound/version.h +++ b/include/sound/version.h @@ -1,3 +1,3 @@ /* include/version.h */ -#define CONFIG_SND_VERSION "1.0.18rc3" +#define CONFIG_SND_VERSION "1.0.18" #define CONFIG_SND_DATE "" -- cgit v1.2.3 From 96631ed16c514cf8b28fab991a076985ce378c26 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 29 Oct 2008 11:19:58 -0700 Subject: udp: introduce sk_for_each_rcu_safenext() Corey Minyard found a race added in commit 271b72c7fa82c2c7a795bc16896149933110672d (udp: RCU handling for Unicast packets.) "If the socket is moved from one list to another list in-between the time the hash is calculated and the next field is accessed, and the socket has moved to the end of the new list, the traversal will not complete properly on the list it should have, since the socket will be on the end of the new list and there's not a way to tell it's on a new list and restart the list traversal. I think that this can be solved by pre-fetching the "next" field (with proper barriers) before checking the hash." This patch corrects this problem, introducing a new sk_for_each_rcu_safenext() macro. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/rculist.h | 17 +++++++++++++++++ include/net/sock.h | 4 ++-- net/ipv4/udp.c | 4 ++-- net/ipv6/udp.c | 4 ++-- 4 files changed, 23 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index e649bd3f2c97..3ba2998b22ba 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -383,5 +383,22 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference(pos->next)) +/** + * hlist_for_each_entry_rcu_safenext - iterate over rcu list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + * @next: the &struct hlist_node to use as a next cursor + * + * Special version of hlist_for_each_entry_rcu that make sure + * each next pointer is fetched before each iteration. + */ +#define hlist_for_each_entry_rcu_safenext(tpos, pos, head, member, next) \ + for (pos = rcu_dereference((head)->first); \ + pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ + pos = rcu_dereference(next)) + #endif /* __KERNEL__ */ #endif diff --git a/include/net/sock.h b/include/net/sock.h index 0bea25db5471..a4f6d3fc0470 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -419,8 +419,8 @@ static __inline__ void sk_add_bind_node(struct sock *sk, #define sk_for_each(__sk, node, list) \ hlist_for_each_entry(__sk, node, list, sk_node) -#define sk_for_each_rcu(__sk, node, list) \ - hlist_for_each_entry_rcu(__sk, node, list, sk_node) +#define sk_for_each_rcu_safenext(__sk, node, list, next) \ + hlist_for_each_entry_rcu_safenext(__sk, node, list, sk_node, next) #define sk_for_each_from(__sk, node) \ if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ hlist_for_each_entry_from(__sk, node, sk_node) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ced820318f94..c3ecec8a9e1c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -256,7 +256,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, int dif, struct udp_table *udptable) { struct sock *sk, *result; - struct hlist_node *node; + struct hlist_node *node, *next; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; @@ -266,7 +266,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, begin: result = NULL; badness = -1; - sk_for_each_rcu(sk, node, &hslot->head) { + sk_for_each_rcu_safenext(sk, node, &hslot->head, next) { /* * lockless reader, and SLAB_DESTROY_BY_RCU items: * We must check this item was not moved to another chain diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1d9790e43dfc..32d914db6c4f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -98,7 +98,7 @@ static struct sock *__udp6_lib_lookup(struct net *net, int dif, struct udp_table *udptable) { struct sock *sk, *result; - struct hlist_node *node; + struct hlist_node *node, *next; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; @@ -108,7 +108,7 @@ static struct sock *__udp6_lib_lookup(struct net *net, begin: result = NULL; badness = -1; - sk_for_each_rcu(sk, node, &hslot->head) { + sk_for_each_rcu_safenext(sk, node, &hslot->head, next) { /* * lockless reader, and SLAB_DESTROY_BY_RCU items: * We must check this item was not moved to another chain -- cgit v1.2.3 From 5b095d98928fdb9e3b75be20a54b7a6cbf6ca9ad Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Wed, 29 Oct 2008 12:52:50 -0700 Subject: net: replace %p6 with %pI6 Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- drivers/firmware/iscsi_ibft.c | 2 +- drivers/infiniband/core/sysfs.c | 2 +- drivers/infiniband/hw/mthca/mthca_mcg.c | 4 ++-- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 4 ++-- drivers/infiniband/ulp/ipoib/ipoib_main.c | 12 +++++------ drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 30 +++++++++++++------------- drivers/infiniband/ulp/srp/ib_srp.c | 6 +++--- drivers/net/mlx4/mcg.c | 4 ++-- drivers/scsi/iscsi_tcp.c | 2 +- fs/lockd/host.c | 2 +- fs/nfs/super.c | 2 +- include/linux/sunrpc/svc_xprt.h | 2 +- include/net/ip_vs.h | 2 +- include/net/netfilter/nf_conntrack_tuple.h | 2 +- include/net/sctp/sctp.h | 2 +- net/bridge/netfilter/ebt_log.c | 2 +- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_timer.c | 2 +- net/ipv6/addrlabel.c | 10 ++++----- net/ipv6/ah6.c | 2 +- net/ipv6/esp6.c | 2 +- net/ipv6/exthdrs.c | 2 +- net/ipv6/icmp.c | 2 +- net/ipv6/ip6mr.c | 2 +- net/ipv6/ipcomp6.c | 2 +- net/ipv6/ndisc.c | 2 +- net/ipv6/netfilter/ip6t_LOG.c | 2 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/netfilter/ipvs/ip_vs_conn.c | 4 ++-- net/netfilter/ipvs/ip_vs_core.c | 4 ++-- net/netfilter/ipvs/ip_vs_ctl.c | 4 ++-- net/netfilter/ipvs/ip_vs_proto.c | 6 +++--- net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 2 +- net/netfilter/ipvs/ip_vs_xmit.c | 8 +++---- net/netfilter/nf_conntrack_ftp.c | 2 +- net/netfilter/nf_conntrack_h323_main.c | 4 ++-- net/netfilter/xt_hashlimit.c | 2 +- net/netfilter/xt_recent.c | 2 +- net/netlabel/netlabel_addrlist.c | 2 +- net/sctp/ipv6.c | 18 ++++++++-------- net/sctp/sm_statefuns.c | 2 +- net/sunrpc/clnt.c | 2 +- net/sunrpc/rpcb_clnt.c | 2 +- net/sunrpc/svcauth_unix.c | 4 ++-- net/sunrpc/xprtsock.c | 8 +++---- net/xfrm/xfrm_policy.c | 4 ++-- net/xfrm/xfrm_state.c | 4 ++-- security/selinux/avc.c | 2 +- 49 files changed, 100 insertions(+), 100 deletions(-) (limited to 'include') diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c index 0a6472097a81..acb82aff8808 100644 --- a/drivers/firmware/iscsi_ibft.c +++ b/drivers/firmware/iscsi_ibft.c @@ -290,7 +290,7 @@ static ssize_t sprintf_ipaddr(char *buf, u8 *ip) /* * IPv6 */ - str += sprintf(str, "%p6", ip); + str += sprintf(str, "%pI6", ip); } str += sprintf(str, "\n"); return str - buf; diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index e985193d631c..4f4d1bb9f069 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -262,7 +262,7 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr, if (ret) return ret; - return sprintf(buf, "%p6\n", gid.raw); + return sprintf(buf, "%pI6\n", gid.raw); } static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr, diff --git a/drivers/infiniband/hw/mthca/mthca_mcg.c b/drivers/infiniband/hw/mthca/mthca_mcg.c index 693bed0b2d1c..d4c81053e439 100644 --- a/drivers/infiniband/hw/mthca/mthca_mcg.c +++ b/drivers/infiniband/hw/mthca/mthca_mcg.c @@ -87,7 +87,7 @@ static int find_mgm(struct mthca_dev *dev, } if (0) - mthca_dbg(dev, "Hash for %p6 is %04x\n", gid, *hash); + mthca_dbg(dev, "Hash for %pI6 is %04x\n", gid, *hash); *index = *hash; *prev = -1; @@ -254,7 +254,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) goto out; if (index == -1) { - mthca_err(dev, "MGID %p6 not found\n", gid->raw); + mthca_err(dev, "MGID %pI6 not found\n", gid->raw); err = -EINVAL; goto out; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index d98d87bfe365..47d588ba2a7f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1128,7 +1128,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, goto err_send_cm; } - ipoib_dbg(priv, "Request connection 0x%x for gid %p6 qpn 0x%x\n", + ipoib_dbg(priv, "Request connection 0x%x for gid %pI6 qpn 0x%x\n", p->qp->qp_num, pathrec->dgid.raw, qpn); return 0; @@ -1276,7 +1276,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { list_move(&tx->list, &priv->cm.reap_list); queue_work(ipoib_workqueue, &priv->cm.reap_task); - ipoib_dbg(priv, "Reap connection for gid %p6\n", + ipoib_dbg(priv, "Reap connection for gid %pI6\n", tx->neigh->dgid.raw); tx->neigh = NULL; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index e7f4f94c3e92..b3a671895bdc 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -359,7 +359,7 @@ void ipoib_mark_paths_invalid(struct net_device *dev) spin_lock_irq(&priv->lock); list_for_each_entry_safe(path, tp, &priv->path_list, list) { - ipoib_dbg(priv, "mark path LID 0x%04x GID %p6 invalid\n", + ipoib_dbg(priv, "mark path LID 0x%04x GID %pI6 invalid\n", be16_to_cpu(path->pathrec.dlid), path->pathrec.dgid.raw); path->valid = 0; @@ -413,10 +413,10 @@ static void path_rec_completion(int status, unsigned long flags; if (!status) - ipoib_dbg(priv, "PathRec LID 0x%04x for GID %p6\n", + ipoib_dbg(priv, "PathRec LID 0x%04x for GID %pI6\n", be16_to_cpu(pathrec->dlid), pathrec->dgid.raw); else - ipoib_dbg(priv, "PathRec status %d for GID %p6\n", + ipoib_dbg(priv, "PathRec status %d for GID %pI6\n", status, path->pathrec.dgid.raw); skb_queue_head_init(&skqueue); @@ -527,7 +527,7 @@ static int path_rec_start(struct net_device *dev, { struct ipoib_dev_priv *priv = netdev_priv(dev); - ipoib_dbg(priv, "Start path record lookup for %p6\n", + ipoib_dbg(priv, "Start path record lookup for %pI6\n", path->pathrec.dgid.raw); init_completion(&path->done); @@ -764,7 +764,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) if ((be16_to_cpup((__be16 *) skb->data) != ETH_P_ARP) && (be16_to_cpup((__be16 *) skb->data) != ETH_P_RARP)) { - ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x %p6\n", + ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x %pI6\n", skb->dst ? "neigh" : "dst", be16_to_cpup((__be16 *) skb->data), IPOIB_QPN(phdr->hwaddr), @@ -844,7 +844,7 @@ static void ipoib_neigh_cleanup(struct neighbour *n) else return; ipoib_dbg(priv, - "neigh_cleanup for %06x %p6\n", + "neigh_cleanup for %06x %pI6\n", IPOIB_QPN(n->ha), n->ha + 4); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 0de79cf4c07c..a2eb3b9789eb 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -71,7 +71,7 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast) struct ipoib_neigh *neigh, *tmp; int tx_dropped = 0; - ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %p6\n", + ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n", mcast->mcmember.mgid.raw); spin_lock_irq(&priv->lock); @@ -204,7 +204,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { - ipoib_warn(priv, "multicast group %p6 already attached\n", + ipoib_warn(priv, "multicast group %pI6 already attached\n", mcast->mcmember.mgid.raw); return 0; @@ -213,7 +213,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, ret = ipoib_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid), &mcast->mcmember.mgid, set_qkey); if (ret < 0) { - ipoib_warn(priv, "couldn't attach QP to multicast group %p6\n", + ipoib_warn(priv, "couldn't attach QP to multicast group %pI6\n", mcast->mcmember.mgid.raw); clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags); @@ -245,7 +245,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, mcast->ah = ah; spin_unlock_irq(&priv->lock); - ipoib_dbg_mcast(priv, "MGID %p6 AV %p, LID 0x%04x, SL %d\n", + ipoib_dbg_mcast(priv, "MGID %pI6 AV %p, LID 0x%04x, SL %d\n", mcast->mcmember.mgid.raw, mcast->ah->ah, be16_to_cpu(mcast->mcmember.mlid), @@ -291,7 +291,7 @@ ipoib_mcast_sendonly_join_complete(int status, if (status) { if (mcast->logcount++ < 20) - ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %p6, status %d\n", + ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n", mcast->mcmember.mgid.raw, status); /* Flush out any queued packets */ @@ -351,7 +351,7 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast) ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n", ret); } else { - ipoib_dbg_mcast(priv, "no multicast record for %p6, starting join\n", + ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n", mcast->mcmember.mgid.raw); } @@ -380,7 +380,7 @@ static int ipoib_mcast_join_complete(int status, struct net_device *dev = mcast->dev; struct ipoib_dev_priv *priv = netdev_priv(dev); - ipoib_dbg_mcast(priv, "join completion for %p6 (status %d)\n", + ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n", mcast->mcmember.mgid.raw, status); /* We trap for port events ourselves. */ @@ -410,10 +410,10 @@ static int ipoib_mcast_join_complete(int status, if (mcast->logcount++ < 20) { if (status == -ETIMEDOUT) { - ipoib_dbg_mcast(priv, "multicast join failed for %p6, status %d\n", + ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n", mcast->mcmember.mgid.raw, status); } else { - ipoib_warn(priv, "multicast join failed for %p6, status %d\n", + ipoib_warn(priv, "multicast join failed for %pI6, status %d\n", mcast->mcmember.mgid.raw, status); } } @@ -446,7 +446,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, ib_sa_comp_mask comp_mask; int ret = 0; - ipoib_dbg_mcast(priv, "joining MGID %p6\n", mcast->mcmember.mgid.raw); + ipoib_dbg_mcast(priv, "joining MGID %pI6\n", mcast->mcmember.mgid.raw); rec.mgid = mcast->mcmember.mgid; rec.port_gid = priv->local_gid; @@ -631,7 +631,7 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast) ib_sa_free_multicast(mcast->mc); if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) { - ipoib_dbg_mcast(priv, "leaving MGID %p6\n", + ipoib_dbg_mcast(priv, "leaving MGID %pI6\n", mcast->mcmember.mgid.raw); /* Remove ourselves from the multicast group */ @@ -663,7 +663,7 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) mcast = __ipoib_mcast_find(dev, mgid); if (!mcast) { /* Let's create a new send only group now */ - ipoib_dbg_mcast(priv, "setting up send only multicast group for %p6\n", + ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n", mgid); mcast = ipoib_mcast_alloc(dev, 0); @@ -797,13 +797,13 @@ void ipoib_mcast_restart_task(struct work_struct *work) /* ignore group which is directly joined by userspace */ if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) && !ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) { - ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid %p6\n", + ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid %pI6\n", mgid.raw); continue; } /* Not found or send-only group, let's add a new entry */ - ipoib_dbg_mcast(priv, "adding multicast entry for mgid %p6\n", + ipoib_dbg_mcast(priv, "adding multicast entry for mgid %pI6\n", mgid.raw); nmcast = ipoib_mcast_alloc(dev, 0); @@ -837,7 +837,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) { if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) && !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { - ipoib_dbg_mcast(priv, "deleting multicast group %p6\n", + ipoib_dbg_mcast(priv, "deleting multicast group %pI6\n", mcast->mcmember.mgid.raw); rb_erase(&mcast->rb_node, &priv->multicast_tree); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index bc825310c6db..7c13db885bf6 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1514,7 +1514,7 @@ static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, target->state == SRP_TARGET_REMOVED) return -ENODEV; - return sprintf(buf, "%p6\n", target->path.dgid.raw); + return sprintf(buf, "%pI6\n", target->path.dgid.raw); } static ssize_t show_orig_dgid(struct device *dev, @@ -1526,7 +1526,7 @@ static ssize_t show_orig_dgid(struct device *dev, target->state == SRP_TARGET_REMOVED) return -ENODEV; - return sprintf(buf, "%p6\n", target->orig_dgid); + return sprintf(buf, "%pI6\n", target->orig_dgid); } static ssize_t show_zero_req_lim(struct device *dev, @@ -1867,7 +1867,7 @@ static ssize_t srp_create_target(struct device *dev, shost_printk(KERN_DEBUG, target->scsi_host, PFX "new target: id_ext %016llx ioc_guid %016llx pkey %04x " - "service_id %016llx dgid %p6\n", + "service_id %016llx dgid %pI6\n", (unsigned long long) be64_to_cpu(target->id_ext), (unsigned long long) be64_to_cpu(target->ioc_guid), be16_to_cpu(target->path.pkey), diff --git a/drivers/net/mlx4/mcg.c b/drivers/net/mlx4/mcg.c index 6f79e84a5c9a..b1622062b12d 100644 --- a/drivers/net/mlx4/mcg.c +++ b/drivers/net/mlx4/mcg.c @@ -118,7 +118,7 @@ static int find_mgm(struct mlx4_dev *dev, return err; if (0) - mlx4_dbg(dev, "Hash for %p6 is %04x\n", gid, *hash); + mlx4_dbg(dev, "Hash for %pI6 is %04x\n", gid, *hash); *index = *hash; *prev = -1; @@ -267,7 +267,7 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]) goto out; if (index == -1) { - mlx4_err(dev, "MGID %p6 not found\n", gid); + mlx4_err(dev, "MGID %pI6 not found\n", gid); err = -EINVAL; goto out; } diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index ef929aef7c12..24d09028a27f 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -1608,7 +1608,7 @@ static int iscsi_tcp_get_addr(struct iscsi_conn *conn, struct socket *sock, case AF_INET6: sin6 = (struct sockaddr_in6 *)addr; spin_lock_bh(&conn->session->lock); - sprintf(buf, "%p6", &sin6->sin6_addr); + sprintf(buf, "%pI6", &sin6->sin6_addr); *port = be16_to_cpu(sin6->sin6_port); spin_unlock_bh(&conn->session->lock); break; diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 344e6b475e05..c8ab7d70390d 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -122,7 +122,7 @@ static void nlm_display_address(const struct sockaddr *sap, snprintf(buf, len, NIPQUAD_FMT, NIPQUAD(sin6->sin6_addr.s6_addr32[3])); else - snprintf(buf, len, "%p6", &sin6->sin6_addr); + snprintf(buf, len, "%pI6", &sin6->sin6_addr); break; default: snprintf(buf, len, "unsupported address family"); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 5fe77219df78..eb391d8d70ba 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -468,7 +468,7 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss, } case AF_INET6: { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; - seq_printf(m, ",mountaddr=%p6", &sin6->sin6_addr); + seq_printf(m, ",mountaddr=%pI6", &sin6->sin6_addr); break; } default: diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 42e01c93c7ea..51cb75ea42d5 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -145,7 +145,7 @@ static inline char *__svc_print_addr(struct sockaddr *addr, break; case AF_INET6: - snprintf(buf, len, "%p6, port=%u", + snprintf(buf, len, "%pI6, port=%u", &((struct sockaddr_in6 *)addr)->sin6_addr, ntohs(((struct sockaddr_in6 *) addr)->sin6_port)); break; diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 6a6692067092..af48cada561e 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -87,7 +87,7 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, int len; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) - len = snprintf(&buf[*idx], buf_len - *idx, "[%p6]", + len = snprintf(&buf[*idx], buf_len - *idx, "[%pI6]", &addr->in6) + 1; else #endif diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index 303efaf68d08..42f1fc96f3ec 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -122,7 +122,7 @@ static inline void nf_ct_dump_tuple_ip(const struct nf_conntrack_tuple *t) static inline void nf_ct_dump_tuple_ipv6(const struct nf_conntrack_tuple *t) { #ifdef DEBUG - printk("tuple %p: %u %p6 %hu -> %p6 %hu\n", + printk("tuple %p: %u %pI6 %hu -> %pI6 %hu\n", t, t->dst.protonum, t->src.u3.all, ntohs(t->src.u.all), t->dst.u3.all, ntohs(t->dst.u.all)); diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index a84c3976e1b0..e71b0f7ce88e 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -285,7 +285,7 @@ extern int sctp_debug_flag; if (sctp_debug_flag) { \ if (saddr->sa.sa_family == AF_INET6) { \ printk(KERN_DEBUG \ - lead "%p6" trail, \ + lead "%pI6" trail, \ leadparm, \ &saddr->v6.sin6_addr, \ otherparms); \ diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 3654f3ebdb8f..5e7cff3542f2 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -133,7 +133,7 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum, printk(" INCOMPLETE IPv6 header"); goto out; } - printk(" IPv6 SRC=%p6 IPv6 DST=%p6, IPv6 priority=0x%01X, Next Header=%d", + printk(" IPv6 SRC=%pI6 IPv6 DST=%pI6, IPv6 priority=0x%01X, Next Header=%d", &ih->saddr, &ih->daddr, ih->priority, ih->nexthdr); nexthdr = ih->nexthdr; offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 191c06bb0f67..04909e4b3c4c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2346,7 +2346,7 @@ static void DBGUNDO(struct sock *sk, const char *msg) #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); - printk(KERN_DEBUG "Undo %s %p6/%u c%u l%u ss%u/%u p%u\n", + printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n", msg, &np->daddr, ntohs(inet->dport), tp->snd_cwnd, tcp_left_out(tp), diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 4e6ee5205237..979c9d604eb0 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -306,7 +306,7 @@ static void tcp_retransmit_timer(struct sock *sk) #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (sk->sk_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); - LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer %p6:%u/%u shrinks window %u:%u. Repaired.\n", + LIMIT_NETDEBUG(KERN_DEBUG "TCP: Treason uncloaked! Peer %pI6:%u/%u shrinks window %u:%u. Repaired.\n", &np->daddr, ntohs(inet->dport), inet->num, tp->snd_una, tp->snd_nxt); } diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index d28036659d27..6ff73c4c126a 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -186,7 +186,7 @@ u32 ipv6_addr_label(struct net *net, label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT; rcu_read_unlock(); - ADDRLABEL(KERN_DEBUG "%s(addr=%p6, type=%d, ifindex=%d) => %08x\n", + ADDRLABEL(KERN_DEBUG "%s(addr=%pI6, type=%d, ifindex=%d) => %08x\n", __func__, addr, type, ifindex, label); return label; @@ -201,7 +201,7 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, struct ip6addrlbl_entry *newp; int addrtype; - ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d, label=%u)\n", + ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u)\n", __func__, prefix, prefixlen, ifindex, (unsigned int)label); addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK); @@ -289,7 +289,7 @@ static int ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp; int ret = 0; - ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", + ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n", __func__, prefix, prefixlen, ifindex, (unsigned int)label, replace); @@ -313,7 +313,7 @@ static int __ip6addrlbl_del(struct net *net, struct hlist_node *pos, *n; int ret = -ESRCH; - ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d)\n", + ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", __func__, prefix, prefixlen, ifindex); hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) { @@ -337,7 +337,7 @@ static int ip6addrlbl_del(struct net *net, struct in6_addr prefix_buf; int ret; - ADDRLABEL(KERN_DEBUG "%s(prefix=%p6, prefixlen=%d, ifindex=%d)\n", + ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", __func__, prefix, prefixlen, ifindex); ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 9bc43f2527ce..7a8a01369e5c 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -419,7 +419,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!x) return; - NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%p6\n", + NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n", ntohl(ah->spi), &iph->daddr); xfrm_state_put(x); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index ec4be188c341..c02a6308defe 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -367,7 +367,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) return; - printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%p6\n", + printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%pI6\n", ntohl(esph->spi), &iph->daddr); xfrm_state_put(x); } diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index a89051468359..1c7f400a3cfe 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -219,7 +219,7 @@ static int ipv6_dest_hao(struct sk_buff *skb, int optoff) if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) { LIMIT_NETDEBUG( - KERN_DEBUG "hao is not an unicast addr: %p6\n", &hao->addr); + KERN_DEBUG "hao is not an unicast addr: %pI6\n", &hao->addr); goto discard; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index b3fa38e40dc4..3c2821f9b529 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -681,7 +681,7 @@ static int icmpv6_rcv(struct sk_buff *skb) skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, 0)); if (__skb_checksum_complete(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%p6 > %p6]\n", + LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n", saddr, daddr); goto discard_it; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 798e6a29dd83..c491fb98a5e3 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -297,7 +297,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) const struct mfc6_cache *mfc = v; const struct ipmr_mfc_iter *it = seq->private; - seq_printf(seq, "%p6 %p6 %-3d %8ld %8ld %8ld", + seq_printf(seq, "%pI6 %pI6 %-3d %8ld %8ld %8ld", &mfc->mf6c_mcastgrp, &mfc->mf6c_origin, mfc->mf6c_parent, mfc->mfc_un.res.pkt, diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 9566d8f73140..d4576a9c154f 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -67,7 +67,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!x) return; - printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%p6\n", + printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI6\n", spi, &iph->daddr); xfrm_state_put(x); } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 191bb0722a7c..2a6752dae09d 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -647,7 +647,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) if ((probes -= neigh->parms->ucast_probes) < 0) { if (!(neigh->nud_state & NUD_VALID)) { - ND_PRINTK1(KERN_DEBUG "%s(): trying to ucast probe in NUD_INVALID: %p6\n", + ND_PRINTK1(KERN_DEBUG "%s(): trying to ucast probe in NUD_INVALID: %pI6\n", __func__, target); } ndisc_send_ns(dev, neigh, target, target, saddr); diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index a61ce3010007..02885e8bb69b 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -61,7 +61,7 @@ static void dump_packet(const struct nf_loginfo *info, } /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */ - printk("SRC=%p6 DST=%p6 ", &ih->saddr, &ih->daddr); + printk("SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr); /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ printk("LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index b165a273c6c0..727b9530448a 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -56,7 +56,7 @@ static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple, static int ipv6_print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple) { - return seq_printf(s, "src=%p6 dst=%p6 ", + return seq_printf(s, "src=%pI6 dst=%pI6 ", tuple->src.u3.ip6, tuple->dst.u3.ip6); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 483550cfdf33..984276463a8d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -872,7 +872,7 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb) if (genhash || memcmp(hash_location, newhash, 16) != 0) { if (net_ratelimit()) { - printk(KERN_INFO "MD5 Hash %s for (%p6, %u)->(%p6, %u)\n", + printk(KERN_INFO "MD5 Hash %s for (%pI6, %u)->(%pI6, %u)\n", genhash ? "failed" : "mismatch", &ip6h->saddr, ntohs(th->source), &ip6h->daddr, ntohs(th->dest)); diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 89bf65be6f2c..60aba45023ff 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -820,7 +820,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) - seq_printf(seq, "%-3s %p6 %04X %p6 %04X %p6 %04X %-11s %7lu\n", + seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X %pI6 %04X %-11s %7lu\n", ip_vs_proto_name(cp->protocol), &cp->caddr.in6, ntohs(cp->cport), &cp->vaddr.in6, ntohs(cp->vport), @@ -881,7 +881,7 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) - seq_printf(seq, "%-3s %p6 %04X %p6 %04X %p6 %04X %-11s %-6s %7lu\n", + seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X %pI6 %04X %-11s %-6s %7lu\n", ip_vs_proto_name(cp->protocol), &cp->caddr.in6, ntohs(cp->cport), &cp->vaddr.in6, ntohs(cp->vport), diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 9400587a01e7..c3c68443b5b1 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -805,7 +805,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related) if (ic == NULL) return NF_DROP; - IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) %p6->%p6\n", + IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) %pI6->%pI6\n", ic->icmp6_type, ntohs(icmpv6_id(ic)), &iph->saddr, &iph->daddr); @@ -1175,7 +1175,7 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) if (ic == NULL) return NF_DROP; - IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) %p6->%p6\n", + IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) %pI6->%pI6\n", ic->icmp6_type, ntohs(icmpv6_id(ic)), &iph->saddr, &iph->daddr); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 28c47c0d5142..76db27ec9633 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1867,7 +1867,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) if (iter->table == ip_vs_svc_table) { #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) - seq_printf(seq, "%s [%p6]:%04X %s ", + seq_printf(seq, "%s [%pI6]:%04X %s ", ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), @@ -1895,7 +1895,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) #ifdef CONFIG_IP_VS_IPV6 if (dest->af == AF_INET6) seq_printf(seq, - " -> [%p6]:%04X" + " -> [%pI6]:%04X" " %-7s %-6d %-10d %-10d\n", &dest->addr.in6, ntohs(dest->port), diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index d7ce4f1839c9..54cd67fbfe74 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -203,7 +203,7 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp, if (ih == NULL) sprintf(buf, "%s TRUNCATED", pp->name); else if (ih->nexthdr == IPPROTO_FRAGMENT) - sprintf(buf, "%s %p6->%p6 frag", + sprintf(buf, "%s %pI6->%pI6 frag", pp->name, &ih->saddr, &ih->daddr); else { __be16 _ports[2], *pptr; @@ -211,10 +211,10 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp, pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr), sizeof(_ports), _ports); if (pptr == NULL) - sprintf(buf, "%s TRUNCATED %p6->%p6", + sprintf(buf, "%s TRUNCATED %pI6->%pI6", pp->name, &ih->saddr, &ih->daddr); else - sprintf(buf, "%s %p6:%u->%p6:%u", + sprintf(buf, "%s %pI6:%u->%pI6:%u", pp->name, &ih->saddr, ntohs(pptr[0]), &ih->daddr, ntohs(pptr[1])); diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index 59f2d11b683e..6ede88812044 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -154,7 +154,7 @@ ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb, if (ih == NULL) sprintf(buf, "%s TRUNCATED", pp->name); else - sprintf(buf, "%s %p6->%p6", + sprintf(buf, "%s %pI6->%pI6", pp->name, &ih->saddr, &ih->daddr); printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index be34c335cabe..fc342dda950a 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -141,12 +141,12 @@ __ip_vs_get_out_rt_v6(struct ip_vs_conn *cp) NULL, &fl); if (!rt) { spin_unlock(&dest->dst_lock); - IP_VS_DBG_RL("ip6_route_output error, dest: %p6\n", + IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", &dest->addr.in6); return NULL; } __ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst)); - IP_VS_DBG(10, "new dst %p6, refcnt=%d\n", + IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n", &dest->addr.in6, atomic_read(&rt->u.dst.__refcnt)); } @@ -166,7 +166,7 @@ __ip_vs_get_out_rt_v6(struct ip_vs_conn *cp) rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); if (!rt) { - IP_VS_DBG_RL("ip6_route_output error, dest: %p6\n", + IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", &cp->daddr.in6); return NULL; } @@ -300,7 +300,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); if (!rt) { - IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, dest: %p6\n", + IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, dest: %pI6\n", &iph->daddr); goto tx_error_icmp; } diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 05bf82d345ce..8cab6d595909 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -467,7 +467,7 @@ static int help(struct sk_buff *skb, NIPQUAD(cmd.u3.ip), NIPQUAD(ct->tuplehash[dir].tuple.src.u3.ip)); } else { - pr_debug("conntrack_ftp: NOT RECORDING: %p6 != %p6\n", + pr_debug("conntrack_ftp: NOT RECORDING: %pI6 != %pI6\n", cmd.u3.ip6, ct->tuplehash[dir].tuple.src.u3.ip6); } diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 29e49b1c80b3..99bc803d1dd1 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -850,7 +850,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, get_h225_addr(ct, *data, &setup->destCallSignalAddress, &addr, &port) && memcmp(&addr, &ct->tuplehash[!dir].tuple.src.u3, sizeof(addr))) { - pr_debug("nf_ct_q931: set destCallSignalAddress %p6:%hu->%p6:%hu\n", + pr_debug("nf_ct_q931: set destCallSignalAddress %pI6:%hu->%pI6:%hu\n", &addr, ntohs(port), &ct->tuplehash[!dir].tuple.src.u3, ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port)); ret = set_h225_addr(skb, data, dataoff, @@ -866,7 +866,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, get_h225_addr(ct, *data, &setup->sourceCallSignalAddress, &addr, &port) && memcmp(&addr, &ct->tuplehash[!dir].tuple.dst.u3, sizeof(addr))) { - pr_debug("nf_ct_q931: set sourceCallSignalAddress %p6:%hu->%p6:%hu\n", + pr_debug("nf_ct_q931: set sourceCallSignalAddress %pI6:%hu->%pI6:%hu\n", &addr, ntohs(port), &ct->tuplehash[!dir].tuple.dst.u3, ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port)); ret = set_h225_addr(skb, data, dataoff, diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index f04c6ed43674..6379717f9044 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -904,7 +904,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, ent->rateinfo.cost); #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) case NFPROTO_IPV6: - return seq_printf(s, "%ld %p6:%u->%p6:%u %u %u %u\n", + return seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n", (long)(ent->expires - jiffies)/HZ, &ent->dst.ip6.src, ntohs(ent->dst.src_port), diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index a377ea333e16..b785727a5bf7 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -426,7 +426,7 @@ static int recent_seq_show(struct seq_file *seq, void *v) "oldest_pkt: %u", NIPQUAD(e->addr.ip), e->ttl, e->stamps[i], e->index); else - seq_printf(seq, "src=%p6 ttl: %u last_seen: %lu oldest_pkt: %u", + seq_printf(seq, "src=%pI6 ttl: %u last_seen: %lu oldest_pkt: %u", &e->addr.in6, e->ttl, e->stamps[i], e->index); for (i = 0; i < e->nstamps; i++) seq_printf(seq, "%s %lu", i ? "," : "", e->stamps[i]); diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c index 614c95ec39df..8b1c58b0820c 100644 --- a/net/netlabel/netlabel_addrlist.c +++ b/net/netlabel/netlabel_addrlist.c @@ -370,7 +370,7 @@ void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf, if (dev != NULL) audit_log_format(audit_buf, " netif=%s", dev); - audit_log_format(audit_buf, " %s=%p6", dir, addr); + audit_log_format(audit_buf, " %s=%pI6", dir, addr); if (ntohl(mask->s6_addr32[3]) != 0xffffffff) { u32 mask_len = 0; u32 mask_val; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e82668bd2b50..ceaa4aa066ea 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -223,7 +223,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) ipv6_addr_copy(&fl.fl6_dst, rt0->addr); } - SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%p6 dst:%p6\n", + SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb, skb->len, &fl.fl6_src, &fl.fl6_dst); @@ -251,18 +251,18 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc, fl.oif = daddr->v6.sin6_scope_id; - SCTP_DEBUG_PRINTK("%s: DST=%p6 ", __func__, &fl.fl6_dst); + SCTP_DEBUG_PRINTK("%s: DST=%pI6 ", __func__, &fl.fl6_dst); if (saddr) { ipv6_addr_copy(&fl.fl6_src, &saddr->v6.sin6_addr); - SCTP_DEBUG_PRINTK("SRC=%p6 - ", &fl.fl6_src); + SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl.fl6_src); } dst = ip6_route_output(&init_net, NULL, &fl); if (!dst->error) { struct rt6_info *rt; rt = (struct rt6_info *)dst; - SCTP_DEBUG_PRINTK("rt6_dst:%p6 rt6_src:%p6\n", + SCTP_DEBUG_PRINTK("rt6_dst:%pI6 rt6_src:%pI6\n", &rt->rt6i_dst.addr, &rt->rt6i_src.addr); return dst; } @@ -309,7 +309,7 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk, __u8 matchlen = 0; __u8 bmatchlen; - SCTP_DEBUG_PRINTK("%s: asoc:%p dst:%p daddr:%p6 ", + SCTP_DEBUG_PRINTK("%s: asoc:%p dst:%p daddr:%pI6 ", __func__, asoc, dst, &daddr->v6.sin6_addr); if (!asoc) { @@ -318,7 +318,7 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk, &daddr->v6.sin6_addr, inet6_sk(&sk->inet.sk)->srcprefs, &saddr->v6.sin6_addr); - SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: %p6\n", + SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: %pI6\n", &saddr->v6.sin6_addr); return; } @@ -347,10 +347,10 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk, if (baddr) { memcpy(saddr, baddr, sizeof(union sctp_addr)); - SCTP_DEBUG_PRINTK("saddr: %p6\n", &saddr->v6.sin6_addr); + SCTP_DEBUG_PRINTK("saddr: %pI6\n", &saddr->v6.sin6_addr); } else { printk(KERN_ERR "%s: asoc:%p Could not find a valid source " - "address for the dest:%p6\n", + "address for the dest:%pI6\n", __func__, asoc, &daddr->v6.sin6_addr); } @@ -720,7 +720,7 @@ static int sctp_v6_is_ce(const struct sk_buff *skb) /* Dump the v6 addr to the seq file. */ static void sctp_v6_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr) { - seq_printf(seq, "%p6 ", &addr->v6.sin6_addr); + seq_printf(seq, "%pI6 ", &addr->v6.sin6_addr); } static void sctp_v6_ecn_capable(struct sock *sk) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 9f370964e733..d07b484b873a 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1123,7 +1123,7 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, if (from_addr.sa.sa_family == AF_INET6) { if (net_ratelimit()) printk(KERN_WARNING - "%s association %p could not find address %p6\n", + "%s association %p could not find address %pI6\n", __func__, asoc, &from_addr.v6.sin6_addr); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 26f61fd11eab..8f067497c212 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -278,7 +278,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) case AF_INET6: { struct sockaddr_in6 *sin = (struct sockaddr_in6 *)args->address; - snprintf(servername, sizeof(servername), "%p6", + snprintf(servername, sizeof(servername), "%pI6", &sin->sin6_addr); break; } diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 968ec1f66bc3..4c8adadc214d 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -305,7 +305,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, snprintf(buf, sizeof(buf), "::.%u.%u", port >> 8, port & 0xff); else - snprintf(buf, sizeof(buf), "%p6.%u.%u", + snprintf(buf, sizeof(buf), "%pI6.%u.%u", &address_to_register->sin6_addr, port >> 8, port & 0xff); map->r_addr = buf; diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index eb640c1a1bc9..16f714a247bc 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -168,7 +168,7 @@ static void ip_map_request(struct cache_detail *cd, ntohl(im->m_addr.s6_addr32[3]) >> 8 & 0xff, ntohl(im->m_addr.s6_addr32[3]) >> 0 & 0xff); } else { - snprintf(text_addr, 40, "%p6", &im->m_addr); + snprintf(text_addr, 40, "%pI6", &im->m_addr); } qword_add(bpp, blen, im->m_class); qword_add(bpp, blen, text_addr); @@ -286,7 +286,7 @@ static int ip_map_show(struct seq_file *m, ntohl(addr.s6_addr32[3]) >> 0 & 0xff, dom); } else { - seq_printf(m, "%s %p6 %s\n", im->m_class, &addr, dom); + seq_printf(m, "%s %pI6 %s\n", im->m_class, &addr, dom); } return 0; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3c9aff584579..f9ce3c9949d5 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -341,7 +341,7 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt, buf = kzalloc(40, GFP_KERNEL); if (buf) { - snprintf(buf, 40, "%p6",&addr->sin6_addr); + snprintf(buf, 40, "%pI6",&addr->sin6_addr); } xprt->address_strings[RPC_DISPLAY_ADDR] = buf; @@ -356,7 +356,7 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt, buf = kzalloc(64, GFP_KERNEL); if (buf) { - snprintf(buf, 64, "addr=%p6 port=%u proto=%s", + snprintf(buf, 64, "addr=%pI6 port=%u proto=%s", &addr->sin6_addr, ntohs(addr->sin6_port), protocol); @@ -378,7 +378,7 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt, buf = kzalloc(50, GFP_KERNEL); if (buf) { - snprintf(buf, 50, "%p6.%u.%u", + snprintf(buf, 50, "%pI6.%u.%u", &addr->sin6_addr, ntohs(addr->sin6_port) >> 8, ntohs(addr->sin6_port) & 0xff); @@ -1407,7 +1407,7 @@ static int xs_bind6(struct sock_xprt *transport, struct socket *sock) if (port > last) nloop++; } while (err == -EADDRINUSE && nloop != 2); - dprintk("RPC: xs_bind6 %p6:%u: %s (%d)\n", + dprintk("RPC: xs_bind6 %pI6:%u: %s (%d)\n", &myaddr.sin6_addr, port, err ? "failed" : "ok", err); return err; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index f052b069f983..80b13eea30e7 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2467,11 +2467,11 @@ static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, sel->prefixlen_d); break; case AF_INET6: - audit_log_format(audit_buf, " src=%p6", sel->saddr.a6); + audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6); if (sel->prefixlen_s != 128) audit_log_format(audit_buf, " src_prefixlen=%d", sel->prefixlen_s); - audit_log_format(audit_buf, " dst=%p6", sel->daddr.a6); + audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6); if (sel->prefixlen_d != 128) audit_log_format(audit_buf, " dst_prefixlen=%d", sel->prefixlen_d); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 7944861fb9bf..304eca4ac970 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2115,7 +2115,7 @@ static void xfrm_audit_helper_sainfo(struct xfrm_state *x, NIPQUAD(x->id.daddr.a4)); break; case AF_INET6: - audit_log_format(audit_buf, " src=%p6 dst=%p6", + audit_log_format(audit_buf, " src=%pI6 dst=%pI6", x->props.saddr.a6, x->id.daddr.a6); break; } @@ -2140,7 +2140,7 @@ static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family, case AF_INET6: iph6 = ipv6_hdr(skb); audit_log_format(audit_buf, - " src=%p6 dst=%p6 flowlbl=0x%x%02x%02x", + " src=%pI6 dst=%pI6 flowlbl=0x%x%02x%02x", &iph6->saddr,&iph6->daddr, iph6->flow_lbl[0] & 0x0f, iph6->flow_lbl[1], diff --git a/security/selinux/avc.c b/security/selinux/avc.c index c91008f438a2..ed6af12cdf43 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -495,7 +495,7 @@ static inline void avc_print_ipv6_addr(struct audit_buffer *ab, char *name1, char *name2) { if (!ipv6_addr_any(addr)) - audit_log_format(ab, " %s=%p6", name1, addr); + audit_log_format(ab, " %s=%pI6", name1, addr); if (port) audit_log_format(ab, " %s=%d", name2, ntohs(port)); } -- cgit v1.2.3 From 2cb1599f9b2ecdd7a9e59feeee647eb258966839 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:32:23 +1100 Subject: Inode: Allow external initialisers To allow XFS to combine the XFS and linux inodes into a single structure, we need to drive inode lookup from the XFS inode cache, not the generic inode cache. This means that we need initialise a struct inode from a context outside alloc_inode() as it is no longer used by XFS. Factor and export the struct inode initialisation code from alloc_inode() to inode_init_always() as a counterpart to inode_init_once(). i.e. we have to call this init function for each inode instantiation (always), as opposed inode_init_once() which is only called on slab object instantiation (once). Signed-off-by: Dave Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/inode.c | 140 +++++++++++++++++++++++++++++------------------------ include/linux/fs.h | 1 + 2 files changed, 79 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/fs/inode.c b/fs/inode.c index 0487ddba1397..e7ee99907d60 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -108,84 +108,100 @@ static void wake_up_inode(struct inode *inode) wake_up_bit(&inode->i_state, __I_LOCK); } -static struct inode *alloc_inode(struct super_block *sb) +/** + * inode_init_always - perform inode structure intialisation + * @sb - superblock inode belongs to. + * @inode - inode to initialise + * + * These are initializations that need to be done on every inode + * allocation as the fields are not initialised by slab allocation. + */ +struct inode *inode_init_always(struct super_block *sb, struct inode *inode) { static const struct address_space_operations empty_aops; static struct inode_operations empty_iops; static const struct file_operations empty_fops; - struct inode *inode; - - if (sb->s_op->alloc_inode) - inode = sb->s_op->alloc_inode(sb); - else - inode = (struct inode *) kmem_cache_alloc(inode_cachep, GFP_KERNEL); - if (inode) { - struct address_space * const mapping = &inode->i_data; - - inode->i_sb = sb; - inode->i_blkbits = sb->s_blocksize_bits; - inode->i_flags = 0; - atomic_set(&inode->i_count, 1); - inode->i_op = &empty_iops; - inode->i_fop = &empty_fops; - inode->i_nlink = 1; - atomic_set(&inode->i_writecount, 0); - inode->i_size = 0; - inode->i_blocks = 0; - inode->i_bytes = 0; - inode->i_generation = 0; + struct address_space * const mapping = &inode->i_data; + + inode->i_sb = sb; + inode->i_blkbits = sb->s_blocksize_bits; + inode->i_flags = 0; + atomic_set(&inode->i_count, 1); + inode->i_op = &empty_iops; + inode->i_fop = &empty_fops; + inode->i_nlink = 1; + atomic_set(&inode->i_writecount, 0); + inode->i_size = 0; + inode->i_blocks = 0; + inode->i_bytes = 0; + inode->i_generation = 0; #ifdef CONFIG_QUOTA - memset(&inode->i_dquot, 0, sizeof(inode->i_dquot)); + memset(&inode->i_dquot, 0, sizeof(inode->i_dquot)); #endif - inode->i_pipe = NULL; - inode->i_bdev = NULL; - inode->i_cdev = NULL; - inode->i_rdev = 0; - inode->dirtied_when = 0; - if (security_inode_alloc(inode)) { - if (inode->i_sb->s_op->destroy_inode) - inode->i_sb->s_op->destroy_inode(inode); - else - kmem_cache_free(inode_cachep, (inode)); - return NULL; - } + inode->i_pipe = NULL; + inode->i_bdev = NULL; + inode->i_cdev = NULL; + inode->i_rdev = 0; + inode->dirtied_when = 0; + if (security_inode_alloc(inode)) { + if (inode->i_sb->s_op->destroy_inode) + inode->i_sb->s_op->destroy_inode(inode); + else + kmem_cache_free(inode_cachep, (inode)); + return NULL; + } - spin_lock_init(&inode->i_lock); - lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); + spin_lock_init(&inode->i_lock); + lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); - mutex_init(&inode->i_mutex); - lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key); + mutex_init(&inode->i_mutex); + lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key); - init_rwsem(&inode->i_alloc_sem); - lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key); + init_rwsem(&inode->i_alloc_sem); + lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key); - mapping->a_ops = &empty_aops; - mapping->host = inode; - mapping->flags = 0; - mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE); - mapping->assoc_mapping = NULL; - mapping->backing_dev_info = &default_backing_dev_info; - mapping->writeback_index = 0; + mapping->a_ops = &empty_aops; + mapping->host = inode; + mapping->flags = 0; + mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE); + mapping->assoc_mapping = NULL; + mapping->backing_dev_info = &default_backing_dev_info; + mapping->writeback_index = 0; - /* - * If the block_device provides a backing_dev_info for client - * inodes then use that. Otherwise the inode share the bdev's - * backing_dev_info. - */ - if (sb->s_bdev) { - struct backing_dev_info *bdi; + /* + * If the block_device provides a backing_dev_info for client + * inodes then use that. Otherwise the inode share the bdev's + * backing_dev_info. + */ + if (sb->s_bdev) { + struct backing_dev_info *bdi; - bdi = sb->s_bdev->bd_inode_backing_dev_info; - if (!bdi) - bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; - mapping->backing_dev_info = bdi; - } - inode->i_private = NULL; - inode->i_mapping = mapping; + bdi = sb->s_bdev->bd_inode_backing_dev_info; + if (!bdi) + bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; + mapping->backing_dev_info = bdi; } + inode->i_private = NULL; + inode->i_mapping = mapping; + return inode; } +EXPORT_SYMBOL(inode_init_always); + +static struct inode *alloc_inode(struct super_block *sb) +{ + struct inode *inode; + + if (sb->s_op->alloc_inode) + inode = sb->s_op->alloc_inode(sb); + else + inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL); + + if (inode) + return inode_init_always(sb, inode); + return NULL; +} void destroy_inode(struct inode *inode) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 5b248d61430c..04abead4b021 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1881,6 +1881,7 @@ extern loff_t default_llseek(struct file *file, loff_t offset, int origin); extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); +extern struct inode * inode_init_always(struct super_block *, struct inode *); extern void inode_init_once(struct inode *); extern void iput(struct inode *); extern struct inode * igrab(struct inode *); -- cgit v1.2.3 From 8290c35f87304a6b73d4fd17b03580b4f7425de8 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 30 Oct 2008 17:35:24 +1100 Subject: Inode: Allow external list initialisation To allow XFS to combine the XFS and linux inodes into a single structure, we need to drive inode lookup from the XFS inode cache, not the generic inode cache. This means that we need initialise a struct inode from a context outside alloc_inode() as it is no longer used by XFS. After inode allocation and initialisation, we need to add the inode to the superblock list, the in-use list, hash it and do some accounting. This all needs to be done with the inode_lock held and there are already several places in fs/inode.c that do this list manipulation. Factor out the common code, add a locking wrapper and export the function so ti can be called from XFS. Signed-off-by: Dave Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/inode.c | 67 +++++++++++++++++++++++++++++++++++++----------------- include/linux/fs.h | 1 + 2 files changed, 47 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/fs/inode.c b/fs/inode.c index e7ee99907d60..fbcf6c5e7605 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -550,6 +550,49 @@ repeat: return node ? inode : NULL; } +static unsigned long hash(struct super_block *sb, unsigned long hashval) +{ + unsigned long tmp; + + tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / + L1_CACHE_BYTES; + tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS); + return tmp & I_HASHMASK; +} + +static inline void +__inode_add_to_lists(struct super_block *sb, struct hlist_head *head, + struct inode *inode) +{ + inodes_stat.nr_inodes++; + list_add(&inode->i_list, &inode_in_use); + list_add(&inode->i_sb_list, &sb->s_inodes); + if (head) + hlist_add_head(&inode->i_hash, head); +} + +/** + * inode_add_to_lists - add a new inode to relevant lists + * @sb - superblock inode belongs to. + * @inode - inode to mark in use + * + * When an inode is allocated it needs to be accounted for, added to the in use + * list, the owning superblock and the inode hash. This needs to be done under + * the inode_lock, so export a function to do this rather than the inode lock + * itself. We calculate the hash list to add to here so it is all internal + * which requires the caller to have already set up the inode number in the + * inode to add. + */ +void inode_add_to_lists(struct super_block *sb, struct inode *inode) +{ + struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino); + + spin_lock(&inode_lock); + __inode_add_to_lists(sb, head, inode); + spin_unlock(&inode_lock); +} +EXPORT_SYMBOL_GPL(inode_add_to_lists); + /** * new_inode - obtain an inode * @sb: superblock @@ -577,9 +620,7 @@ struct inode *new_inode(struct super_block *sb) inode = alloc_inode(sb); if (inode) { spin_lock(&inode_lock); - inodes_stat.nr_inodes++; - list_add(&inode->i_list, &inode_in_use); - list_add(&inode->i_sb_list, &sb->s_inodes); + __inode_add_to_lists(sb, NULL, inode); inode->i_ino = ++last_ino; inode->i_state = 0; spin_unlock(&inode_lock); @@ -638,10 +679,7 @@ static struct inode * get_new_inode(struct super_block *sb, struct hlist_head *h if (set(inode, data)) goto set_failed; - inodes_stat.nr_inodes++; - list_add(&inode->i_list, &inode_in_use); - list_add(&inode->i_sb_list, &sb->s_inodes); - hlist_add_head(&inode->i_hash, head); + __inode_add_to_lists(sb, head, inode); inode->i_state = I_LOCK|I_NEW; spin_unlock(&inode_lock); @@ -687,10 +725,7 @@ static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_he old = find_inode_fast(sb, head, ino); if (!old) { inode->i_ino = ino; - inodes_stat.nr_inodes++; - list_add(&inode->i_list, &inode_in_use); - list_add(&inode->i_sb_list, &sb->s_inodes); - hlist_add_head(&inode->i_hash, head); + __inode_add_to_lists(sb, head, inode); inode->i_state = I_LOCK|I_NEW; spin_unlock(&inode_lock); @@ -714,16 +749,6 @@ static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_he return inode; } -static unsigned long hash(struct super_block *sb, unsigned long hashval) -{ - unsigned long tmp; - - tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / - L1_CACHE_BYTES; - tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS); - return tmp & I_HASHMASK; -} - /** * iunique - get a unique inode number * @sb: superblock diff --git a/include/linux/fs.h b/include/linux/fs.h index 04abead4b021..1deedf235d55 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1883,6 +1883,7 @@ extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); extern struct inode * inode_init_always(struct super_block *, struct inode *); extern void inode_init_once(struct inode *); +extern void inode_add_to_lists(struct super_block *, struct inode *); extern void iput(struct inode *); extern struct inode * igrab(struct inode *); extern ino_t iunique(struct super_block *, ino_t); -- cgit v1.2.3 From 180ee700ddfcc882d90410d979a4b3a804380ed2 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 30 Oct 2008 10:14:32 +0000 Subject: [ARM] S3C: Move regs-watchdog.h to arch/arm/plat-s3c/include/plat Move regs-watchdog.h to arch/arm/plat-s3c/include/plat ready to clean out the old include directories Signed-off-by: Ben Dooks --- arch/arm/mach-s3c2410/include/mach/system-reset.h | 2 +- arch/arm/plat-s3c/include/plat/regs-watchdog.h | 41 +++++++++++++++++++++++ arch/arm/plat-s3c/include/plat/uncompress.h | 2 +- drivers/watchdog/s3c2410_wdt.c | 2 +- include/asm-arm/plat-s3c/regs-watchdog.h | 41 ----------------------- 5 files changed, 44 insertions(+), 44 deletions(-) create mode 100644 arch/arm/plat-s3c/include/plat/regs-watchdog.h delete mode 100644 include/asm-arm/plat-s3c/regs-watchdog.h (limited to 'include') diff --git a/arch/arm/mach-s3c2410/include/mach/system-reset.h b/arch/arm/mach-s3c2410/include/mach/system-reset.h index 43535a0e7186..7613d0a384ba 100644 --- a/arch/arm/mach-s3c2410/include/mach/system-reset.h +++ b/arch/arm/mach-s3c2410/include/mach/system-reset.h @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include diff --git a/arch/arm/plat-s3c/include/plat/regs-watchdog.h b/arch/arm/plat-s3c/include/plat/regs-watchdog.h new file mode 100644 index 000000000000..4938492470f7 --- /dev/null +++ b/arch/arm/plat-s3c/include/plat/regs-watchdog.h @@ -0,0 +1,41 @@ +/* arch/arm/mach-s3c2410/include/mach/regs-watchdog.h + * + * Copyright (c) 2003 Simtec Electronics + * http://www.simtec.co.uk/products/SWLINUX/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * S3C2410 Watchdog timer control +*/ + + +#ifndef __ASM_ARCH_REGS_WATCHDOG_H +#define __ASM_ARCH_REGS_WATCHDOG_H + +#define S3C_WDOGREG(x) ((x) + S3C_VA_WATCHDOG) + +#define S3C2410_WTCON S3C_WDOGREG(0x00) +#define S3C2410_WTDAT S3C_WDOGREG(0x04) +#define S3C2410_WTCNT S3C_WDOGREG(0x08) + +/* the watchdog can either generate a reset pulse, or an + * interrupt. + */ + +#define S3C2410_WTCON_RSTEN (0x01) +#define S3C2410_WTCON_INTEN (1<<2) +#define S3C2410_WTCON_ENABLE (1<<5) + +#define S3C2410_WTCON_DIV16 (0<<3) +#define S3C2410_WTCON_DIV32 (1<<3) +#define S3C2410_WTCON_DIV64 (2<<3) +#define S3C2410_WTCON_DIV128 (3<<3) + +#define S3C2410_WTCON_PRESCALE(x) ((x) << 8) +#define S3C2410_WTCON_PRESCALE_MASK (0xff00) + +#endif /* __ASM_ARCH_REGS_WATCHDOG_H */ + + diff --git a/arch/arm/plat-s3c/include/plat/uncompress.h b/arch/arm/plat-s3c/include/plat/uncompress.h index 4df006b9cc10..8a8a927292e0 100644 --- a/arch/arm/plat-s3c/include/plat/uncompress.h +++ b/arch/arm/plat-s3c/include/plat/uncompress.h @@ -28,7 +28,7 @@ static void arch_detect_cpu(void); /* defines for UART registers */ #include -#include +#include /* working in physical space... */ #undef S3C2410_WDOGREG diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index 86d42801de45..13a4b178b8cc 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -42,7 +42,7 @@ #undef S3C_VA_WATCHDOG #define S3C_VA_WATCHDOG (0) -#include +#include #define PFX "s3c2410-wdt: " diff --git a/include/asm-arm/plat-s3c/regs-watchdog.h b/include/asm-arm/plat-s3c/regs-watchdog.h deleted file mode 100644 index 4938492470f7..000000000000 --- a/include/asm-arm/plat-s3c/regs-watchdog.h +++ /dev/null @@ -1,41 +0,0 @@ -/* arch/arm/mach-s3c2410/include/mach/regs-watchdog.h - * - * Copyright (c) 2003 Simtec Electronics - * http://www.simtec.co.uk/products/SWLINUX/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * S3C2410 Watchdog timer control -*/ - - -#ifndef __ASM_ARCH_REGS_WATCHDOG_H -#define __ASM_ARCH_REGS_WATCHDOG_H - -#define S3C_WDOGREG(x) ((x) + S3C_VA_WATCHDOG) - -#define S3C2410_WTCON S3C_WDOGREG(0x00) -#define S3C2410_WTDAT S3C_WDOGREG(0x04) -#define S3C2410_WTCNT S3C_WDOGREG(0x08) - -/* the watchdog can either generate a reset pulse, or an - * interrupt. - */ - -#define S3C2410_WTCON_RSTEN (0x01) -#define S3C2410_WTCON_INTEN (1<<2) -#define S3C2410_WTCON_ENABLE (1<<5) - -#define S3C2410_WTCON_DIV16 (0<<3) -#define S3C2410_WTCON_DIV32 (1<<3) -#define S3C2410_WTCON_DIV64 (2<<3) -#define S3C2410_WTCON_DIV128 (3<<3) - -#define S3C2410_WTCON_PRESCALE(x) ((x) << 8) -#define S3C2410_WTCON_PRESCALE_MASK (0xff00) - -#endif /* __ASM_ARCH_REGS_WATCHDOG_H */ - - -- cgit v1.2.3 From 9498cb79463c9b2abb243a4b0c2ce3ac1853d5b0 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 30 Oct 2008 10:14:33 +0000 Subject: [ARM] S3C: Move i2c headers to arch/arm/plat-s3c/include/plat. Move the i2c headers to arch/arm/plat-s3c/include/plat ready to clean out the old include directories. Signed-off-by: Ben Dooks --- arch/arm/mach-s3c2410/mach-bast.c | 2 +- arch/arm/mach-s3c2410/mach-n30.c | 2 +- arch/arm/mach-s3c2412/mach-jive.c | 2 +- arch/arm/plat-s3c/include/plat/iic.h | 33 ++++++++++++++++++ arch/arm/plat-s3c/include/plat/regs-iic.h | 56 +++++++++++++++++++++++++++++++ drivers/i2c/busses/i2c-s3c2410.c | 4 +-- include/asm-arm/plat-s3c/iic.h | 33 ------------------ include/asm-arm/plat-s3c/regs-iic.h | 56 ------------------------------- 8 files changed, 94 insertions(+), 94 deletions(-) create mode 100644 arch/arm/plat-s3c/include/plat/iic.h create mode 100644 arch/arm/plat-s3c/include/plat/regs-iic.h delete mode 100644 include/asm-arm/plat-s3c/iic.h delete mode 100644 include/asm-arm/plat-s3c/regs-iic.h (limited to 'include') diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c index 8db9c700e3c2..8da05f9e7bb4 100644 --- a/arch/arm/mach-s3c2410/mach-bast.c +++ b/arch/arm/mach-s3c2410/mach-bast.c @@ -45,7 +45,7 @@ #include #include -#include +#include #include #include diff --git a/arch/arm/mach-s3c2410/mach-n30.c b/arch/arm/mach-s3c2410/mach-n30.c index 82505517846c..836c9f639215 100644 --- a/arch/arm/mach-s3c2410/mach-n30.c +++ b/arch/arm/mach-s3c2410/mach-n30.c @@ -40,7 +40,7 @@ #include #include -#include +#include #include #include diff --git a/arch/arm/mach-s3c2412/mach-jive.c b/arch/arm/mach-s3c2412/mach-jive.c index b08f18c8c47a..80208d37756d 100644 --- a/arch/arm/mach-s3c2412/mach-jive.c +++ b/arch/arm/mach-s3c2412/mach-jive.c @@ -32,7 +32,7 @@ #include #include -#include +#include #include #include diff --git a/arch/arm/plat-s3c/include/plat/iic.h b/arch/arm/plat-s3c/include/plat/iic.h new file mode 100644 index 000000000000..5106acaa1d0e --- /dev/null +++ b/arch/arm/plat-s3c/include/plat/iic.h @@ -0,0 +1,33 @@ +/* arch/arm/mach-s3c2410/include/mach/iic.h + * + * Copyright (c) 2004 Simtec Electronics + * Ben Dooks + * + * S3C2410 - I2C Controller platfrom_device info + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. +*/ + +#ifndef __ASM_ARCH_IIC_H +#define __ASM_ARCH_IIC_H __FILE__ + +#define S3C_IICFLG_FILTER (1<<0) /* enable s3c2440 filter */ + +/* Notes: + * 1) All frequencies are expressed in Hz + * 2) A value of zero is `do not care` +*/ + +struct s3c2410_platform_i2c { + int bus_num; /* bus number to use */ + unsigned int flags; + unsigned int slave_addr; /* slave address for controller */ + unsigned long bus_freq; /* standard bus frequency */ + unsigned long max_freq; /* max frequency for the bus */ + unsigned long min_freq; /* min frequency for the bus */ + unsigned int sda_delay; /* pclks (s3c2440 only) */ +}; + +#endif /* __ASM_ARCH_IIC_H */ diff --git a/arch/arm/plat-s3c/include/plat/regs-iic.h b/arch/arm/plat-s3c/include/plat/regs-iic.h new file mode 100644 index 000000000000..2f7c17de8ac8 --- /dev/null +++ b/arch/arm/plat-s3c/include/plat/regs-iic.h @@ -0,0 +1,56 @@ +/* arch/arm/mach-s3c2410/include/mach/regs-iic.h + * + * Copyright (c) 2004 Simtec Electronics + * http://www.simtec.co.uk/products/SWLINUX/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * S3C2410 I2C Controller +*/ + +#ifndef __ASM_ARCH_REGS_IIC_H +#define __ASM_ARCH_REGS_IIC_H __FILE__ + +/* see s3c2410x user guide, v1.1, section 9 (p447) for more info */ + +#define S3C2410_IICREG(x) (x) + +#define S3C2410_IICCON S3C2410_IICREG(0x00) +#define S3C2410_IICSTAT S3C2410_IICREG(0x04) +#define S3C2410_IICADD S3C2410_IICREG(0x08) +#define S3C2410_IICDS S3C2410_IICREG(0x0C) +#define S3C2440_IICLC S3C2410_IICREG(0x10) + +#define S3C2410_IICCON_ACKEN (1<<7) +#define S3C2410_IICCON_TXDIV_16 (0<<6) +#define S3C2410_IICCON_TXDIV_512 (1<<6) +#define S3C2410_IICCON_IRQEN (1<<5) +#define S3C2410_IICCON_IRQPEND (1<<4) +#define S3C2410_IICCON_SCALE(x) ((x)&15) +#define S3C2410_IICCON_SCALEMASK (0xf) + +#define S3C2410_IICSTAT_MASTER_RX (2<<6) +#define S3C2410_IICSTAT_MASTER_TX (3<<6) +#define S3C2410_IICSTAT_SLAVE_RX (0<<6) +#define S3C2410_IICSTAT_SLAVE_TX (1<<6) +#define S3C2410_IICSTAT_MODEMASK (3<<6) + +#define S3C2410_IICSTAT_START (1<<5) +#define S3C2410_IICSTAT_BUSBUSY (1<<5) +#define S3C2410_IICSTAT_TXRXEN (1<<4) +#define S3C2410_IICSTAT_ARBITR (1<<3) +#define S3C2410_IICSTAT_ASSLAVE (1<<2) +#define S3C2410_IICSTAT_ADDR0 (1<<1) +#define S3C2410_IICSTAT_LASTBIT (1<<0) + +#define S3C2410_IICLC_SDA_DELAY0 (0 << 0) +#define S3C2410_IICLC_SDA_DELAY5 (1 << 0) +#define S3C2410_IICLC_SDA_DELAY10 (2 << 0) +#define S3C2410_IICLC_SDA_DELAY15 (3 << 0) +#define S3C2410_IICLC_SDA_DELAY_MASK (3 << 0) + +#define S3C2410_IICLC_FILTER_ON (1<<2) + +#endif /* __ASM_ARCH_REGS_IIC_H */ diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c index c772e02c2803..4ad9c47ee4fd 100644 --- a/drivers/i2c/busses/i2c-s3c2410.c +++ b/drivers/i2c/busses/i2c-s3c2410.c @@ -40,8 +40,8 @@ #include #include -#include -#include +#include +#include /* i2c controller state */ diff --git a/include/asm-arm/plat-s3c/iic.h b/include/asm-arm/plat-s3c/iic.h deleted file mode 100644 index 5106acaa1d0e..000000000000 --- a/include/asm-arm/plat-s3c/iic.h +++ /dev/null @@ -1,33 +0,0 @@ -/* arch/arm/mach-s3c2410/include/mach/iic.h - * - * Copyright (c) 2004 Simtec Electronics - * Ben Dooks - * - * S3C2410 - I2C Controller platfrom_device info - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. -*/ - -#ifndef __ASM_ARCH_IIC_H -#define __ASM_ARCH_IIC_H __FILE__ - -#define S3C_IICFLG_FILTER (1<<0) /* enable s3c2440 filter */ - -/* Notes: - * 1) All frequencies are expressed in Hz - * 2) A value of zero is `do not care` -*/ - -struct s3c2410_platform_i2c { - int bus_num; /* bus number to use */ - unsigned int flags; - unsigned int slave_addr; /* slave address for controller */ - unsigned long bus_freq; /* standard bus frequency */ - unsigned long max_freq; /* max frequency for the bus */ - unsigned long min_freq; /* min frequency for the bus */ - unsigned int sda_delay; /* pclks (s3c2440 only) */ -}; - -#endif /* __ASM_ARCH_IIC_H */ diff --git a/include/asm-arm/plat-s3c/regs-iic.h b/include/asm-arm/plat-s3c/regs-iic.h deleted file mode 100644 index 2f7c17de8ac8..000000000000 --- a/include/asm-arm/plat-s3c/regs-iic.h +++ /dev/null @@ -1,56 +0,0 @@ -/* arch/arm/mach-s3c2410/include/mach/regs-iic.h - * - * Copyright (c) 2004 Simtec Electronics - * http://www.simtec.co.uk/products/SWLINUX/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * S3C2410 I2C Controller -*/ - -#ifndef __ASM_ARCH_REGS_IIC_H -#define __ASM_ARCH_REGS_IIC_H __FILE__ - -/* see s3c2410x user guide, v1.1, section 9 (p447) for more info */ - -#define S3C2410_IICREG(x) (x) - -#define S3C2410_IICCON S3C2410_IICREG(0x00) -#define S3C2410_IICSTAT S3C2410_IICREG(0x04) -#define S3C2410_IICADD S3C2410_IICREG(0x08) -#define S3C2410_IICDS S3C2410_IICREG(0x0C) -#define S3C2440_IICLC S3C2410_IICREG(0x10) - -#define S3C2410_IICCON_ACKEN (1<<7) -#define S3C2410_IICCON_TXDIV_16 (0<<6) -#define S3C2410_IICCON_TXDIV_512 (1<<6) -#define S3C2410_IICCON_IRQEN (1<<5) -#define S3C2410_IICCON_IRQPEND (1<<4) -#define S3C2410_IICCON_SCALE(x) ((x)&15) -#define S3C2410_IICCON_SCALEMASK (0xf) - -#define S3C2410_IICSTAT_MASTER_RX (2<<6) -#define S3C2410_IICSTAT_MASTER_TX (3<<6) -#define S3C2410_IICSTAT_SLAVE_RX (0<<6) -#define S3C2410_IICSTAT_SLAVE_TX (1<<6) -#define S3C2410_IICSTAT_MODEMASK (3<<6) - -#define S3C2410_IICSTAT_START (1<<5) -#define S3C2410_IICSTAT_BUSBUSY (1<<5) -#define S3C2410_IICSTAT_TXRXEN (1<<4) -#define S3C2410_IICSTAT_ARBITR (1<<3) -#define S3C2410_IICSTAT_ASSLAVE (1<<2) -#define S3C2410_IICSTAT_ADDR0 (1<<1) -#define S3C2410_IICSTAT_LASTBIT (1<<0) - -#define S3C2410_IICLC_SDA_DELAY0 (0 << 0) -#define S3C2410_IICLC_SDA_DELAY5 (1 << 0) -#define S3C2410_IICLC_SDA_DELAY10 (2 << 0) -#define S3C2410_IICLC_SDA_DELAY15 (3 << 0) -#define S3C2410_IICLC_SDA_DELAY_MASK (3 << 0) - -#define S3C2410_IICLC_FILTER_ON (1<<2) - -#endif /* __ASM_ARCH_REGS_IIC_H */ -- cgit v1.2.3 From e2cd00cfebd9a25e0e09712b0116ef18edc2cd98 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 30 Oct 2008 10:14:34 +0000 Subject: [ARM] S3C: Move regs-rtc.h to arch/arm/plat-s3c/include/plat Move regs-rtc.h to arch/arm/plat-s3c/include/plat ready to clean out old include directories. Signed-off-by: Ben Dooks --- arch/arm/plat-s3c/include/plat/regs-rtc.h | 61 +++++++++++++++++++++++++++++++ drivers/rtc/rtc-s3c.c | 2 +- include/asm-arm/plat-s3c/regs-rtc.h | 61 ------------------------------- 3 files changed, 62 insertions(+), 62 deletions(-) create mode 100644 arch/arm/plat-s3c/include/plat/regs-rtc.h delete mode 100644 include/asm-arm/plat-s3c/regs-rtc.h (limited to 'include') diff --git a/arch/arm/plat-s3c/include/plat/regs-rtc.h b/arch/arm/plat-s3c/include/plat/regs-rtc.h new file mode 100644 index 000000000000..d5837cf8e402 --- /dev/null +++ b/arch/arm/plat-s3c/include/plat/regs-rtc.h @@ -0,0 +1,61 @@ +/* arch/arm/mach-s3c2410/include/mach/regs-rtc.h + * + * Copyright (c) 2003 Simtec Electronics + * http://www.simtec.co.uk/products/SWLINUX/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * S3C2410 Internal RTC register definition +*/ + +#ifndef __ASM_ARCH_REGS_RTC_H +#define __ASM_ARCH_REGS_RTC_H __FILE__ + +#define S3C2410_RTCREG(x) (x) + +#define S3C2410_RTCCON S3C2410_RTCREG(0x40) +#define S3C2410_RTCCON_RTCEN (1<<0) +#define S3C2410_RTCCON_CLKSEL (1<<1) +#define S3C2410_RTCCON_CNTSEL (1<<2) +#define S3C2410_RTCCON_CLKRST (1<<3) + +#define S3C2410_TICNT S3C2410_RTCREG(0x44) +#define S3C2410_TICNT_ENABLE (1<<7) + +#define S3C2410_RTCALM S3C2410_RTCREG(0x50) +#define S3C2410_RTCALM_ALMEN (1<<6) +#define S3C2410_RTCALM_YEAREN (1<<5) +#define S3C2410_RTCALM_MONEN (1<<4) +#define S3C2410_RTCALM_DAYEN (1<<3) +#define S3C2410_RTCALM_HOUREN (1<<2) +#define S3C2410_RTCALM_MINEN (1<<1) +#define S3C2410_RTCALM_SECEN (1<<0) + +#define S3C2410_RTCALM_ALL \ + S3C2410_RTCALM_ALMEN | S3C2410_RTCALM_YEAREN | S3C2410_RTCALM_MONEN |\ + S3C2410_RTCALM_DAYEN | S3C2410_RTCALM_HOUREN | S3C2410_RTCALM_MINEN |\ + S3C2410_RTCALM_SECEN + + +#define S3C2410_ALMSEC S3C2410_RTCREG(0x54) +#define S3C2410_ALMMIN S3C2410_RTCREG(0x58) +#define S3C2410_ALMHOUR S3C2410_RTCREG(0x5c) + +#define S3C2410_ALMDATE S3C2410_RTCREG(0x60) +#define S3C2410_ALMMON S3C2410_RTCREG(0x64) +#define S3C2410_ALMYEAR S3C2410_RTCREG(0x68) + +#define S3C2410_RTCRST S3C2410_RTCREG(0x6c) + +#define S3C2410_RTCSEC S3C2410_RTCREG(0x70) +#define S3C2410_RTCMIN S3C2410_RTCREG(0x74) +#define S3C2410_RTCHOUR S3C2410_RTCREG(0x78) +#define S3C2410_RTCDATE S3C2410_RTCREG(0x7c) +#define S3C2410_RTCDAY S3C2410_RTCREG(0x80) +#define S3C2410_RTCMON S3C2410_RTCREG(0x84) +#define S3C2410_RTCYEAR S3C2410_RTCREG(0x88) + + +#endif /* __ASM_ARCH_REGS_RTC_H */ diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index 910bc704939c..0273ebc4cf36 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include /* I have yet to find an S3C implementation with more than one * of these rtc blocks in */ diff --git a/include/asm-arm/plat-s3c/regs-rtc.h b/include/asm-arm/plat-s3c/regs-rtc.h deleted file mode 100644 index d5837cf8e402..000000000000 --- a/include/asm-arm/plat-s3c/regs-rtc.h +++ /dev/null @@ -1,61 +0,0 @@ -/* arch/arm/mach-s3c2410/include/mach/regs-rtc.h - * - * Copyright (c) 2003 Simtec Electronics - * http://www.simtec.co.uk/products/SWLINUX/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * S3C2410 Internal RTC register definition -*/ - -#ifndef __ASM_ARCH_REGS_RTC_H -#define __ASM_ARCH_REGS_RTC_H __FILE__ - -#define S3C2410_RTCREG(x) (x) - -#define S3C2410_RTCCON S3C2410_RTCREG(0x40) -#define S3C2410_RTCCON_RTCEN (1<<0) -#define S3C2410_RTCCON_CLKSEL (1<<1) -#define S3C2410_RTCCON_CNTSEL (1<<2) -#define S3C2410_RTCCON_CLKRST (1<<3) - -#define S3C2410_TICNT S3C2410_RTCREG(0x44) -#define S3C2410_TICNT_ENABLE (1<<7) - -#define S3C2410_RTCALM S3C2410_RTCREG(0x50) -#define S3C2410_RTCALM_ALMEN (1<<6) -#define S3C2410_RTCALM_YEAREN (1<<5) -#define S3C2410_RTCALM_MONEN (1<<4) -#define S3C2410_RTCALM_DAYEN (1<<3) -#define S3C2410_RTCALM_HOUREN (1<<2) -#define S3C2410_RTCALM_MINEN (1<<1) -#define S3C2410_RTCALM_SECEN (1<<0) - -#define S3C2410_RTCALM_ALL \ - S3C2410_RTCALM_ALMEN | S3C2410_RTCALM_YEAREN | S3C2410_RTCALM_MONEN |\ - S3C2410_RTCALM_DAYEN | S3C2410_RTCALM_HOUREN | S3C2410_RTCALM_MINEN |\ - S3C2410_RTCALM_SECEN - - -#define S3C2410_ALMSEC S3C2410_RTCREG(0x54) -#define S3C2410_ALMMIN S3C2410_RTCREG(0x58) -#define S3C2410_ALMHOUR S3C2410_RTCREG(0x5c) - -#define S3C2410_ALMDATE S3C2410_RTCREG(0x60) -#define S3C2410_ALMMON S3C2410_RTCREG(0x64) -#define S3C2410_ALMYEAR S3C2410_RTCREG(0x68) - -#define S3C2410_RTCRST S3C2410_RTCREG(0x6c) - -#define S3C2410_RTCSEC S3C2410_RTCREG(0x70) -#define S3C2410_RTCMIN S3C2410_RTCREG(0x74) -#define S3C2410_RTCHOUR S3C2410_RTCREG(0x78) -#define S3C2410_RTCDATE S3C2410_RTCREG(0x7c) -#define S3C2410_RTCDAY S3C2410_RTCREG(0x80) -#define S3C2410_RTCMON S3C2410_RTCREG(0x84) -#define S3C2410_RTCYEAR S3C2410_RTCREG(0x88) - - -#endif /* __ASM_ARCH_REGS_RTC_H */ -- cgit v1.2.3 From 7926b5a325f06745a1bed75bfb4ef814d0ae9d99 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 30 Oct 2008 10:14:35 +0000 Subject: [ARM] S3C: Move nand headers to arch/arm/plat-s3c/include/plat Move nand headers to arch/arm/plat-s3c/include/plat ready to clean out the old include directories. Signed-off-by: Ben Dooks --- arch/arm/mach-s3c2410/mach-bast.c | 2 +- arch/arm/mach-s3c2410/mach-qt2410.c | 2 +- arch/arm/mach-s3c2412/mach-jive.c | 2 +- arch/arm/mach-s3c2412/mach-vstms.c | 2 +- arch/arm/mach-s3c2440/mach-anubis.c | 2 +- arch/arm/mach-s3c2440/mach-at2440evb.c | 2 +- arch/arm/mach-s3c2440/mach-osiris.c | 2 +- arch/arm/mach-s3c2440/mach-rx3715.c | 2 +- arch/arm/plat-s3c/include/plat/nand.h | 50 ++++++++++++ arch/arm/plat-s3c/include/plat/regs-nand.h | 123 +++++++++++++++++++++++++++++ arch/arm/plat-s3c24xx/common-smdk.c | 2 +- drivers/mtd/nand/s3c2410.c | 4 +- include/asm-arm/plat-s3c/nand.h | 50 ------------ include/asm-arm/plat-s3c/regs-nand.h | 123 ----------------------------- 14 files changed, 184 insertions(+), 184 deletions(-) create mode 100644 arch/arm/plat-s3c/include/plat/nand.h create mode 100644 arch/arm/plat-s3c/include/plat/regs-nand.h delete mode 100644 include/asm-arm/plat-s3c/nand.h delete mode 100644 include/asm-arm/plat-s3c/regs-nand.h (limited to 'include') diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c index 8da05f9e7bb4..c04c24444e0d 100644 --- a/arch/arm/mach-s3c2410/mach-bast.c +++ b/arch/arm/mach-s3c2410/mach-bast.c @@ -44,7 +44,7 @@ #include #include -#include +#include #include #include diff --git a/arch/arm/mach-s3c2410/mach-qt2410.c b/arch/arm/mach-s3c2410/mach-qt2410.c index 661807e14e8a..315c27271f18 100644 --- a/arch/arm/mach-s3c2410/mach-qt2410.c +++ b/arch/arm/mach-s3c2410/mach-qt2410.c @@ -50,7 +50,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-s3c2412/mach-jive.c b/arch/arm/mach-s3c2412/mach-jive.c index 80208d37756d..c8d9a346b3bf 100644 --- a/arch/arm/mach-s3c2412/mach-jive.c +++ b/arch/arm/mach-s3c2412/mach-jive.c @@ -31,7 +31,7 @@ #include #include -#include +#include #include #include diff --git a/arch/arm/mach-s3c2412/mach-vstms.c b/arch/arm/mach-s3c2412/mach-vstms.c index 4cfa19ad9be0..da32a6cb17ae 100644 --- a/arch/arm/mach-s3c2412/mach-vstms.c +++ b/arch/arm/mach-s3c2412/mach-vstms.c @@ -39,7 +39,7 @@ #include #include -#include +#include #include #include diff --git a/arch/arm/mach-s3c2440/mach-anubis.c b/arch/arm/mach-s3c2440/mach-anubis.c index e2beca470484..334379bdfc6e 100644 --- a/arch/arm/mach-s3c2440/mach-anubis.c +++ b/arch/arm/mach-s3c2440/mach-anubis.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mach-s3c2440/mach-at2440evb.c b/arch/arm/mach-s3c2440/mach-at2440evb.c index 66876c6f2f1c..07b42a0207d1 100644 --- a/arch/arm/mach-s3c2440/mach-at2440evb.c +++ b/arch/arm/mach-s3c2440/mach-at2440evb.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mach-s3c2440/mach-osiris.c b/arch/arm/mach-s3c2440/mach-osiris.c index 2361d606abc5..884a3c7ae75f 100644 --- a/arch/arm/mach-s3c2440/mach-osiris.c +++ b/arch/arm/mach-s3c2440/mach-osiris.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mach-s3c2440/mach-rx3715.c b/arch/arm/mach-s3c2440/mach-rx3715.c index 4d14c7cff892..fbd081de592f 100644 --- a/arch/arm/mach-s3c2440/mach-rx3715.c +++ b/arch/arm/mach-s3c2440/mach-rx3715.c @@ -42,7 +42,7 @@ #include #include -#include +#include #include #include diff --git a/arch/arm/plat-s3c/include/plat/nand.h b/arch/arm/plat-s3c/include/plat/nand.h new file mode 100644 index 000000000000..f4dcd14af059 --- /dev/null +++ b/arch/arm/plat-s3c/include/plat/nand.h @@ -0,0 +1,50 @@ +/* arch/arm/mach-s3c2410/include/mach/nand.h + * + * Copyright (c) 2004 Simtec Electronics + * Ben Dooks + * + * S3C2410 - NAND device controller platfrom_device info + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. +*/ + +/* struct s3c2410_nand_set + * + * define an set of one or more nand chips registered with an unique mtd + * + * nr_chips = number of chips in this set + * nr_partitions = number of partitions pointed to be partitoons (or zero) + * name = name of set (optional) + * nr_map = map for low-layer logical to physical chip numbers (option) + * partitions = mtd partition list +*/ + +struct s3c2410_nand_set { + unsigned int disable_ecc : 1; + + int nr_chips; + int nr_partitions; + char *name; + int *nr_map; + struct mtd_partition *partitions; + struct nand_ecclayout *ecc_layout; +}; + +struct s3c2410_platform_nand { + /* timing information for controller, all times in nanoseconds */ + + int tacls; /* time for active CLE/ALE to nWE/nOE */ + int twrph0; /* active time for nWE/nOE */ + int twrph1; /* time for release CLE/ALE from nWE/nOE inactive */ + + unsigned int ignore_unset_ecc : 1; + + int nr_sets; + struct s3c2410_nand_set *sets; + + void (*select_chip)(struct s3c2410_nand_set *, + int chip); +}; + diff --git a/arch/arm/plat-s3c/include/plat/regs-nand.h b/arch/arm/plat-s3c/include/plat/regs-nand.h new file mode 100644 index 000000000000..b2caa4bca270 --- /dev/null +++ b/arch/arm/plat-s3c/include/plat/regs-nand.h @@ -0,0 +1,123 @@ +/* arch/arm/mach-s3c2410/include/mach/regs-nand.h + * + * Copyright (c) 2004,2005 Simtec Electronics + * http://www.simtec.co.uk/products/SWLINUX/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * S3C2410 NAND register definitions +*/ + +#ifndef __ASM_ARM_REGS_NAND +#define __ASM_ARM_REGS_NAND + + +#define S3C2410_NFREG(x) (x) + +#define S3C2410_NFCONF S3C2410_NFREG(0x00) +#define S3C2410_NFCMD S3C2410_NFREG(0x04) +#define S3C2410_NFADDR S3C2410_NFREG(0x08) +#define S3C2410_NFDATA S3C2410_NFREG(0x0C) +#define S3C2410_NFSTAT S3C2410_NFREG(0x10) +#define S3C2410_NFECC S3C2410_NFREG(0x14) + +#define S3C2440_NFCONT S3C2410_NFREG(0x04) +#define S3C2440_NFCMD S3C2410_NFREG(0x08) +#define S3C2440_NFADDR S3C2410_NFREG(0x0C) +#define S3C2440_NFDATA S3C2410_NFREG(0x10) +#define S3C2440_NFECCD0 S3C2410_NFREG(0x14) +#define S3C2440_NFECCD1 S3C2410_NFREG(0x18) +#define S3C2440_NFECCD S3C2410_NFREG(0x1C) +#define S3C2440_NFSTAT S3C2410_NFREG(0x20) +#define S3C2440_NFESTAT0 S3C2410_NFREG(0x24) +#define S3C2440_NFESTAT1 S3C2410_NFREG(0x28) +#define S3C2440_NFMECC0 S3C2410_NFREG(0x2C) +#define S3C2440_NFMECC1 S3C2410_NFREG(0x30) +#define S3C2440_NFSECC S3C2410_NFREG(0x34) +#define S3C2440_NFSBLK S3C2410_NFREG(0x38) +#define S3C2440_NFEBLK S3C2410_NFREG(0x3C) + +#define S3C2412_NFSBLK S3C2410_NFREG(0x20) +#define S3C2412_NFEBLK S3C2410_NFREG(0x24) +#define S3C2412_NFSTAT S3C2410_NFREG(0x28) +#define S3C2412_NFMECC_ERR0 S3C2410_NFREG(0x2C) +#define S3C2412_NFMECC_ERR1 S3C2410_NFREG(0x30) +#define S3C2412_NFMECC0 S3C2410_NFREG(0x34) +#define S3C2412_NFMECC1 S3C2410_NFREG(0x38) +#define S3C2412_NFSECC S3C2410_NFREG(0x3C) + +#define S3C2410_NFCONF_EN (1<<15) +#define S3C2410_NFCONF_512BYTE (1<<14) +#define S3C2410_NFCONF_4STEP (1<<13) +#define S3C2410_NFCONF_INITECC (1<<12) +#define S3C2410_NFCONF_nFCE (1<<11) +#define S3C2410_NFCONF_TACLS(x) ((x)<<8) +#define S3C2410_NFCONF_TWRPH0(x) ((x)<<4) +#define S3C2410_NFCONF_TWRPH1(x) ((x)<<0) + +#define S3C2410_NFSTAT_BUSY (1<<0) + +#define S3C2440_NFCONF_BUSWIDTH_8 (0<<0) +#define S3C2440_NFCONF_BUSWIDTH_16 (1<<0) +#define S3C2440_NFCONF_ADVFLASH (1<<3) +#define S3C2440_NFCONF_TACLS(x) ((x)<<12) +#define S3C2440_NFCONF_TWRPH0(x) ((x)<<8) +#define S3C2440_NFCONF_TWRPH1(x) ((x)<<4) + +#define S3C2440_NFCONT_LOCKTIGHT (1<<13) +#define S3C2440_NFCONT_SOFTLOCK (1<<12) +#define S3C2440_NFCONT_ILLEGALACC_EN (1<<10) +#define S3C2440_NFCONT_RNBINT_EN (1<<9) +#define S3C2440_NFCONT_RN_FALLING (1<<8) +#define S3C2440_NFCONT_SPARE_ECCLOCK (1<<6) +#define S3C2440_NFCONT_MAIN_ECCLOCK (1<<5) +#define S3C2440_NFCONT_INITECC (1<<4) +#define S3C2440_NFCONT_nFCE (1<<1) +#define S3C2440_NFCONT_ENABLE (1<<0) + +#define S3C2440_NFSTAT_READY (1<<0) +#define S3C2440_NFSTAT_nCE (1<<1) +#define S3C2440_NFSTAT_RnB_CHANGE (1<<2) +#define S3C2440_NFSTAT_ILLEGAL_ACCESS (1<<3) + +#define S3C2412_NFCONF_NANDBOOT (1<<31) +#define S3C2412_NFCONF_ECCCLKCON (1<<30) +#define S3C2412_NFCONF_ECC_MLC (1<<24) +#define S3C2412_NFCONF_TACLS_MASK (7<<12) /* 1 extra bit of Tacls */ + +#define S3C2412_NFCONT_ECC4_DIRWR (1<<18) +#define S3C2412_NFCONT_LOCKTIGHT (1<<17) +#define S3C2412_NFCONT_SOFTLOCK (1<<16) +#define S3C2412_NFCONT_ECC4_ENCINT (1<<13) +#define S3C2412_NFCONT_ECC4_DECINT (1<<12) +#define S3C2412_NFCONT_MAIN_ECC_LOCK (1<<7) +#define S3C2412_NFCONT_INIT_MAIN_ECC (1<<5) +#define S3C2412_NFCONT_nFCE1 (1<<2) +#define S3C2412_NFCONT_nFCE0 (1<<1) + +#define S3C2412_NFSTAT_ECC_ENCDONE (1<<7) +#define S3C2412_NFSTAT_ECC_DECDONE (1<<6) +#define S3C2412_NFSTAT_ILLEGAL_ACCESS (1<<5) +#define S3C2412_NFSTAT_RnB_CHANGE (1<<4) +#define S3C2412_NFSTAT_nFCE1 (1<<3) +#define S3C2412_NFSTAT_nFCE0 (1<<2) +#define S3C2412_NFSTAT_Res1 (1<<1) +#define S3C2412_NFSTAT_READY (1<<0) + +#define S3C2412_NFECCERR_SERRDATA(x) (((x) >> 21) & 0xf) +#define S3C2412_NFECCERR_SERRBIT(x) (((x) >> 18) & 0x7) +#define S3C2412_NFECCERR_MERRDATA(x) (((x) >> 7) & 0x3ff) +#define S3C2412_NFECCERR_MERRBIT(x) (((x) >> 4) & 0x7) +#define S3C2412_NFECCERR_SPARE_ERR(x) (((x) >> 2) & 0x3) +#define S3C2412_NFECCERR_MAIN_ERR(x) (((x) >> 2) & 0x3) +#define S3C2412_NFECCERR_NONE (0) +#define S3C2412_NFECCERR_1BIT (1) +#define S3C2412_NFECCERR_MULTIBIT (2) +#define S3C2412_NFECCERR_ECCAREA (3) + + + +#endif /* __ASM_ARM_REGS_NAND */ + diff --git a/arch/arm/plat-s3c24xx/common-smdk.c b/arch/arm/plat-s3c24xx/common-smdk.c index 3098736c65d9..3d4837021ac7 100644 --- a/arch/arm/plat-s3c24xx/common-smdk.c +++ b/arch/arm/plat-s3c24xx/common-smdk.c @@ -38,7 +38,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c index 556139ed1fdf..098f5162388b 100644 --- a/drivers/mtd/nand/s3c2410.c +++ b/drivers/mtd/nand/s3c2410.c @@ -45,8 +45,8 @@ #include -#include -#include +#include +#include #ifdef CONFIG_MTD_NAND_S3C2410_HWECC static int hardware_ecc = 1; diff --git a/include/asm-arm/plat-s3c/nand.h b/include/asm-arm/plat-s3c/nand.h deleted file mode 100644 index f4dcd14af059..000000000000 --- a/include/asm-arm/plat-s3c/nand.h +++ /dev/null @@ -1,50 +0,0 @@ -/* arch/arm/mach-s3c2410/include/mach/nand.h - * - * Copyright (c) 2004 Simtec Electronics - * Ben Dooks - * - * S3C2410 - NAND device controller platfrom_device info - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. -*/ - -/* struct s3c2410_nand_set - * - * define an set of one or more nand chips registered with an unique mtd - * - * nr_chips = number of chips in this set - * nr_partitions = number of partitions pointed to be partitoons (or zero) - * name = name of set (optional) - * nr_map = map for low-layer logical to physical chip numbers (option) - * partitions = mtd partition list -*/ - -struct s3c2410_nand_set { - unsigned int disable_ecc : 1; - - int nr_chips; - int nr_partitions; - char *name; - int *nr_map; - struct mtd_partition *partitions; - struct nand_ecclayout *ecc_layout; -}; - -struct s3c2410_platform_nand { - /* timing information for controller, all times in nanoseconds */ - - int tacls; /* time for active CLE/ALE to nWE/nOE */ - int twrph0; /* active time for nWE/nOE */ - int twrph1; /* time for release CLE/ALE from nWE/nOE inactive */ - - unsigned int ignore_unset_ecc : 1; - - int nr_sets; - struct s3c2410_nand_set *sets; - - void (*select_chip)(struct s3c2410_nand_set *, - int chip); -}; - diff --git a/include/asm-arm/plat-s3c/regs-nand.h b/include/asm-arm/plat-s3c/regs-nand.h deleted file mode 100644 index b2caa4bca270..000000000000 --- a/include/asm-arm/plat-s3c/regs-nand.h +++ /dev/null @@ -1,123 +0,0 @@ -/* arch/arm/mach-s3c2410/include/mach/regs-nand.h - * - * Copyright (c) 2004,2005 Simtec Electronics - * http://www.simtec.co.uk/products/SWLINUX/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * S3C2410 NAND register definitions -*/ - -#ifndef __ASM_ARM_REGS_NAND -#define __ASM_ARM_REGS_NAND - - -#define S3C2410_NFREG(x) (x) - -#define S3C2410_NFCONF S3C2410_NFREG(0x00) -#define S3C2410_NFCMD S3C2410_NFREG(0x04) -#define S3C2410_NFADDR S3C2410_NFREG(0x08) -#define S3C2410_NFDATA S3C2410_NFREG(0x0C) -#define S3C2410_NFSTAT S3C2410_NFREG(0x10) -#define S3C2410_NFECC S3C2410_NFREG(0x14) - -#define S3C2440_NFCONT S3C2410_NFREG(0x04) -#define S3C2440_NFCMD S3C2410_NFREG(0x08) -#define S3C2440_NFADDR S3C2410_NFREG(0x0C) -#define S3C2440_NFDATA S3C2410_NFREG(0x10) -#define S3C2440_NFECCD0 S3C2410_NFREG(0x14) -#define S3C2440_NFECCD1 S3C2410_NFREG(0x18) -#define S3C2440_NFECCD S3C2410_NFREG(0x1C) -#define S3C2440_NFSTAT S3C2410_NFREG(0x20) -#define S3C2440_NFESTAT0 S3C2410_NFREG(0x24) -#define S3C2440_NFESTAT1 S3C2410_NFREG(0x28) -#define S3C2440_NFMECC0 S3C2410_NFREG(0x2C) -#define S3C2440_NFMECC1 S3C2410_NFREG(0x30) -#define S3C2440_NFSECC S3C2410_NFREG(0x34) -#define S3C2440_NFSBLK S3C2410_NFREG(0x38) -#define S3C2440_NFEBLK S3C2410_NFREG(0x3C) - -#define S3C2412_NFSBLK S3C2410_NFREG(0x20) -#define S3C2412_NFEBLK S3C2410_NFREG(0x24) -#define S3C2412_NFSTAT S3C2410_NFREG(0x28) -#define S3C2412_NFMECC_ERR0 S3C2410_NFREG(0x2C) -#define S3C2412_NFMECC_ERR1 S3C2410_NFREG(0x30) -#define S3C2412_NFMECC0 S3C2410_NFREG(0x34) -#define S3C2412_NFMECC1 S3C2410_NFREG(0x38) -#define S3C2412_NFSECC S3C2410_NFREG(0x3C) - -#define S3C2410_NFCONF_EN (1<<15) -#define S3C2410_NFCONF_512BYTE (1<<14) -#define S3C2410_NFCONF_4STEP (1<<13) -#define S3C2410_NFCONF_INITECC (1<<12) -#define S3C2410_NFCONF_nFCE (1<<11) -#define S3C2410_NFCONF_TACLS(x) ((x)<<8) -#define S3C2410_NFCONF_TWRPH0(x) ((x)<<4) -#define S3C2410_NFCONF_TWRPH1(x) ((x)<<0) - -#define S3C2410_NFSTAT_BUSY (1<<0) - -#define S3C2440_NFCONF_BUSWIDTH_8 (0<<0) -#define S3C2440_NFCONF_BUSWIDTH_16 (1<<0) -#define S3C2440_NFCONF_ADVFLASH (1<<3) -#define S3C2440_NFCONF_TACLS(x) ((x)<<12) -#define S3C2440_NFCONF_TWRPH0(x) ((x)<<8) -#define S3C2440_NFCONF_TWRPH1(x) ((x)<<4) - -#define S3C2440_NFCONT_LOCKTIGHT (1<<13) -#define S3C2440_NFCONT_SOFTLOCK (1<<12) -#define S3C2440_NFCONT_ILLEGALACC_EN (1<<10) -#define S3C2440_NFCONT_RNBINT_EN (1<<9) -#define S3C2440_NFCONT_RN_FALLING (1<<8) -#define S3C2440_NFCONT_SPARE_ECCLOCK (1<<6) -#define S3C2440_NFCONT_MAIN_ECCLOCK (1<<5) -#define S3C2440_NFCONT_INITECC (1<<4) -#define S3C2440_NFCONT_nFCE (1<<1) -#define S3C2440_NFCONT_ENABLE (1<<0) - -#define S3C2440_NFSTAT_READY (1<<0) -#define S3C2440_NFSTAT_nCE (1<<1) -#define S3C2440_NFSTAT_RnB_CHANGE (1<<2) -#define S3C2440_NFSTAT_ILLEGAL_ACCESS (1<<3) - -#define S3C2412_NFCONF_NANDBOOT (1<<31) -#define S3C2412_NFCONF_ECCCLKCON (1<<30) -#define S3C2412_NFCONF_ECC_MLC (1<<24) -#define S3C2412_NFCONF_TACLS_MASK (7<<12) /* 1 extra bit of Tacls */ - -#define S3C2412_NFCONT_ECC4_DIRWR (1<<18) -#define S3C2412_NFCONT_LOCKTIGHT (1<<17) -#define S3C2412_NFCONT_SOFTLOCK (1<<16) -#define S3C2412_NFCONT_ECC4_ENCINT (1<<13) -#define S3C2412_NFCONT_ECC4_DECINT (1<<12) -#define S3C2412_NFCONT_MAIN_ECC_LOCK (1<<7) -#define S3C2412_NFCONT_INIT_MAIN_ECC (1<<5) -#define S3C2412_NFCONT_nFCE1 (1<<2) -#define S3C2412_NFCONT_nFCE0 (1<<1) - -#define S3C2412_NFSTAT_ECC_ENCDONE (1<<7) -#define S3C2412_NFSTAT_ECC_DECDONE (1<<6) -#define S3C2412_NFSTAT_ILLEGAL_ACCESS (1<<5) -#define S3C2412_NFSTAT_RnB_CHANGE (1<<4) -#define S3C2412_NFSTAT_nFCE1 (1<<3) -#define S3C2412_NFSTAT_nFCE0 (1<<2) -#define S3C2412_NFSTAT_Res1 (1<<1) -#define S3C2412_NFSTAT_READY (1<<0) - -#define S3C2412_NFECCERR_SERRDATA(x) (((x) >> 21) & 0xf) -#define S3C2412_NFECCERR_SERRBIT(x) (((x) >> 18) & 0x7) -#define S3C2412_NFECCERR_MERRDATA(x) (((x) >> 7) & 0x3ff) -#define S3C2412_NFECCERR_MERRBIT(x) (((x) >> 4) & 0x7) -#define S3C2412_NFECCERR_SPARE_ERR(x) (((x) >> 2) & 0x3) -#define S3C2412_NFECCERR_MAIN_ERR(x) (((x) >> 2) & 0x3) -#define S3C2412_NFECCERR_NONE (0) -#define S3C2412_NFECCERR_1BIT (1) -#define S3C2412_NFECCERR_MULTIBIT (2) -#define S3C2412_NFECCERR_ECCAREA (3) - - - -#endif /* __ASM_ARM_REGS_NAND */ - -- cgit v1.2.3 From f74c95c20bad8e183e41283475f68a3e7b247af4 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 30 Oct 2008 10:14:36 +0000 Subject: [ARM] S3C: Move regs-ac97.h to arch/arm/plat-s3c/include/plat. Move regs-ac97.h to arch/arm/plat-s3c/include/plat ready to clean out old include directories. Signed-off-by: Ben Dooks --- arch/arm/mach-s3c2410/dma.c | 2 +- arch/arm/mach-s3c2412/dma.c | 2 +- arch/arm/mach-s3c2440/dma.c | 2 +- arch/arm/mach-s3c2443/dma.c | 2 +- arch/arm/plat-s3c/include/plat/regs-ac97.h | 67 ++++++++++++++++++++++++++++++ include/asm-arm/plat-s3c/regs-ac97.h | 67 ------------------------------ sound/soc/s3c24xx/s3c2443-ac97.c | 2 +- 7 files changed, 72 insertions(+), 72 deletions(-) create mode 100644 arch/arm/plat-s3c/include/plat/regs-ac97.h delete mode 100644 include/asm-arm/plat-s3c/regs-ac97.h (limited to 'include') diff --git a/arch/arm/mach-s3c2410/dma.c b/arch/arm/mach-s3c2410/dma.c index 7d914a470b6c..7fa77effccf8 100644 --- a/arch/arm/mach-s3c2410/dma.c +++ b/arch/arm/mach-s3c2410/dma.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-s3c2412/dma.c b/arch/arm/mach-s3c2412/dma.c index ba0591e71f32..7db581826676 100644 --- a/arch/arm/mach-s3c2412/dma.c +++ b/arch/arm/mach-s3c2412/dma.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-s3c2440/dma.c b/arch/arm/mach-s3c2440/dma.c index 32303f6a8321..00d88782b23b 100644 --- a/arch/arm/mach-s3c2440/dma.c +++ b/arch/arm/mach-s3c2440/dma.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-s3c2443/dma.c b/arch/arm/mach-s3c2443/dma.c index f73ccb25ff94..4185c57b5dd0 100644 --- a/arch/arm/mach-s3c2443/dma.c +++ b/arch/arm/mach-s3c2443/dma.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/plat-s3c/include/plat/regs-ac97.h b/arch/arm/plat-s3c/include/plat/regs-ac97.h new file mode 100644 index 000000000000..c3878f7acb83 --- /dev/null +++ b/arch/arm/plat-s3c/include/plat/regs-ac97.h @@ -0,0 +1,67 @@ +/* arch/arm/mach-s3c2410/include/mach/regs-ac97.h + * + * Copyright (c) 2006 Simtec Electronics + * http://www.simtec.co.uk/products/SWLINUX/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * S3C2440 AC97 Controller +*/ + +#ifndef __ASM_ARCH_REGS_AC97_H +#define __ASM_ARCH_REGS_AC97_H __FILE__ + +#define S3C_AC97_GLBCTRL (0x00) + +#define S3C_AC97_GLBCTRL_CODECREADYIE (1<<22) +#define S3C_AC97_GLBCTRL_PCMOUTURIE (1<<21) +#define S3C_AC97_GLBCTRL_PCMINORIE (1<<20) +#define S3C_AC97_GLBCTRL_MICINORIE (1<<19) +#define S3C_AC97_GLBCTRL_PCMOUTTIE (1<<18) +#define S3C_AC97_GLBCTRL_PCMINTIE (1<<17) +#define S3C_AC97_GLBCTRL_MICINTIE (1<<16) +#define S3C_AC97_GLBCTRL_PCMOUTTM_OFF (0<<12) +#define S3C_AC97_GLBCTRL_PCMOUTTM_PIO (1<<12) +#define S3C_AC97_GLBCTRL_PCMOUTTM_DMA (2<<12) +#define S3C_AC97_GLBCTRL_PCMOUTTM_MASK (3<<12) +#define S3C_AC97_GLBCTRL_PCMINTM_OFF (0<<10) +#define S3C_AC97_GLBCTRL_PCMINTM_PIO (1<<10) +#define S3C_AC97_GLBCTRL_PCMINTM_DMA (2<<10) +#define S3C_AC97_GLBCTRL_PCMINTM_MASK (3<<10) +#define S3C_AC97_GLBCTRL_MICINTM_OFF (0<<8) +#define S3C_AC97_GLBCTRL_MICINTM_PIO (1<<8) +#define S3C_AC97_GLBCTRL_MICINTM_DMA (2<<8) +#define S3C_AC97_GLBCTRL_MICINTM_MASK (3<<8) +#define S3C_AC97_GLBCTRL_TRANSFERDATAENABLE (1<<3) +#define S3C_AC97_GLBCTRL_ACLINKON (1<<2) +#define S3C_AC97_GLBCTRL_WARMRESET (1<<1) +#define S3C_AC97_GLBCTRL_COLDRESET (1<<0) + +#define S3C_AC97_GLBSTAT (0x04) + +#define S3C_AC97_GLBSTAT_CODECREADY (1<<22) +#define S3C_AC97_GLBSTAT_PCMOUTUR (1<<21) +#define S3C_AC97_GLBSTAT_PCMINORI (1<<20) +#define S3C_AC97_GLBSTAT_MICINORI (1<<19) +#define S3C_AC97_GLBSTAT_PCMOUTTI (1<<18) +#define S3C_AC97_GLBSTAT_PCMINTI (1<<17) +#define S3C_AC97_GLBSTAT_MICINTI (1<<16) +#define S3C_AC97_GLBSTAT_MAINSTATE_IDLE (0<<0) +#define S3C_AC97_GLBSTAT_MAINSTATE_INIT (1<<0) +#define S3C_AC97_GLBSTAT_MAINSTATE_READY (2<<0) +#define S3C_AC97_GLBSTAT_MAINSTATE_ACTIVE (3<<0) +#define S3C_AC97_GLBSTAT_MAINSTATE_LP (4<<0) +#define S3C_AC97_GLBSTAT_MAINSTATE_WARM (5<<0) + +#define S3C_AC97_CODEC_CMD (0x08) + +#define S3C_AC97_CODEC_CMD_READ (1<<23) + +#define S3C_AC97_STAT (0x0c) +#define S3C_AC97_PCM_ADDR (0x10) +#define S3C_AC97_PCM_DATA (0x18) +#define S3C_AC97_MIC_DATA (0x1C) + +#endif /* __ASM_ARCH_REGS_AC97_H */ diff --git a/include/asm-arm/plat-s3c/regs-ac97.h b/include/asm-arm/plat-s3c/regs-ac97.h deleted file mode 100644 index c3878f7acb83..000000000000 --- a/include/asm-arm/plat-s3c/regs-ac97.h +++ /dev/null @@ -1,67 +0,0 @@ -/* arch/arm/mach-s3c2410/include/mach/regs-ac97.h - * - * Copyright (c) 2006 Simtec Electronics - * http://www.simtec.co.uk/products/SWLINUX/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * S3C2440 AC97 Controller -*/ - -#ifndef __ASM_ARCH_REGS_AC97_H -#define __ASM_ARCH_REGS_AC97_H __FILE__ - -#define S3C_AC97_GLBCTRL (0x00) - -#define S3C_AC97_GLBCTRL_CODECREADYIE (1<<22) -#define S3C_AC97_GLBCTRL_PCMOUTURIE (1<<21) -#define S3C_AC97_GLBCTRL_PCMINORIE (1<<20) -#define S3C_AC97_GLBCTRL_MICINORIE (1<<19) -#define S3C_AC97_GLBCTRL_PCMOUTTIE (1<<18) -#define S3C_AC97_GLBCTRL_PCMINTIE (1<<17) -#define S3C_AC97_GLBCTRL_MICINTIE (1<<16) -#define S3C_AC97_GLBCTRL_PCMOUTTM_OFF (0<<12) -#define S3C_AC97_GLBCTRL_PCMOUTTM_PIO (1<<12) -#define S3C_AC97_GLBCTRL_PCMOUTTM_DMA (2<<12) -#define S3C_AC97_GLBCTRL_PCMOUTTM_MASK (3<<12) -#define S3C_AC97_GLBCTRL_PCMINTM_OFF (0<<10) -#define S3C_AC97_GLBCTRL_PCMINTM_PIO (1<<10) -#define S3C_AC97_GLBCTRL_PCMINTM_DMA (2<<10) -#define S3C_AC97_GLBCTRL_PCMINTM_MASK (3<<10) -#define S3C_AC97_GLBCTRL_MICINTM_OFF (0<<8) -#define S3C_AC97_GLBCTRL_MICINTM_PIO (1<<8) -#define S3C_AC97_GLBCTRL_MICINTM_DMA (2<<8) -#define S3C_AC97_GLBCTRL_MICINTM_MASK (3<<8) -#define S3C_AC97_GLBCTRL_TRANSFERDATAENABLE (1<<3) -#define S3C_AC97_GLBCTRL_ACLINKON (1<<2) -#define S3C_AC97_GLBCTRL_WARMRESET (1<<1) -#define S3C_AC97_GLBCTRL_COLDRESET (1<<0) - -#define S3C_AC97_GLBSTAT (0x04) - -#define S3C_AC97_GLBSTAT_CODECREADY (1<<22) -#define S3C_AC97_GLBSTAT_PCMOUTUR (1<<21) -#define S3C_AC97_GLBSTAT_PCMINORI (1<<20) -#define S3C_AC97_GLBSTAT_MICINORI (1<<19) -#define S3C_AC97_GLBSTAT_PCMOUTTI (1<<18) -#define S3C_AC97_GLBSTAT_PCMINTI (1<<17) -#define S3C_AC97_GLBSTAT_MICINTI (1<<16) -#define S3C_AC97_GLBSTAT_MAINSTATE_IDLE (0<<0) -#define S3C_AC97_GLBSTAT_MAINSTATE_INIT (1<<0) -#define S3C_AC97_GLBSTAT_MAINSTATE_READY (2<<0) -#define S3C_AC97_GLBSTAT_MAINSTATE_ACTIVE (3<<0) -#define S3C_AC97_GLBSTAT_MAINSTATE_LP (4<<0) -#define S3C_AC97_GLBSTAT_MAINSTATE_WARM (5<<0) - -#define S3C_AC97_CODEC_CMD (0x08) - -#define S3C_AC97_CODEC_CMD_READ (1<<23) - -#define S3C_AC97_STAT (0x0c) -#define S3C_AC97_PCM_ADDR (0x10) -#define S3C_AC97_PCM_DATA (0x18) -#define S3C_AC97_MIC_DATA (0x1C) - -#endif /* __ASM_ARCH_REGS_AC97_H */ diff --git a/sound/soc/s3c24xx/s3c2443-ac97.c b/sound/soc/s3c24xx/s3c2443-ac97.c index 19c5c3cf5d8c..c473a3b97b55 100644 --- a/sound/soc/s3c24xx/s3c2443-ac97.c +++ b/sound/soc/s3c24xx/s3c2443-ac97.c @@ -28,7 +28,7 @@ #include #include -#include +#include #include #include #include -- cgit v1.2.3 From 57bd4b91a6cfc5bad4c5d829ef85293ea63643ea Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 30 Oct 2008 10:14:37 +0000 Subject: [ARM] S3C24XX: Movev udc headers to arch/arm/plat-s3c24xx/include/plat Move the udc headers to the proper home in arch/arm/plat-s3c24xx/include/plat ready to clean out the old include directories. Signed-off-by: Ben Dooks --- arch/arm/mach-s3c2410/mach-h1940.c | 2 +- arch/arm/mach-s3c2410/mach-n30.c | 2 +- arch/arm/mach-s3c2410/mach-qt2410.c | 2 +- arch/arm/mach-s3c2412/mach-jive.c | 2 +- arch/arm/mach-s3c2412/mach-smdk2413.c | 2 +- arch/arm/plat-s3c24xx/devs.c | 2 +- arch/arm/plat-s3c24xx/include/plat/regs-udc.h | 153 ++++++++++++++++++++++++++ arch/arm/plat-s3c24xx/include/plat/udc.h | 36 ++++++ drivers/usb/gadget/s3c2410_udc.c | 4 +- include/asm-arm/plat-s3c24xx/regs-udc.h | 153 -------------------------- include/asm-arm/plat-s3c24xx/udc.h | 36 ------ 11 files changed, 197 insertions(+), 197 deletions(-) create mode 100644 arch/arm/plat-s3c24xx/include/plat/regs-udc.h create mode 100644 arch/arm/plat-s3c24xx/include/plat/udc.h delete mode 100644 include/asm-arm/plat-s3c24xx/regs-udc.h delete mode 100644 include/asm-arm/plat-s3c24xx/udc.h (limited to 'include') diff --git a/arch/arm/mach-s3c2410/mach-h1940.c b/arch/arm/mach-s3c2410/mach-h1940.c index 98716d0108e9..32d550fcff4d 100644 --- a/arch/arm/mach-s3c2410/mach-h1940.c +++ b/arch/arm/mach-s3c2410/mach-h1940.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mach-s3c2410/mach-n30.c b/arch/arm/mach-s3c2410/mach-n30.c index 836c9f639215..7a7c45d28fe7 100644 --- a/arch/arm/mach-s3c2410/mach-n30.c +++ b/arch/arm/mach-s3c2410/mach-n30.c @@ -47,7 +47,7 @@ #include #include #include -#include +#include static struct map_desc n30_iodesc[] __initdata = { /* nothing here yet */ diff --git a/arch/arm/mach-s3c2410/mach-qt2410.c b/arch/arm/mach-s3c2410/mach-qt2410.c index 315c27271f18..ef868472f6a4 100644 --- a/arch/arm/mach-s3c2410/mach-qt2410.c +++ b/arch/arm/mach-s3c2410/mach-qt2410.c @@ -51,7 +51,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mach-s3c2412/mach-jive.c b/arch/arm/mach-s3c2412/mach-jive.c index c8d9a346b3bf..25ff1ec9f8ad 100644 --- a/arch/arm/mach-s3c2412/mach-jive.c +++ b/arch/arm/mach-s3c2412/mach-jive.c @@ -52,7 +52,7 @@ #include #include #include -#include +#include static struct map_desc jive_iodesc[] __initdata = { }; diff --git a/arch/arm/mach-s3c2412/mach-smdk2413.c b/arch/arm/mach-s3c2412/mach-smdk2413.c index c719b5a740a9..8fd17b8d5679 100644 --- a/arch/arm/mach-s3c2412/mach-smdk2413.c +++ b/arch/arm/mach-s3c2412/mach-smdk2413.c @@ -37,7 +37,7 @@ #include #include -#include +#include #include #include diff --git a/arch/arm/plat-s3c24xx/devs.c b/arch/arm/plat-s3c24xx/devs.c index e93f8bf6d338..07491bcd13ba 100644 --- a/arch/arm/plat-s3c24xx/devs.c +++ b/arch/arm/plat-s3c24xx/devs.c @@ -29,7 +29,7 @@ #include #include -#include +#include #include #include diff --git a/arch/arm/plat-s3c24xx/include/plat/regs-udc.h b/arch/arm/plat-s3c24xx/include/plat/regs-udc.h new file mode 100644 index 000000000000..f0dd4a41b37b --- /dev/null +++ b/arch/arm/plat-s3c24xx/include/plat/regs-udc.h @@ -0,0 +1,153 @@ +/* arch/arm/mach-s3c2410/include/mach/regs-udc.h + * + * Copyright (C) 2004 Herbert Poetzl + * + * This include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. +*/ + +#ifndef __ASM_ARCH_REGS_UDC_H +#define __ASM_ARCH_REGS_UDC_H + +#define S3C2410_USBDREG(x) (x) + +#define S3C2410_UDC_FUNC_ADDR_REG S3C2410_USBDREG(0x0140) +#define S3C2410_UDC_PWR_REG S3C2410_USBDREG(0x0144) +#define S3C2410_UDC_EP_INT_REG S3C2410_USBDREG(0x0148) + +#define S3C2410_UDC_USB_INT_REG S3C2410_USBDREG(0x0158) +#define S3C2410_UDC_EP_INT_EN_REG S3C2410_USBDREG(0x015c) + +#define S3C2410_UDC_USB_INT_EN_REG S3C2410_USBDREG(0x016c) + +#define S3C2410_UDC_FRAME_NUM1_REG S3C2410_USBDREG(0x0170) +#define S3C2410_UDC_FRAME_NUM2_REG S3C2410_USBDREG(0x0174) + +#define S3C2410_UDC_EP0_FIFO_REG S3C2410_USBDREG(0x01c0) +#define S3C2410_UDC_EP1_FIFO_REG S3C2410_USBDREG(0x01c4) +#define S3C2410_UDC_EP2_FIFO_REG S3C2410_USBDREG(0x01c8) +#define S3C2410_UDC_EP3_FIFO_REG S3C2410_USBDREG(0x01cc) +#define S3C2410_UDC_EP4_FIFO_REG S3C2410_USBDREG(0x01d0) + +#define S3C2410_UDC_EP1_DMA_CON S3C2410_USBDREG(0x0200) +#define S3C2410_UDC_EP1_DMA_UNIT S3C2410_USBDREG(0x0204) +#define S3C2410_UDC_EP1_DMA_FIFO S3C2410_USBDREG(0x0208) +#define S3C2410_UDC_EP1_DMA_TTC_L S3C2410_USBDREG(0x020c) +#define S3C2410_UDC_EP1_DMA_TTC_M S3C2410_USBDREG(0x0210) +#define S3C2410_UDC_EP1_DMA_TTC_H S3C2410_USBDREG(0x0214) + +#define S3C2410_UDC_EP2_DMA_CON S3C2410_USBDREG(0x0218) +#define S3C2410_UDC_EP2_DMA_UNIT S3C2410_USBDREG(0x021c) +#define S3C2410_UDC_EP2_DMA_FIFO S3C2410_USBDREG(0x0220) +#define S3C2410_UDC_EP2_DMA_TTC_L S3C2410_USBDREG(0x0224) +#define S3C2410_UDC_EP2_DMA_TTC_M S3C2410_USBDREG(0x0228) +#define S3C2410_UDC_EP2_DMA_TTC_H S3C2410_USBDREG(0x022c) + +#define S3C2410_UDC_EP3_DMA_CON S3C2410_USBDREG(0x0240) +#define S3C2410_UDC_EP3_DMA_UNIT S3C2410_USBDREG(0x0244) +#define S3C2410_UDC_EP3_DMA_FIFO S3C2410_USBDREG(0x0248) +#define S3C2410_UDC_EP3_DMA_TTC_L S3C2410_USBDREG(0x024c) +#define S3C2410_UDC_EP3_DMA_TTC_M S3C2410_USBDREG(0x0250) +#define S3C2410_UDC_EP3_DMA_TTC_H S3C2410_USBDREG(0x0254) + +#define S3C2410_UDC_EP4_DMA_CON S3C2410_USBDREG(0x0258) +#define S3C2410_UDC_EP4_DMA_UNIT S3C2410_USBDREG(0x025c) +#define S3C2410_UDC_EP4_DMA_FIFO S3C2410_USBDREG(0x0260) +#define S3C2410_UDC_EP4_DMA_TTC_L S3C2410_USBDREG(0x0264) +#define S3C2410_UDC_EP4_DMA_TTC_M S3C2410_USBDREG(0x0268) +#define S3C2410_UDC_EP4_DMA_TTC_H S3C2410_USBDREG(0x026c) + +#define S3C2410_UDC_INDEX_REG S3C2410_USBDREG(0x0178) + +/* indexed registers */ + +#define S3C2410_UDC_MAXP_REG S3C2410_USBDREG(0x0180) + +#define S3C2410_UDC_EP0_CSR_REG S3C2410_USBDREG(0x0184) + +#define S3C2410_UDC_IN_CSR1_REG S3C2410_USBDREG(0x0184) +#define S3C2410_UDC_IN_CSR2_REG S3C2410_USBDREG(0x0188) + +#define S3C2410_UDC_OUT_CSR1_REG S3C2410_USBDREG(0x0190) +#define S3C2410_UDC_OUT_CSR2_REG S3C2410_USBDREG(0x0194) +#define S3C2410_UDC_OUT_FIFO_CNT1_REG S3C2410_USBDREG(0x0198) +#define S3C2410_UDC_OUT_FIFO_CNT2_REG S3C2410_USBDREG(0x019c) + +#define S3C2410_UDC_FUNCADDR_UPDATE (1<<7) + +#define S3C2410_UDC_PWR_ISOUP (1<<7) // R/W +#define S3C2410_UDC_PWR_RESET (1<<3) // R +#define S3C2410_UDC_PWR_RESUME (1<<2) // R/W +#define S3C2410_UDC_PWR_SUSPEND (1<<1) // R +#define S3C2410_UDC_PWR_ENSUSPEND (1<<0) // R/W + +#define S3C2410_UDC_PWR_DEFAULT 0x00 + +#define S3C2410_UDC_INT_EP4 (1<<4) // R/W (clear only) +#define S3C2410_UDC_INT_EP3 (1<<3) // R/W (clear only) +#define S3C2410_UDC_INT_EP2 (1<<2) // R/W (clear only) +#define S3C2410_UDC_INT_EP1 (1<<1) // R/W (clear only) +#define S3C2410_UDC_INT_EP0 (1<<0) // R/W (clear only) + +#define S3C2410_UDC_USBINT_RESET (1<<2) // R/W (clear only) +#define S3C2410_UDC_USBINT_RESUME (1<<1) // R/W (clear only) +#define S3C2410_UDC_USBINT_SUSPEND (1<<0) // R/W (clear only) + +#define S3C2410_UDC_INTE_EP4 (1<<4) // R/W +#define S3C2410_UDC_INTE_EP3 (1<<3) // R/W +#define S3C2410_UDC_INTE_EP2 (1<<2) // R/W +#define S3C2410_UDC_INTE_EP1 (1<<1) // R/W +#define S3C2410_UDC_INTE_EP0 (1<<0) // R/W + +#define S3C2410_UDC_USBINTE_RESET (1<<2) // R/W +#define S3C2410_UDC_USBINTE_SUSPEND (1<<0) // R/W + + +#define S3C2410_UDC_INDEX_EP0 (0x00) +#define S3C2410_UDC_INDEX_EP1 (0x01) // ?? +#define S3C2410_UDC_INDEX_EP2 (0x02) // ?? +#define S3C2410_UDC_INDEX_EP3 (0x03) // ?? +#define S3C2410_UDC_INDEX_EP4 (0x04) // ?? + +#define S3C2410_UDC_ICSR1_CLRDT (1<<6) // R/W +#define S3C2410_UDC_ICSR1_SENTSTL (1<<5) // R/W (clear only) +#define S3C2410_UDC_ICSR1_SENDSTL (1<<4) // R/W +#define S3C2410_UDC_ICSR1_FFLUSH (1<<3) // W (set only) +#define S3C2410_UDC_ICSR1_UNDRUN (1<<2) // R/W (clear only) +#define S3C2410_UDC_ICSR1_PKTRDY (1<<0) // R/W (set only) + +#define S3C2410_UDC_ICSR2_AUTOSET (1<<7) // R/W +#define S3C2410_UDC_ICSR2_ISO (1<<6) // R/W +#define S3C2410_UDC_ICSR2_MODEIN (1<<5) // R/W +#define S3C2410_UDC_ICSR2_DMAIEN (1<<4) // R/W + +#define S3C2410_UDC_OCSR1_CLRDT (1<<7) // R/W +#define S3C2410_UDC_OCSR1_SENTSTL (1<<6) // R/W (clear only) +#define S3C2410_UDC_OCSR1_SENDSTL (1<<5) // R/W +#define S3C2410_UDC_OCSR1_FFLUSH (1<<4) // R/W +#define S3C2410_UDC_OCSR1_DERROR (1<<3) // R +#define S3C2410_UDC_OCSR1_OVRRUN (1<<2) // R/W (clear only) +#define S3C2410_UDC_OCSR1_PKTRDY (1<<0) // R/W (clear only) + +#define S3C2410_UDC_OCSR2_AUTOCLR (1<<7) // R/W +#define S3C2410_UDC_OCSR2_ISO (1<<6) // R/W +#define S3C2410_UDC_OCSR2_DMAIEN (1<<5) // R/W + +#define S3C2410_UDC_EP0_CSR_OPKRDY (1<<0) +#define S3C2410_UDC_EP0_CSR_IPKRDY (1<<1) +#define S3C2410_UDC_EP0_CSR_SENTSTL (1<<2) +#define S3C2410_UDC_EP0_CSR_DE (1<<3) +#define S3C2410_UDC_EP0_CSR_SE (1<<4) +#define S3C2410_UDC_EP0_CSR_SENDSTL (1<<5) +#define S3C2410_UDC_EP0_CSR_SOPKTRDY (1<<6) +#define S3C2410_UDC_EP0_CSR_SSE (1<<7) + +#define S3C2410_UDC_MAXP_8 (1<<0) +#define S3C2410_UDC_MAXP_16 (1<<1) +#define S3C2410_UDC_MAXP_32 (1<<2) +#define S3C2410_UDC_MAXP_64 (1<<3) + + +#endif diff --git a/arch/arm/plat-s3c24xx/include/plat/udc.h b/arch/arm/plat-s3c24xx/include/plat/udc.h new file mode 100644 index 000000000000..546bb4008f49 --- /dev/null +++ b/arch/arm/plat-s3c24xx/include/plat/udc.h @@ -0,0 +1,36 @@ +/* arch/arm/mach-s3c2410/include/mach/udc.h + * + * Copyright (c) 2005 Arnaud Patard + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * + * Changelog: + * 14-Mar-2005 RTP Created file + * 02-Aug-2005 RTP File rename + * 07-Sep-2005 BJD Minor cleanups, changed cmd to enum + * 18-Jan-2007 HMW Add per-platform vbus_draw function +*/ + +#ifndef __ASM_ARM_ARCH_UDC_H +#define __ASM_ARM_ARCH_UDC_H + +enum s3c2410_udc_cmd_e { + S3C2410_UDC_P_ENABLE = 1, /* Pull-up enable */ + S3C2410_UDC_P_DISABLE = 2, /* Pull-up disable */ + S3C2410_UDC_P_RESET = 3, /* UDC reset, in case of */ +}; + +struct s3c2410_udc_mach_info { + void (*udc_command)(enum s3c2410_udc_cmd_e); + void (*vbus_draw)(unsigned int ma); + unsigned int vbus_pin; + unsigned char vbus_pin_inverted; +}; + +extern void __init s3c24xx_udc_set_platdata(struct s3c2410_udc_mach_info *); + +#endif /* __ASM_ARM_ARCH_UDC_H */ diff --git a/drivers/usb/gadget/s3c2410_udc.c b/drivers/usb/gadget/s3c2410_udc.c index 00ba06b44752..8d8d65165983 100644 --- a/drivers/usb/gadget/s3c2410_udc.c +++ b/drivers/usb/gadget/s3c2410_udc.c @@ -53,8 +53,8 @@ #include #include -#include -#include +#include +#include #include "s3c2410_udc.h" diff --git a/include/asm-arm/plat-s3c24xx/regs-udc.h b/include/asm-arm/plat-s3c24xx/regs-udc.h deleted file mode 100644 index f0dd4a41b37b..000000000000 --- a/include/asm-arm/plat-s3c24xx/regs-udc.h +++ /dev/null @@ -1,153 +0,0 @@ -/* arch/arm/mach-s3c2410/include/mach/regs-udc.h - * - * Copyright (C) 2004 Herbert Poetzl - * - * This include file is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. -*/ - -#ifndef __ASM_ARCH_REGS_UDC_H -#define __ASM_ARCH_REGS_UDC_H - -#define S3C2410_USBDREG(x) (x) - -#define S3C2410_UDC_FUNC_ADDR_REG S3C2410_USBDREG(0x0140) -#define S3C2410_UDC_PWR_REG S3C2410_USBDREG(0x0144) -#define S3C2410_UDC_EP_INT_REG S3C2410_USBDREG(0x0148) - -#define S3C2410_UDC_USB_INT_REG S3C2410_USBDREG(0x0158) -#define S3C2410_UDC_EP_INT_EN_REG S3C2410_USBDREG(0x015c) - -#define S3C2410_UDC_USB_INT_EN_REG S3C2410_USBDREG(0x016c) - -#define S3C2410_UDC_FRAME_NUM1_REG S3C2410_USBDREG(0x0170) -#define S3C2410_UDC_FRAME_NUM2_REG S3C2410_USBDREG(0x0174) - -#define S3C2410_UDC_EP0_FIFO_REG S3C2410_USBDREG(0x01c0) -#define S3C2410_UDC_EP1_FIFO_REG S3C2410_USBDREG(0x01c4) -#define S3C2410_UDC_EP2_FIFO_REG S3C2410_USBDREG(0x01c8) -#define S3C2410_UDC_EP3_FIFO_REG S3C2410_USBDREG(0x01cc) -#define S3C2410_UDC_EP4_FIFO_REG S3C2410_USBDREG(0x01d0) - -#define S3C2410_UDC_EP1_DMA_CON S3C2410_USBDREG(0x0200) -#define S3C2410_UDC_EP1_DMA_UNIT S3C2410_USBDREG(0x0204) -#define S3C2410_UDC_EP1_DMA_FIFO S3C2410_USBDREG(0x0208) -#define S3C2410_UDC_EP1_DMA_TTC_L S3C2410_USBDREG(0x020c) -#define S3C2410_UDC_EP1_DMA_TTC_M S3C2410_USBDREG(0x0210) -#define S3C2410_UDC_EP1_DMA_TTC_H S3C2410_USBDREG(0x0214) - -#define S3C2410_UDC_EP2_DMA_CON S3C2410_USBDREG(0x0218) -#define S3C2410_UDC_EP2_DMA_UNIT S3C2410_USBDREG(0x021c) -#define S3C2410_UDC_EP2_DMA_FIFO S3C2410_USBDREG(0x0220) -#define S3C2410_UDC_EP2_DMA_TTC_L S3C2410_USBDREG(0x0224) -#define S3C2410_UDC_EP2_DMA_TTC_M S3C2410_USBDREG(0x0228) -#define S3C2410_UDC_EP2_DMA_TTC_H S3C2410_USBDREG(0x022c) - -#define S3C2410_UDC_EP3_DMA_CON S3C2410_USBDREG(0x0240) -#define S3C2410_UDC_EP3_DMA_UNIT S3C2410_USBDREG(0x0244) -#define S3C2410_UDC_EP3_DMA_FIFO S3C2410_USBDREG(0x0248) -#define S3C2410_UDC_EP3_DMA_TTC_L S3C2410_USBDREG(0x024c) -#define S3C2410_UDC_EP3_DMA_TTC_M S3C2410_USBDREG(0x0250) -#define S3C2410_UDC_EP3_DMA_TTC_H S3C2410_USBDREG(0x0254) - -#define S3C2410_UDC_EP4_DMA_CON S3C2410_USBDREG(0x0258) -#define S3C2410_UDC_EP4_DMA_UNIT S3C2410_USBDREG(0x025c) -#define S3C2410_UDC_EP4_DMA_FIFO S3C2410_USBDREG(0x0260) -#define S3C2410_UDC_EP4_DMA_TTC_L S3C2410_USBDREG(0x0264) -#define S3C2410_UDC_EP4_DMA_TTC_M S3C2410_USBDREG(0x0268) -#define S3C2410_UDC_EP4_DMA_TTC_H S3C2410_USBDREG(0x026c) - -#define S3C2410_UDC_INDEX_REG S3C2410_USBDREG(0x0178) - -/* indexed registers */ - -#define S3C2410_UDC_MAXP_REG S3C2410_USBDREG(0x0180) - -#define S3C2410_UDC_EP0_CSR_REG S3C2410_USBDREG(0x0184) - -#define S3C2410_UDC_IN_CSR1_REG S3C2410_USBDREG(0x0184) -#define S3C2410_UDC_IN_CSR2_REG S3C2410_USBDREG(0x0188) - -#define S3C2410_UDC_OUT_CSR1_REG S3C2410_USBDREG(0x0190) -#define S3C2410_UDC_OUT_CSR2_REG S3C2410_USBDREG(0x0194) -#define S3C2410_UDC_OUT_FIFO_CNT1_REG S3C2410_USBDREG(0x0198) -#define S3C2410_UDC_OUT_FIFO_CNT2_REG S3C2410_USBDREG(0x019c) - -#define S3C2410_UDC_FUNCADDR_UPDATE (1<<7) - -#define S3C2410_UDC_PWR_ISOUP (1<<7) // R/W -#define S3C2410_UDC_PWR_RESET (1<<3) // R -#define S3C2410_UDC_PWR_RESUME (1<<2) // R/W -#define S3C2410_UDC_PWR_SUSPEND (1<<1) // R -#define S3C2410_UDC_PWR_ENSUSPEND (1<<0) // R/W - -#define S3C2410_UDC_PWR_DEFAULT 0x00 - -#define S3C2410_UDC_INT_EP4 (1<<4) // R/W (clear only) -#define S3C2410_UDC_INT_EP3 (1<<3) // R/W (clear only) -#define S3C2410_UDC_INT_EP2 (1<<2) // R/W (clear only) -#define S3C2410_UDC_INT_EP1 (1<<1) // R/W (clear only) -#define S3C2410_UDC_INT_EP0 (1<<0) // R/W (clear only) - -#define S3C2410_UDC_USBINT_RESET (1<<2) // R/W (clear only) -#define S3C2410_UDC_USBINT_RESUME (1<<1) // R/W (clear only) -#define S3C2410_UDC_USBINT_SUSPEND (1<<0) // R/W (clear only) - -#define S3C2410_UDC_INTE_EP4 (1<<4) // R/W -#define S3C2410_UDC_INTE_EP3 (1<<3) // R/W -#define S3C2410_UDC_INTE_EP2 (1<<2) // R/W -#define S3C2410_UDC_INTE_EP1 (1<<1) // R/W -#define S3C2410_UDC_INTE_EP0 (1<<0) // R/W - -#define S3C2410_UDC_USBINTE_RESET (1<<2) // R/W -#define S3C2410_UDC_USBINTE_SUSPEND (1<<0) // R/W - - -#define S3C2410_UDC_INDEX_EP0 (0x00) -#define S3C2410_UDC_INDEX_EP1 (0x01) // ?? -#define S3C2410_UDC_INDEX_EP2 (0x02) // ?? -#define S3C2410_UDC_INDEX_EP3 (0x03) // ?? -#define S3C2410_UDC_INDEX_EP4 (0x04) // ?? - -#define S3C2410_UDC_ICSR1_CLRDT (1<<6) // R/W -#define S3C2410_UDC_ICSR1_SENTSTL (1<<5) // R/W (clear only) -#define S3C2410_UDC_ICSR1_SENDSTL (1<<4) // R/W -#define S3C2410_UDC_ICSR1_FFLUSH (1<<3) // W (set only) -#define S3C2410_UDC_ICSR1_UNDRUN (1<<2) // R/W (clear only) -#define S3C2410_UDC_ICSR1_PKTRDY (1<<0) // R/W (set only) - -#define S3C2410_UDC_ICSR2_AUTOSET (1<<7) // R/W -#define S3C2410_UDC_ICSR2_ISO (1<<6) // R/W -#define S3C2410_UDC_ICSR2_MODEIN (1<<5) // R/W -#define S3C2410_UDC_ICSR2_DMAIEN (1<<4) // R/W - -#define S3C2410_UDC_OCSR1_CLRDT (1<<7) // R/W -#define S3C2410_UDC_OCSR1_SENTSTL (1<<6) // R/W (clear only) -#define S3C2410_UDC_OCSR1_SENDSTL (1<<5) // R/W -#define S3C2410_UDC_OCSR1_FFLUSH (1<<4) // R/W -#define S3C2410_UDC_OCSR1_DERROR (1<<3) // R -#define S3C2410_UDC_OCSR1_OVRRUN (1<<2) // R/W (clear only) -#define S3C2410_UDC_OCSR1_PKTRDY (1<<0) // R/W (clear only) - -#define S3C2410_UDC_OCSR2_AUTOCLR (1<<7) // R/W -#define S3C2410_UDC_OCSR2_ISO (1<<6) // R/W -#define S3C2410_UDC_OCSR2_DMAIEN (1<<5) // R/W - -#define S3C2410_UDC_EP0_CSR_OPKRDY (1<<0) -#define S3C2410_UDC_EP0_CSR_IPKRDY (1<<1) -#define S3C2410_UDC_EP0_CSR_SENTSTL (1<<2) -#define S3C2410_UDC_EP0_CSR_DE (1<<3) -#define S3C2410_UDC_EP0_CSR_SE (1<<4) -#define S3C2410_UDC_EP0_CSR_SENDSTL (1<<5) -#define S3C2410_UDC_EP0_CSR_SOPKTRDY (1<<6) -#define S3C2410_UDC_EP0_CSR_SSE (1<<7) - -#define S3C2410_UDC_MAXP_8 (1<<0) -#define S3C2410_UDC_MAXP_16 (1<<1) -#define S3C2410_UDC_MAXP_32 (1<<2) -#define S3C2410_UDC_MAXP_64 (1<<3) - - -#endif diff --git a/include/asm-arm/plat-s3c24xx/udc.h b/include/asm-arm/plat-s3c24xx/udc.h deleted file mode 100644 index 546bb4008f49..000000000000 --- a/include/asm-arm/plat-s3c24xx/udc.h +++ /dev/null @@ -1,36 +0,0 @@ -/* arch/arm/mach-s3c2410/include/mach/udc.h - * - * Copyright (c) 2005 Arnaud Patard - * - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * - * Changelog: - * 14-Mar-2005 RTP Created file - * 02-Aug-2005 RTP File rename - * 07-Sep-2005 BJD Minor cleanups, changed cmd to enum - * 18-Jan-2007 HMW Add per-platform vbus_draw function -*/ - -#ifndef __ASM_ARM_ARCH_UDC_H -#define __ASM_ARM_ARCH_UDC_H - -enum s3c2410_udc_cmd_e { - S3C2410_UDC_P_ENABLE = 1, /* Pull-up enable */ - S3C2410_UDC_P_DISABLE = 2, /* Pull-up disable */ - S3C2410_UDC_P_RESET = 3, /* UDC reset, in case of */ -}; - -struct s3c2410_udc_mach_info { - void (*udc_command)(enum s3c2410_udc_cmd_e); - void (*vbus_draw)(unsigned int ma); - unsigned int vbus_pin; - unsigned char vbus_pin_inverted; -}; - -extern void __init s3c24xx_udc_set_platdata(struct s3c2410_udc_mach_info *); - -#endif /* __ASM_ARM_ARCH_UDC_H */ -- cgit v1.2.3 From 13622708725990b01fbc6d59d54d93820a726d7c Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 30 Oct 2008 10:14:38 +0000 Subject: [ARM] S3C: Move plat/regs-spi.h to arch/arm/plat-s3c/include/plat. Move plat/regs-spi.h to arch/arm/plat-s3c/include/plat ready ready to clean out old include directories. Signed-off-by: Ben Dooks --- arch/arm/mach-s3c2410/dma.c | 2 +- arch/arm/mach-s3c2412/dma.c | 2 +- arch/arm/mach-s3c2412/s3c2412.c | 2 +- arch/arm/mach-s3c2440/dma.c | 2 +- arch/arm/mach-s3c2443/dma.c | 2 +- arch/arm/plat-s3c24xx/devs.c | 2 +- arch/arm/plat-s3c24xx/include/plat/regs-spi.h | 82 +++++++++++++++++++++++++++ drivers/spi/spi_s3c24xx.c | 2 +- include/asm-arm/plat-s3c24xx/regs-spi.h | 82 --------------------------- 9 files changed, 89 insertions(+), 89 deletions(-) create mode 100644 arch/arm/plat-s3c24xx/include/plat/regs-spi.h delete mode 100644 include/asm-arm/plat-s3c24xx/regs-spi.h (limited to 'include') diff --git a/arch/arm/mach-s3c2410/dma.c b/arch/arm/mach-s3c2410/dma.c index 7fa77effccf8..30983cc612df 100644 --- a/arch/arm/mach-s3c2410/dma.c +++ b/arch/arm/mach-s3c2410/dma.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include static struct s3c24xx_dma_map __initdata s3c2410_dma_mappings[] = { [DMACH_XD0] = { diff --git a/arch/arm/mach-s3c2412/dma.c b/arch/arm/mach-s3c2412/dma.c index 7db581826676..f8b2dd4a3632 100644 --- a/arch/arm/mach-s3c2412/dma.c +++ b/arch/arm/mach-s3c2412/dma.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #define MAP(x) { (x)| DMA_CH_VALID, (x)| DMA_CH_VALID, (x)| DMA_CH_VALID, (x)| DMA_CH_VALID } diff --git a/arch/arm/mach-s3c2412/s3c2412.c b/arch/arm/mach-s3c2412/s3c2412.c index 313759c3da69..a086818e117e 100644 --- a/arch/arm/mach-s3c2412/s3c2412.c +++ b/arch/arm/mach-s3c2412/s3c2412.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mach-s3c2440/dma.c b/arch/arm/mach-s3c2440/dma.c index 00d88782b23b..53be553bfc21 100644 --- a/arch/arm/mach-s3c2440/dma.c +++ b/arch/arm/mach-s3c2440/dma.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include static struct s3c24xx_dma_map __initdata s3c2440_dma_mappings[] = { [DMACH_XD0] = { diff --git a/arch/arm/mach-s3c2443/dma.c b/arch/arm/mach-s3c2443/dma.c index 4185c57b5dd0..872482f02685 100644 --- a/arch/arm/mach-s3c2443/dma.c +++ b/arch/arm/mach-s3c2443/dma.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #define MAP(x) { \ [0] = (x) | DMA_CH_VALID, \ diff --git a/arch/arm/plat-s3c24xx/devs.c b/arch/arm/plat-s3c24xx/devs.c index 07491bcd13ba..adf535aaf43a 100644 --- a/arch/arm/plat-s3c24xx/devs.c +++ b/arch/arm/plat-s3c24xx/devs.c @@ -33,7 +33,7 @@ #include #include -#include +#include /* Serial port registrations */ diff --git a/arch/arm/plat-s3c24xx/include/plat/regs-spi.h b/arch/arm/plat-s3c24xx/include/plat/regs-spi.h new file mode 100644 index 000000000000..2b35479ee35c --- /dev/null +++ b/arch/arm/plat-s3c24xx/include/plat/regs-spi.h @@ -0,0 +1,82 @@ +/* arch/arm/mach-s3c2410/include/mach/regs-spi.h + * + * Copyright (c) 2004 Fetron GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * S3C2410 SPI register definition +*/ + +#ifndef __ASM_ARCH_REGS_SPI_H +#define __ASM_ARCH_REGS_SPI_H + +#define S3C2410_SPI1 (0x20) +#define S3C2412_SPI1 (0x100) + +#define S3C2410_SPCON (0x00) + +#define S3C2412_SPCON_RXFIFO_RB2 (0<<14) +#define S3C2412_SPCON_RXFIFO_RB4 (1<<14) +#define S3C2412_SPCON_RXFIFO_RB12 (2<<14) +#define S3C2412_SPCON_RXFIFO_RB14 (3<<14) +#define S3C2412_SPCON_TXFIFO_RB2 (0<<12) +#define S3C2412_SPCON_TXFIFO_RB4 (1<<12) +#define S3C2412_SPCON_TXFIFO_RB12 (2<<12) +#define S3C2412_SPCON_TXFIFO_RB14 (3<<12) +#define S3C2412_SPCON_RXFIFO_RESET (1<<11) /* RxFIFO reset */ +#define S3C2412_SPCON_TXFIFO_RESET (1<<10) /* TxFIFO reset */ +#define S3C2412_SPCON_RXFIFO_EN (1<<9) /* RxFIFO Enable */ +#define S3C2412_SPCON_TXFIFO_EN (1<<8) /* TxFIFO Enable */ + +#define S3C2412_SPCON_DIRC_RX (1<<7) + +#define S3C2410_SPCON_SMOD_DMA (2<<5) /* DMA mode */ +#define S3C2410_SPCON_SMOD_INT (1<<5) /* interrupt mode */ +#define S3C2410_SPCON_SMOD_POLL (0<<5) /* polling mode */ +#define S3C2410_SPCON_ENSCK (1<<4) /* Enable SCK */ +#define S3C2410_SPCON_MSTR (1<<3) /* Master/Slave select + 0: slave, 1: master */ +#define S3C2410_SPCON_CPOL_HIGH (1<<2) /* Clock polarity select */ +#define S3C2410_SPCON_CPOL_LOW (0<<2) /* Clock polarity select */ + +#define S3C2410_SPCON_CPHA_FMTB (1<<1) /* Clock Phase Select */ +#define S3C2410_SPCON_CPHA_FMTA (0<<1) /* Clock Phase Select */ + +#define S3C2410_SPCON_TAGD (1<<0) /* Tx auto garbage data mode */ + + +#define S3C2410_SPSTA (0x04) + +#define S3C2412_SPSTA_RXFIFO_AE (1<<11) +#define S3C2412_SPSTA_TXFIFO_AE (1<<10) +#define S3C2412_SPSTA_RXFIFO_ERROR (1<<9) +#define S3C2412_SPSTA_TXFIFO_ERROR (1<<8) +#define S3C2412_SPSTA_RXFIFO_FIFO (1<<7) +#define S3C2412_SPSTA_RXFIFO_EMPTY (1<<6) +#define S3C2412_SPSTA_TXFIFO_NFULL (1<<5) +#define S3C2412_SPSTA_TXFIFO_EMPTY (1<<4) + +#define S3C2410_SPSTA_DCOL (1<<2) /* Data Collision Error */ +#define S3C2410_SPSTA_MULD (1<<1) /* Multi Master Error */ +#define S3C2410_SPSTA_READY (1<<0) /* Data Tx/Rx ready */ +#define S3C2412_SPSTA_READY_ORG (1<<3) + +#define S3C2410_SPPIN (0x08) + +#define S3C2410_SPPIN_ENMUL (1<<2) /* Multi Master Error detect */ +#define S3C2410_SPPIN_RESERVED (1<<1) +#define S3C2400_SPPIN_nCS (1<<1) /* SPI Card Select */ +#define S3C2410_SPPIN_KEEP (1<<0) /* Master Out keep */ + +#define S3C2410_SPPRE (0x0C) +#define S3C2410_SPTDAT (0x10) +#define S3C2410_SPRDAT (0x14) + +#define S3C2412_TXFIFO (0x18) +#define S3C2412_RXFIFO (0x18) +#define S3C2412_SPFIC (0x24) + + +#endif /* __ASM_ARCH_REGS_SPI_H */ diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c index c252cbac00f1..256d18395a23 100644 --- a/drivers/spi/spi_s3c24xx.c +++ b/drivers/spi/spi_s3c24xx.c @@ -28,7 +28,7 @@ #include #include -#include +#include #include struct s3c24xx_spi { diff --git a/include/asm-arm/plat-s3c24xx/regs-spi.h b/include/asm-arm/plat-s3c24xx/regs-spi.h deleted file mode 100644 index 2b35479ee35c..000000000000 --- a/include/asm-arm/plat-s3c24xx/regs-spi.h +++ /dev/null @@ -1,82 +0,0 @@ -/* arch/arm/mach-s3c2410/include/mach/regs-spi.h - * - * Copyright (c) 2004 Fetron GmbH - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * S3C2410 SPI register definition -*/ - -#ifndef __ASM_ARCH_REGS_SPI_H -#define __ASM_ARCH_REGS_SPI_H - -#define S3C2410_SPI1 (0x20) -#define S3C2412_SPI1 (0x100) - -#define S3C2410_SPCON (0x00) - -#define S3C2412_SPCON_RXFIFO_RB2 (0<<14) -#define S3C2412_SPCON_RXFIFO_RB4 (1<<14) -#define S3C2412_SPCON_RXFIFO_RB12 (2<<14) -#define S3C2412_SPCON_RXFIFO_RB14 (3<<14) -#define S3C2412_SPCON_TXFIFO_RB2 (0<<12) -#define S3C2412_SPCON_TXFIFO_RB4 (1<<12) -#define S3C2412_SPCON_TXFIFO_RB12 (2<<12) -#define S3C2412_SPCON_TXFIFO_RB14 (3<<12) -#define S3C2412_SPCON_RXFIFO_RESET (1<<11) /* RxFIFO reset */ -#define S3C2412_SPCON_TXFIFO_RESET (1<<10) /* TxFIFO reset */ -#define S3C2412_SPCON_RXFIFO_EN (1<<9) /* RxFIFO Enable */ -#define S3C2412_SPCON_TXFIFO_EN (1<<8) /* TxFIFO Enable */ - -#define S3C2412_SPCON_DIRC_RX (1<<7) - -#define S3C2410_SPCON_SMOD_DMA (2<<5) /* DMA mode */ -#define S3C2410_SPCON_SMOD_INT (1<<5) /* interrupt mode */ -#define S3C2410_SPCON_SMOD_POLL (0<<5) /* polling mode */ -#define S3C2410_SPCON_ENSCK (1<<4) /* Enable SCK */ -#define S3C2410_SPCON_MSTR (1<<3) /* Master/Slave select - 0: slave, 1: master */ -#define S3C2410_SPCON_CPOL_HIGH (1<<2) /* Clock polarity select */ -#define S3C2410_SPCON_CPOL_LOW (0<<2) /* Clock polarity select */ - -#define S3C2410_SPCON_CPHA_FMTB (1<<1) /* Clock Phase Select */ -#define S3C2410_SPCON_CPHA_FMTA (0<<1) /* Clock Phase Select */ - -#define S3C2410_SPCON_TAGD (1<<0) /* Tx auto garbage data mode */ - - -#define S3C2410_SPSTA (0x04) - -#define S3C2412_SPSTA_RXFIFO_AE (1<<11) -#define S3C2412_SPSTA_TXFIFO_AE (1<<10) -#define S3C2412_SPSTA_RXFIFO_ERROR (1<<9) -#define S3C2412_SPSTA_TXFIFO_ERROR (1<<8) -#define S3C2412_SPSTA_RXFIFO_FIFO (1<<7) -#define S3C2412_SPSTA_RXFIFO_EMPTY (1<<6) -#define S3C2412_SPSTA_TXFIFO_NFULL (1<<5) -#define S3C2412_SPSTA_TXFIFO_EMPTY (1<<4) - -#define S3C2410_SPSTA_DCOL (1<<2) /* Data Collision Error */ -#define S3C2410_SPSTA_MULD (1<<1) /* Multi Master Error */ -#define S3C2410_SPSTA_READY (1<<0) /* Data Tx/Rx ready */ -#define S3C2412_SPSTA_READY_ORG (1<<3) - -#define S3C2410_SPPIN (0x08) - -#define S3C2410_SPPIN_ENMUL (1<<2) /* Multi Master Error detect */ -#define S3C2410_SPPIN_RESERVED (1<<1) -#define S3C2400_SPPIN_nCS (1<<1) /* SPI Card Select */ -#define S3C2410_SPPIN_KEEP (1<<0) /* Master Out keep */ - -#define S3C2410_SPPRE (0x0C) -#define S3C2410_SPTDAT (0x10) -#define S3C2410_SPRDAT (0x14) - -#define S3C2412_TXFIFO (0x18) -#define S3C2412_RXFIFO (0x18) -#define S3C2412_SPFIC (0x24) - - -#endif /* __ASM_ARCH_REGS_SPI_H */ -- cgit v1.2.3 From e3bd9ec5d8bfc90f9e1bd995677829e57a404061 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 30 Oct 2008 10:14:39 +0000 Subject: [ARM] S3C24XX: Move mci.h to arch/arm/plat-s3c24xx/include/plat Move mci.h to new position in arch/arm/plat-s3c24xx/include/plat ready to clean out old include directories. Signed-off-by: Ben Dooks --- arch/arm/plat-s3c24xx/include/plat/mci.h | 15 +++++++++++++++ drivers/mmc/host/s3cmci.c | 2 +- include/asm-arm/plat-s3c24xx/mci.h | 15 --------------- 3 files changed, 16 insertions(+), 16 deletions(-) create mode 100644 arch/arm/plat-s3c24xx/include/plat/mci.h delete mode 100644 include/asm-arm/plat-s3c24xx/mci.h (limited to 'include') diff --git a/arch/arm/plat-s3c24xx/include/plat/mci.h b/arch/arm/plat-s3c24xx/include/plat/mci.h new file mode 100644 index 000000000000..2d0852ac3b27 --- /dev/null +++ b/arch/arm/plat-s3c24xx/include/plat/mci.h @@ -0,0 +1,15 @@ +#ifndef _ARCH_MCI_H +#define _ARCH_MCI_H + +struct s3c24xx_mci_pdata { + unsigned int wprotect_invert : 1; + unsigned int detect_invert : 1; /* set => detect active high. */ + + unsigned int gpio_detect; + unsigned int gpio_wprotect; + unsigned long ocr_avail; + void (*set_power)(unsigned char power_mode, + unsigned short vdd); +}; + +#endif /* _ARCH_NCI_H */ diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c index 3b2085b57769..fcc98a4cce3c 100644 --- a/drivers/mmc/host/s3cmci.c +++ b/drivers/mmc/host/s3cmci.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include "s3cmci.h" diff --git a/include/asm-arm/plat-s3c24xx/mci.h b/include/asm-arm/plat-s3c24xx/mci.h deleted file mode 100644 index 2d0852ac3b27..000000000000 --- a/include/asm-arm/plat-s3c24xx/mci.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _ARCH_MCI_H -#define _ARCH_MCI_H - -struct s3c24xx_mci_pdata { - unsigned int wprotect_invert : 1; - unsigned int detect_invert : 1; /* set => detect active high. */ - - unsigned int gpio_detect; - unsigned int gpio_wprotect; - unsigned long ocr_avail; - void (*set_power)(unsigned char power_mode, - unsigned short vdd); -}; - -#endif /* _ARCH_NCI_H */ -- cgit v1.2.3 From 12ef193d5817504621e503e78d641265f6a86ac4 Mon Sep 17 00:00:00 2001 From: Troy Kisky Date: Mon, 13 Oct 2008 17:42:14 -0700 Subject: ASoC: Allow setting codec register with debugfs filesystem i.e. echo 6 59 >/sys/kernel/debug/soc-audio.0/codec_reg will set register 0x06 to a value of 0x59. Also, pop_time debugfs interface setup is moved so that it is setup in the same function as codec_reg Signed-off-by: Troy Kisky Signed-off-by: Mark Brown --- include/sound/soc.h | 4 ++ sound/soc/soc-core.c | 116 +++++++++++++++++++++++++++++++++++++++++++++++++-- sound/soc/soc-dapm.c | 33 ++++----------- 3 files changed, 125 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index a1e0357a84d7..d33825d624a5 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -425,6 +425,7 @@ struct snd_soc_codec { short reg_cache_step; /* dapm */ + u32 pop_time; struct list_head dapm_widgets; struct list_head dapm_paths; enum snd_soc_bias_level bias_level; @@ -516,6 +517,9 @@ struct snd_soc_device { struct delayed_work delayed_work; struct work_struct deferred_resume_work; void *codec_data; +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs_root; +#endif }; /* runtime channel data */ diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 462e635dfc74..4707042b3dad 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -961,10 +962,8 @@ static int soc_new_pcm(struct snd_soc_device *socdev, } /* codec register dump */ -static ssize_t codec_reg_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t soc_codec_reg_show(struct snd_soc_device *devdata, char *buf) { - struct snd_soc_device *devdata = dev_get_drvdata(dev); struct snd_soc_codec *codec = devdata->codec; int i, step = 1, count = 0; @@ -1001,8 +1000,117 @@ static ssize_t codec_reg_show(struct device *dev, return count; } +static ssize_t codec_reg_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct snd_soc_device *devdata = dev_get_drvdata(dev); + return soc_codec_reg_show(devdata, buf); +} + static DEVICE_ATTR(codec_reg, 0444, codec_reg_show, NULL); +#ifdef CONFIG_DEBUG_FS +static int codec_reg_open_file(struct inode *inode, struct file *file) +{ + file->private_data = inode->i_private; + return 0; +} + +static ssize_t codec_reg_read_file(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + ssize_t ret; + struct snd_soc_device *devdata = file->private_data; + char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + ret = soc_codec_reg_show(devdata, buf); + if (ret >= 0) + ret = simple_read_from_buffer(user_buf, count, ppos, buf, ret); + kfree(buf); + return ret; +} + +static ssize_t codec_reg_write_file(struct file *file, + const char __user *user_buf, size_t count, loff_t *ppos) +{ + char buf[32]; + int buf_size; + char *start = buf; + unsigned long reg, value; + int step = 1; + struct snd_soc_device *devdata = file->private_data; + struct snd_soc_codec *codec = devdata->codec; + + buf_size = min(count, (sizeof(buf)-1)); + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + buf[buf_size] = 0; + + if (codec->reg_cache_step) + step = codec->reg_cache_step; + + while (*start == ' ') + start++; + reg = simple_strtoul(start, &start, 16); + if ((reg >= codec->reg_cache_size) || (reg % step)) + return -EINVAL; + while (*start == ' ') + start++; + if (strict_strtoul(start, 16, &value)) + return -EINVAL; + codec->write(codec, reg, value); + return buf_size; +} + +static const struct file_operations codec_reg_fops = { + .open = codec_reg_open_file, + .read = codec_reg_read_file, + .write = codec_reg_write_file, +}; + +static void soc_init_debugfs(struct snd_soc_device *socdev) +{ + struct dentry *root, *file; + struct snd_soc_codec *codec = socdev->codec; + root = debugfs_create_dir(dev_name(socdev->dev), NULL); + if (IS_ERR(root) || !root) + goto exit1; + + file = debugfs_create_file("codec_reg", 0644, + root, socdev, &codec_reg_fops); + if (!file) + goto exit2; + + file = debugfs_create_u32("dapm_pop_time", 0744, + root, &codec->pop_time); + if (!file) + goto exit2; + socdev->debugfs_root = root; + return; +exit2: + debugfs_remove_recursive(root); +exit1: + dev_err(socdev->dev, "debugfs is not available\n"); +} + +static void soc_cleanup_debugfs(struct snd_soc_device *socdev) +{ + debugfs_remove_recursive(socdev->debugfs_root); + socdev->debugfs_root = NULL; +} + +#else + +static inline void soc_init_debugfs(struct snd_soc_device *socdev) +{ +} + +static inline void soc_cleanup_debugfs(struct snd_soc_device *socdev) +{ +} +#endif + /** * snd_soc_new_ac97_codec - initailise AC97 device * @codec: audio codec @@ -1216,6 +1324,7 @@ int snd_soc_register_card(struct snd_soc_device *socdev) if (err < 0) printk(KERN_WARNING "asoc: failed to add codec sysfs files\n"); + soc_init_debugfs(socdev); mutex_unlock(&codec->mutex); out: @@ -1239,6 +1348,7 @@ void snd_soc_free_pcms(struct snd_soc_device *socdev) #endif mutex_lock(&codec->mutex); + soc_cleanup_debugfs(socdev); #ifdef CONFIG_SND_SOC_AC97_BUS for (i = 0; i < codec->num_dai; i++) { codec_dai = &codec->dai[i]; diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 7e9f423f5b09..b51d82285be4 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include @@ -67,17 +66,13 @@ static int dapm_status = 1; module_param(dapm_status, int, 0); MODULE_PARM_DESC(dapm_status, "enable DPM sysfs entries"); -static struct dentry *asoc_debugfs; - -static u32 pop_time; - -static void pop_wait(void) +static void pop_wait(u32 pop_time) { if (pop_time) schedule_timeout_uninterruptible(msecs_to_jiffies(pop_time)); } -static void pop_dbg(const char *fmt, ...) +static void pop_dbg(u32 pop_time, const char *fmt, ...) { va_list args; @@ -85,7 +80,7 @@ static void pop_dbg(const char *fmt, ...) if (pop_time) { vprintk(fmt, args); - pop_wait(); + pop_wait(pop_time); } va_end(args); @@ -230,10 +225,11 @@ static int dapm_update_bits(struct snd_soc_dapm_widget *widget) change = old != new; if (change) { - pop_dbg("pop test %s : %s in %d ms\n", widget->name, - widget->power ? "on" : "off", pop_time); + pop_dbg(codec->pop_time, "pop test %s : %s in %d ms\n", + widget->name, widget->power ? "on" : "off", + codec->pop_time); snd_soc_write(codec, widget->reg, new); - pop_wait(); + pop_wait(codec->pop_time); } pr_debug("reg %x old %x new %x change %d\n", widget->reg, old, new, change); @@ -821,23 +817,13 @@ static DEVICE_ATTR(dapm_widget, 0444, dapm_widget_show, NULL); int snd_soc_dapm_sys_add(struct device *dev) { - int ret = 0; - if (!dapm_status) return 0; ret = device_create_file(dev, &dev_attr_dapm_widget); if (ret != 0) return ret; - - asoc_debugfs = debugfs_create_dir("asoc", NULL); - if (!IS_ERR(asoc_debugfs) && asoc_debugfs) - debugfs_create_u32("dapm_pop_time", 0744, asoc_debugfs, - &pop_time); - else - asoc_debugfs = NULL; - - return 0; + return device_create_file(dev, &dev_attr_dapm_widget); } static void snd_soc_dapm_sys_remove(struct device *dev) @@ -845,9 +831,6 @@ static void snd_soc_dapm_sys_remove(struct device *dev) if (dapm_status) { device_remove_file(dev, &dev_attr_dapm_widget); } - - if (asoc_debugfs) - debugfs_remove_recursive(asoc_debugfs); } /* free all dapm widgets and resources */ -- cgit v1.2.3 From d98d38f2014ab79f28c126ff175d034891f7aefc Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 29 Oct 2008 14:24:09 -0700 Subject: mutex: improve header comment to be actually informative about the API Impact: improve documentation It's nice to say that mutex_trylock follows the spin_trylock convention. It's a lot nicer if the comment also says which that is... make it so. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton --- include/linux/mutex.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index bc6da10ceee0..7a0e5c4f8072 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -144,6 +144,8 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); /* * NOTE: mutex_trylock() follows the spin_trylock() convention, * not the down_trylock() convention! + * + * Returns 1 if the mutex has been acquired successfully, and 0 on contention. */ extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); -- cgit v1.2.3 From 17666f02b118099028522dfc3df00a235700e216 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 30 Oct 2008 16:08:32 -0400 Subject: ftrace: nmi safe code modification Impact: fix crashes that can occur in NMI handlers, if their code is modified Modifying code is something that needs special care. On SMP boxes, if code that is being modified is also being executed on another CPU, that CPU will have undefined results. The dynamic ftrace uses kstop_machine to make the system act like a uniprocessor system. But this does not address NMIs, that can still run on other CPUs. One approach to handle this is to make all code that are used by NMIs not be traced. But NMIs can call notifiers that spread throughout the kernel and this will be very hard to maintain, and the chance of missing a function is very high. The approach that this patch takes is to have the NMIs modify the code if the modification is taking place. The way this works is that just writing to code executing on another CPU is not harmful if what is written is the same as what exists. Two buffers are used: an IP buffer and a "code" buffer. The steps that the patcher takes are: 1) Put in the instruction pointer into the IP buffer and the new code into the "code" buffer. 2) Set a flag that says we are modifying code 3) Wait for any running NMIs to finish. 4) Write the code 5) clear the flag. 6) Wait for any running NMIs to finish. If an NMI is executed, it will also write the pending code. Multiple writes are OK, because what is being written is the same. Then the patcher must wait for all running NMIs to finish before going to the next line that must be patched. This is basically the RCU approach to code modification. Thanks to Ingo Molnar for suggesting the idea, and to Arjan van de Ven for his guidence on what is safe and what is not. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/arm/include/asm/ftrace.h | 5 ++ arch/powerpc/include/asm/ftrace.h | 5 ++ arch/sh/include/asm/ftrace.h | 5 ++ arch/sparc/include/asm/ftrace.h | 5 ++ arch/x86/include/asm/ftrace.h | 15 ++++++ arch/x86/kernel/ftrace.c | 107 +++++++++++++++++++++++++++++++++++++- include/linux/hardirq.h | 15 +++++- 7 files changed, 154 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h index 39c8bc1a006a..d4c24a7a9280 100644 --- a/arch/arm/include/asm/ftrace.h +++ b/arch/arm/include/asm/ftrace.h @@ -1,6 +1,11 @@ #ifndef _ASM_ARM_FTRACE #define _ASM_ARM_FTRACE +#ifndef __ASSEMBLY__ +#define ftrace_nmi_enter() do { } while (0) +#define ftrace_nmi_exit() do { } while (0) +#endif + #ifdef CONFIG_FUNCTION_TRACER #define MCOUNT_ADDR ((long)(mcount)) #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index b298f7a631e6..7652755dc000 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -1,6 +1,11 @@ #ifndef _ASM_POWERPC_FTRACE #define _ASM_POWERPC_FTRACE +#ifndef __ASSEMBLY__ +#define ftrace_nmi_enter() do { } while (0) +#define ftrace_nmi_exit() do { } while (0) +#endif + #ifdef CONFIG_FUNCTION_TRACER #define MCOUNT_ADDR ((long)(_mcount)) #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h index 3aed362c9463..cdf2cb0b9ffe 100644 --- a/arch/sh/include/asm/ftrace.h +++ b/arch/sh/include/asm/ftrace.h @@ -1,6 +1,11 @@ #ifndef __ASM_SH_FTRACE_H #define __ASM_SH_FTRACE_H +#ifndef __ASSEMBLY__ +#define ftrace_nmi_enter() do { } while (0) +#define ftrace_nmi_exit() do { } while (0) +#endif + #ifndef __ASSEMBLY__ extern void mcount(void); #endif diff --git a/arch/sparc/include/asm/ftrace.h b/arch/sparc/include/asm/ftrace.h index d27716cd38c1..33a95feeb137 100644 --- a/arch/sparc/include/asm/ftrace.h +++ b/arch/sparc/include/asm/ftrace.h @@ -1,6 +1,11 @@ #ifndef _ASM_SPARC64_FTRACE #define _ASM_SPARC64_FTRACE +#ifndef __ASSEMBLY__ +#define ftrace_nmi_enter() do { } while (0) +#define ftrace_nmi_exit() do { } while (0) +#endif + #ifdef CONFIG_MCOUNT #define MCOUNT_ADDR ((long)(_mcount)) #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 9e8bc29b8b17..f2ed6b704a75 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -17,6 +17,21 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) */ return addr - 1; } + +#ifdef CONFIG_DYNAMIC_FTRACE +extern void ftrace_nmi_enter(void); +extern void ftrace_nmi_exit(void); +#else +#define ftrace_nmi_enter() do { } while (0) +#define ftrace_nmi_exit() do { } while (0) +#endif +#endif + +#else /* CONFIG_FUNCTION_TRACER */ + +#ifndef __ASSEMBLY__ +#define ftrace_nmi_enter() do { } while (0) +#define ftrace_nmi_exit() do { } while (0) #endif #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 50ea0ac8c9bf..fe5f859130b5 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -56,6 +56,111 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) return calc.code; } +/* + * Modifying code must take extra care. On an SMP machine, if + * the code being modified is also being executed on another CPU + * that CPU will have undefined results and possibly take a GPF. + * We use kstop_machine to stop other CPUS from exectuing code. + * But this does not stop NMIs from happening. We still need + * to protect against that. We separate out the modification of + * the code to take care of this. + * + * Two buffers are added: An IP buffer and a "code" buffer. + * + * 1) Put in the instruction pointer into the IP buffer + * and the new code into the "code" buffer. + * 2) Set a flag that says we are modifying code + * 3) Wait for any running NMIs to finish. + * 4) Write the code + * 5) clear the flag. + * 6) Wait for any running NMIs to finish. + * + * If an NMI is executed, the first thing it does is to call + * "ftrace_nmi_enter". This will check if the flag is set to write + * and if it is, it will write what is in the IP and "code" buffers. + * + * The trick is, it does not matter if everyone is writing the same + * content to the code location. Also, if a CPU is executing code + * it is OK to write to that code location if the contents being written + * are the same as what exists. + */ + +static atomic_t in_nmi; +static int mod_code_status; +static int mod_code_write; +static void *mod_code_ip; +static void *mod_code_newcode; + +static void ftrace_mod_code(void) +{ + /* + * Yes, more than one CPU process can be writing to mod_code_status. + * (and the code itself) + * But if one were to fail, then they all should, and if one were + * to succeed, then they all should. + */ + mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, + MCOUNT_INSN_SIZE); + +} + +void ftrace_nmi_enter(void) +{ + atomic_inc(&in_nmi); + /* Must have in_nmi seen before reading write flag */ + smp_mb(); + if (mod_code_write) + ftrace_mod_code(); +} + +void ftrace_nmi_exit(void) +{ + /* Finish all executions before clearing in_nmi */ + smp_wmb(); + atomic_dec(&in_nmi); +} + +static void wait_for_nmi(void) +{ + while (atomic_read(&in_nmi)) + cpu_relax(); +} + +static int +do_ftrace_mod_code(unsigned long ip, void *new_code) +{ + mod_code_ip = (void *)ip; + mod_code_newcode = new_code; + + /* The buffers need to be visible before we let NMIs write them */ + smp_wmb(); + + mod_code_write = 1; + + /* Make sure write bit is visible before we wait on NMIs */ + smp_mb(); + + wait_for_nmi(); + + /* Make sure all running NMIs have finished before we write the code */ + smp_mb(); + + ftrace_mod_code(); + + /* Make sure the write happens before clearing the bit */ + smp_wmb(); + + mod_code_write = 0; + + /* make sure NMIs see the cleared bit */ + smp_mb(); + + wait_for_nmi(); + + return mod_code_status; +} + + int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) @@ -81,7 +186,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, return -EINVAL; /* replace the text with the new text */ - if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) + if (do_ftrace_mod_code(ip, new_code)) return -EPERM; sync_core(); diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 181006cc94a0..0087cb43becf 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -5,6 +5,7 @@ #include #include #include +#include #include /* @@ -161,7 +162,17 @@ extern void irq_enter(void); */ extern void irq_exit(void); -#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0) -#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) +#define nmi_enter() \ + do { \ + ftrace_nmi_enter(); \ + lockdep_off(); \ + __irq_enter(); \ + } while (0) +#define nmi_exit() \ + do { \ + __irq_exit(); \ + lockdep_on(); \ + ftrace_nmi_exit(); \ + } while (0) #endif /* LINUX_HARDIRQ_H */ -- cgit v1.2.3 From cc0fe83525d734bdd9c883b45eca6bb22f286daa Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 31 Oct 2008 00:42:25 -0700 Subject: xfrm: remove unused struct xfrm_policy::next Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f2c5ba28a428..45e11b3631e4 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -467,7 +467,6 @@ struct xfrm_policy_walk { struct xfrm_policy { - struct xfrm_policy *next; struct hlist_node bydst; struct hlist_node byidx; -- cgit v1.2.3 From 90d841fd0a5e02affd4e2bbdde4f710c61599281 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Fri, 31 Oct 2008 00:43:45 -0700 Subject: pkt_sched: sch_generic: Add Qdisc_ops peek() method. Add Qdisc_ops peek() method in order to replace requeuing. Based on ideas and patches of Herbert Xu, Patrick McHardy and David S. Miller. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/sch_generic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 3fe49d808957..f81f7c4d76fa 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -111,6 +111,7 @@ struct Qdisc_ops int (*enqueue)(struct sk_buff *, struct Qdisc *); struct sk_buff * (*dequeue)(struct Qdisc *); + struct sk_buff * (*peek)(struct Qdisc *); int (*requeue)(struct sk_buff *, struct Qdisc *); unsigned int (*drop)(struct Qdisc *); -- cgit v1.2.3 From 48a8f519e0fe22a5c98523286b2a120841a11dd5 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 31 Oct 2008 00:44:18 -0700 Subject: pkt_sched: Add ->peek() methods for fifo, prio and SFQ qdiscs. From: Patrick McHardy Just as a demonstration how easy adding a peek operation to the work-conserving qdiscs actually is. It doesn't need to keep or change any internal state in many cases thanks to the guarantee that the packet will either be dequeued or, if another packet arrives, the upper qdisc will immediately ->peek again to reevaluate the state. (This is only slightly modified Patrick's patch.) Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/sch_generic.h | 5 +++++ net/sched/sch_fifo.c | 2 ++ net/sched/sch_prio.c | 14 ++++++++++++++ net/sched/sch_sfq.c | 12 ++++++++++++ 4 files changed, 33 insertions(+) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f81f7c4d76fa..da6839a7ff50 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -433,6 +433,11 @@ static inline struct sk_buff *qdisc_dequeue_tail(struct Qdisc *sch) return __qdisc_dequeue_tail(sch, &sch->q); } +static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch) +{ + return skb_peek(&sch->q); +} + static inline int __qdisc_requeue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff_head *list) { diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 23d258bfe8ac..8825e8806f41 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -83,6 +83,7 @@ struct Qdisc_ops pfifo_qdisc_ops __read_mostly = { .priv_size = sizeof(struct fifo_sched_data), .enqueue = pfifo_enqueue, .dequeue = qdisc_dequeue_head, + .peek = qdisc_peek_head, .requeue = qdisc_requeue, .drop = qdisc_queue_drop, .init = fifo_init, @@ -98,6 +99,7 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { .priv_size = sizeof(struct fifo_sched_data), .enqueue = bfifo_enqueue, .dequeue = qdisc_dequeue_head, + .peek = qdisc_peek_head, .requeue = qdisc_requeue, .drop = qdisc_queue_drop, .init = fifo_init, diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 504a78cdb718..3651da3e2802 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -120,6 +120,19 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch) return ret; } +static struct sk_buff *prio_peek(struct Qdisc *sch) +{ + struct prio_sched_data *q = qdisc_priv(sch); + int prio; + + for (prio = 0; prio < q->bands; prio++) { + struct Qdisc *qdisc = q->queues[prio]; + struct sk_buff *skb = qdisc->ops->peek(qdisc); + if (skb) + return skb; + } + return NULL; +} static struct sk_buff *prio_dequeue(struct Qdisc* sch) { @@ -421,6 +434,7 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = { .priv_size = sizeof(struct prio_sched_data), .enqueue = prio_enqueue, .dequeue = prio_dequeue, + .peek = prio_peek, .requeue = prio_requeue, .drop = prio_drop, .init = prio_init, diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index fe1508ef0d3d..198b83d42ba8 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -391,8 +391,19 @@ sfq_requeue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_CN; } +static struct sk_buff * +sfq_peek(struct Qdisc *sch) +{ + struct sfq_sched_data *q = qdisc_priv(sch); + sfq_index a; + /* No active slots */ + if (q->tail == SFQ_DEPTH) + return NULL; + a = q->next[q->tail]; + return skb_peek(&q->qs[a]); +} static struct sk_buff * sfq_dequeue(struct Qdisc *sch) @@ -624,6 +635,7 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = { .priv_size = sizeof(struct sfq_sched_data), .enqueue = sfq_enqueue, .dequeue = sfq_dequeue, + .peek = sfq_peek, .requeue = sfq_requeue, .drop = sfq_drop, .init = sfq_init, -- cgit v1.2.3 From 77be155cba4e163e8bba9fd27222a8b6189ec4f7 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Fri, 31 Oct 2008 00:47:01 -0700 Subject: pkt_sched: Add peek emulation for non-work-conserving qdiscs. This patch adds qdisc_peek_dequeued() wrapper to emulate peek method with qdisc->dequeue() and storing "peeked" skb in qdisc->gso_skb until dequeuing. This is mainly for compatibility reasons not to break some strange configs because peeking is expected for non-work-conserving parent qdiscs to query work-conserving child qdiscs. This implementation requires using qdisc_dequeue_peeked() wrapper instead of directly calling qdisc->dequeue() for all qdiscs ever querried with qdisc->ops->peek() or qdisc_peek_dequeued(). Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/sch_generic.h | 23 +++++++++++++++++++++++ net/sched/sch_atm.c | 4 ++-- net/sched/sch_cbq.c | 1 + net/sched/sch_hfsc.c | 3 ++- net/sched/sch_htb.c | 1 + net/sched/sch_netem.c | 5 +++-- net/sched/sch_tbf.c | 3 ++- 7 files changed, 34 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index da6839a7ff50..9dcb5bfe094a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -438,6 +438,29 @@ static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch) return skb_peek(&sch->q); } +/* generic pseudo peek method for non-work-conserving qdisc */ +static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch) +{ + /* we can reuse ->gso_skb because peek isn't called for root qdiscs */ + if (!sch->gso_skb) + sch->gso_skb = sch->dequeue(sch); + + return sch->gso_skb; +} + +/* use instead of qdisc->dequeue() for all qdiscs queried with ->peek() */ +static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) +{ + struct sk_buff *skb = sch->gso_skb; + + if (skb) + sch->gso_skb = NULL; + else + skb = sch->dequeue(sch); + + return skb; +} + static inline int __qdisc_requeue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff_head *list) { diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 2ee0c1a8efa9..6eb9a650b63d 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -484,7 +484,7 @@ static void sch_atm_dequeue(unsigned long data) if (!atm_may_send(flow->vcc, skb->truesize)) break; - skb = flow->q->dequeue(flow->q); + skb = qdisc_dequeue_peeked(flow->q); if (unlikely(!skb)) break; @@ -519,7 +519,7 @@ static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch) pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p); tasklet_schedule(&p->task); - skb = p->link.q->dequeue(p->link.q); + skb = qdisc_dequeue_peeked(p->link.q); if (skb) sch->q.qlen--; return skb; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 03e389e8d945..63efa70abbea 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -2066,6 +2066,7 @@ static struct Qdisc_ops cbq_qdisc_ops __read_mostly = { .priv_size = sizeof(struct cbq_sched_data), .enqueue = cbq_enqueue, .dequeue = cbq_dequeue, + .peek = qdisc_peek_dequeued, .requeue = cbq_requeue, .drop = cbq_drop, .init = cbq_init, diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index ddfc40887848..d90b1652f2af 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1634,7 +1634,7 @@ hfsc_dequeue(struct Qdisc *sch) } } - skb = cl->qdisc->dequeue(cl->qdisc); + skb = qdisc_dequeue_peeked(cl->qdisc); if (skb == NULL) { if (net_ratelimit()) printk("HFSC: Non-work-conserving qdisc ?\n"); @@ -1727,6 +1727,7 @@ static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = { .dump = hfsc_dump_qdisc, .enqueue = hfsc_enqueue, .dequeue = hfsc_dequeue, + .peek = qdisc_peek_dequeued, .requeue = hfsc_requeue, .drop = hfsc_drop, .cl_ops = &hfsc_class_ops, diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index d14f02056ae6..3fda8199713d 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1565,6 +1565,7 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = { .priv_size = sizeof(struct htb_sched), .enqueue = htb_enqueue, .dequeue = htb_dequeue, + .peek = qdisc_peek_dequeued, .requeue = htb_requeue, .drop = htb_drop, .init = htb_init, diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 74fbdb52baed..3080bd6ee332 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -290,8 +290,8 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) /* if more time remaining? */ if (cb->time_to_send <= now) { - skb = q->qdisc->dequeue(q->qdisc); - if (!skb) + skb = qdisc_dequeue_peeked(q->qdisc); + if (unlikely(!skb)) return NULL; pr_debug("netem_dequeue: return skb=%p\n", skb); @@ -714,6 +714,7 @@ static struct Qdisc_ops netem_qdisc_ops __read_mostly = { .priv_size = sizeof(struct netem_sched_data), .enqueue = netem_enqueue, .dequeue = netem_dequeue, + .peek = qdisc_peek_dequeued, .requeue = netem_requeue, .drop = netem_drop, .init = netem_init, diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 61fdc77a48d2..435076cf620e 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -192,7 +192,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch) toks -= L2T(q, len); if ((toks|ptoks) >= 0) { - skb = q->qdisc->dequeue(q->qdisc); + skb = qdisc_dequeue_peeked(q->qdisc); if (unlikely(!skb)) return NULL; @@ -467,6 +467,7 @@ static struct Qdisc_ops tbf_qdisc_ops __read_mostly = { .priv_size = sizeof(struct tbf_sched_data), .enqueue = tbf_enqueue, .dequeue = tbf_dequeue, + .peek = qdisc_peek_dequeued, .requeue = tbf_requeue, .drop = tbf_drop, .init = tbf_init, -- cgit v1.2.3 From 3685f25de1b0447fff381c420de1e25bd57c9efb Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 31 Oct 2008 00:56:49 -0700 Subject: misc: replace NIPQUAD() Using NIPQUAD() with NIPQUAD_FMT, %d.%d.%d.%d or %u.%u.%u.%u can be replaced with %pI4 Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- include/linux/sunrpc/svc_xprt.h | 4 ++-- include/net/ip_vs.h | 4 ++-- include/net/netfilter/nf_conntrack_tuple.h | 6 +++--- include/net/sctp/sctp.h | 4 ++-- security/selinux/avc.c | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 51cb75ea42d5..0127daca4354 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -139,8 +139,8 @@ static inline char *__svc_print_addr(struct sockaddr *addr, { switch (addr->sa_family) { case AF_INET: - snprintf(buf, len, "%u.%u.%u.%u, port=%u", - NIPQUAD(((struct sockaddr_in *) addr)->sin_addr), + snprintf(buf, len, "%pI4, port=%u", + &((struct sockaddr_in *)addr)->sin_addr, ntohs(((struct sockaddr_in *) addr)->sin_port)); break; diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index af48cada561e..fc63353779f0 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -91,8 +91,8 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, &addr->in6) + 1; else #endif - len = snprintf(&buf[*idx], buf_len - *idx, NIPQUAD_FMT, - NIPQUAD(addr->ip)) + 1; + len = snprintf(&buf[*idx], buf_len - *idx, "%pI4", + &addr->ip) + 1; *idx += len; BUG_ON(*idx > buf_len + 1); diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index 42f1fc96f3ec..f2f6aa73dc10 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -112,10 +112,10 @@ struct nf_conntrack_tuple_mask static inline void nf_ct_dump_tuple_ip(const struct nf_conntrack_tuple *t) { #ifdef DEBUG - printk("tuple %p: %u " NIPQUAD_FMT ":%hu -> " NIPQUAD_FMT ":%hu\n", + printk("tuple %p: %u %pI4:%hu -> %pI4:%hu\n", t, t->dst.protonum, - NIPQUAD(t->src.u3.ip), ntohs(t->src.u.all), - NIPQUAD(t->dst.u3.ip), ntohs(t->dst.u.all)); + &t->src.u3.ip, ntohs(t->src.u.all), + &t->dst.u3.ip, ntohs(t->dst.u.all)); #endif } diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index e71b0f7ce88e..23797506f593 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -291,9 +291,9 @@ extern int sctp_debug_flag; otherparms); \ } else { \ printk(KERN_DEBUG \ - lead NIPQUAD_FMT trail, \ + lead "%pI4" trail, \ leadparm, \ - NIPQUAD(saddr->v4.sin_addr.s_addr), \ + &saddr->v4.sin_addr.s_addr, \ otherparms); \ } \ } diff --git a/security/selinux/avc.c b/security/selinux/avc.c index ed6af12cdf43..d43bd6baeeaa 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -504,7 +504,7 @@ static inline void avc_print_ipv4_addr(struct audit_buffer *ab, __be32 addr, __be16 port, char *name1, char *name2) { if (addr) - audit_log_format(ab, " %s=" NIPQUAD_FMT, name1, NIPQUAD(addr)); + audit_log_format(ab, " %s=%pI4", name1, &addr); if (port) audit_log_format(ab, " %s=%d", name2, ntohs(port)); } -- cgit v1.2.3 From 92be3d6bdf2cb34972ab50e12ad4da1076e690da Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Fri, 31 Oct 2008 09:48:08 +0800 Subject: kexec/i386: allocate page table pages dynamically Impact: save .text size when kexec is built in but not loaded This patch adds an architecture specific struct kimage_arch into struct kimage. The pointers to page table pages used by kexec are added to struct kimage_arch. The page tables pages are dynamically allocated in machine_kexec_prepare instead of statically from BSS segment. This will save up to 20k memory when kexec image is not loaded. Signed-off-by: Huang Ying Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kexec.h | 14 ++++++++ arch/x86/kernel/machine_kexec_32.c | 67 ++++++++++++++++++++++++++------------ include/linux/kexec.h | 4 +++ 3 files changed, 64 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index a1f22771a15a..df9c41a9c6ae 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -170,6 +170,20 @@ relocate_kernel(unsigned long indirection_page, unsigned long start_address) ATTRIB_NORET; #endif +#ifdef CONFIG_X86_32 +#define ARCH_HAS_KIMAGE_ARCH + +struct kimage_arch { + pgd_t *pgd; +#ifdef CONFIG_X86_PAE + pmd_t *pmd0; + pmd_t *pmd1; +#endif + pte_t *pte0; + pte_t *pte1; +}; +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_KEXEC_H */ diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 7a385746509a..1100312847a5 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -25,15 +26,6 @@ #include #include -#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) -static u32 kexec_pgd[1024] PAGE_ALIGNED; -#ifdef CONFIG_X86_PAE -static u32 kexec_pmd0[1024] PAGE_ALIGNED; -static u32 kexec_pmd1[1024] PAGE_ALIGNED; -#endif -static u32 kexec_pte0[1024] PAGE_ALIGNED; -static u32 kexec_pte1[1024] PAGE_ALIGNED; - static void set_idt(void *newidt, __u16 limit) { struct desc_ptr curidt; @@ -76,6 +68,37 @@ static void load_segments(void) #undef __STR } +static void machine_kexec_free_page_tables(struct kimage *image) +{ + free_page((unsigned long)image->arch.pgd); +#ifdef CONFIG_X86_PAE + free_page((unsigned long)image->arch.pmd0); + free_page((unsigned long)image->arch.pmd1); +#endif + free_page((unsigned long)image->arch.pte0); + free_page((unsigned long)image->arch.pte1); +} + +static int machine_kexec_alloc_page_tables(struct kimage *image) +{ + image->arch.pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL); +#ifdef CONFIG_X86_PAE + image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL); + image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL); +#endif + image->arch.pte0 = (pte_t *)get_zeroed_page(GFP_KERNEL); + image->arch.pte1 = (pte_t *)get_zeroed_page(GFP_KERNEL); + if (!image->arch.pgd || +#ifdef CONFIG_X86_PAE + !image->arch.pmd0 || !image->arch.pmd1 || +#endif + !image->arch.pte0 || !image->arch.pte1) { + machine_kexec_free_page_tables(image); + return -ENOMEM; + } + return 0; +} + /* * A architecture hook called to validate the * proposed image and prepare the control pages @@ -87,13 +110,14 @@ static void load_segments(void) * reboot code buffer to allow us to avoid allocations * later. * - * Make control page executable. + * - Make control page executable. + * - Allocate page tables */ int machine_kexec_prepare(struct kimage *image) { if (nx_enabled) set_pages_x(image->control_code_page, 1); - return 0; + return machine_kexec_alloc_page_tables(image); } /* @@ -104,6 +128,7 @@ void machine_kexec_cleanup(struct kimage *image) { if (nx_enabled) set_pages_nx(image->control_code_page, 1); + machine_kexec_free_page_tables(image); } /* @@ -150,18 +175,18 @@ void machine_kexec(struct kimage *image) relocate_kernel_ptr = control_page; page_list[PA_CONTROL_PAGE] = __pa(control_page); page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; - page_list[PA_PGD] = __pa(kexec_pgd); - page_list[VA_PGD] = (unsigned long)kexec_pgd; + page_list[PA_PGD] = __pa(image->arch.pgd); + page_list[VA_PGD] = (unsigned long)image->arch.pgd; #ifdef CONFIG_X86_PAE - page_list[PA_PMD_0] = __pa(kexec_pmd0); - page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; - page_list[PA_PMD_1] = __pa(kexec_pmd1); - page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; + page_list[PA_PMD_0] = __pa(image->arch.pmd0); + page_list[VA_PMD_0] = (unsigned long)image->arch.pmd0; + page_list[PA_PMD_1] = __pa(image->arch.pmd1); + page_list[VA_PMD_1] = (unsigned long)image->arch.pmd1; #endif - page_list[PA_PTE_0] = __pa(kexec_pte0); - page_list[VA_PTE_0] = (unsigned long)kexec_pte0; - page_list[PA_PTE_1] = __pa(kexec_pte1); - page_list[VA_PTE_1] = (unsigned long)kexec_pte1; + page_list[PA_PTE_0] = __pa(image->arch.pte0); + page_list[VA_PTE_0] = (unsigned long)image->arch.pte0; + page_list[PA_PTE_1] = __pa(image->arch.pte1); + page_list[VA_PTE_1] = (unsigned long)image->arch.pte1; if (image->type == KEXEC_TYPE_DEFAULT) page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 17f76fc05173..adc34f2c6eff 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -100,6 +100,10 @@ struct kimage { #define KEXEC_TYPE_DEFAULT 0 #define KEXEC_TYPE_CRASH 1 unsigned int preserve_context : 1; + +#ifdef ARCH_HAS_KIMAGE_ARCH + struct kimage_arch arch; +#endif }; -- cgit v1.2.3 From a26a2a27396c0a0877aa701f8f92d08ba550a6c9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 31 Oct 2008 00:03:22 -0400 Subject: ftrace: nmi safe code clean ups Impact: cleanup This patch cleans up the NMI safe code for dynamic ftrace as suggested by Andrew Morton. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/arm/include/asm/ftrace.h | 4 ++-- arch/powerpc/include/asm/ftrace.h | 4 ++-- arch/sh/include/asm/ftrace.h | 4 ++-- arch/sparc/include/asm/ftrace.h | 4 ++-- arch/x86/include/asm/ftrace.h | 10 +++++----- arch/x86/kernel/ftrace.c | 16 ++++++++-------- include/linux/ftrace.h | 3 +++ kernel/trace/trace.c | 9 ++++----- 8 files changed, 28 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h index d4c24a7a9280..3f3a1d1508ea 100644 --- a/arch/arm/include/asm/ftrace.h +++ b/arch/arm/include/asm/ftrace.h @@ -2,8 +2,8 @@ #define _ASM_ARM_FTRACE #ifndef __ASSEMBLY__ -#define ftrace_nmi_enter() do { } while (0) -#define ftrace_nmi_exit() do { } while (0) +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } #endif #ifdef CONFIG_FUNCTION_TRACER diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 7652755dc000..1cd72700fbc0 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -2,8 +2,8 @@ #define _ASM_POWERPC_FTRACE #ifndef __ASSEMBLY__ -#define ftrace_nmi_enter() do { } while (0) -#define ftrace_nmi_exit() do { } while (0) +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } #endif #ifdef CONFIG_FUNCTION_TRACER diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h index cdf2cb0b9ffe..31ada0370cb6 100644 --- a/arch/sh/include/asm/ftrace.h +++ b/arch/sh/include/asm/ftrace.h @@ -2,8 +2,8 @@ #define __ASM_SH_FTRACE_H #ifndef __ASSEMBLY__ -#define ftrace_nmi_enter() do { } while (0) -#define ftrace_nmi_exit() do { } while (0) +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } #endif #ifndef __ASSEMBLY__ diff --git a/arch/sparc/include/asm/ftrace.h b/arch/sparc/include/asm/ftrace.h index 33a95feeb137..62055ac0496e 100644 --- a/arch/sparc/include/asm/ftrace.h +++ b/arch/sparc/include/asm/ftrace.h @@ -2,8 +2,8 @@ #define _ASM_SPARC64_FTRACE #ifndef __ASSEMBLY__ -#define ftrace_nmi_enter() do { } while (0) -#define ftrace_nmi_exit() do { } while (0) +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } #endif #ifdef CONFIG_MCOUNT diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index f2ed6b704a75..a23468194b8c 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -22,16 +22,16 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) extern void ftrace_nmi_enter(void); extern void ftrace_nmi_exit(void); #else -#define ftrace_nmi_enter() do { } while (0) -#define ftrace_nmi_exit() do { } while (0) -#endif +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } #endif +#endif /* __ASSEMBLY__ */ #else /* CONFIG_FUNCTION_TRACER */ #ifndef __ASSEMBLY__ -#define ftrace_nmi_enter() do { } while (0) -#define ftrace_nmi_exit() do { } while (0) +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } #endif #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 6685b0fc1b44..69149337f2fe 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -67,7 +67,7 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) * * Two buffers are added: An IP buffer and a "code" buffer. * - * 1) Put in the instruction pointer into the IP buffer + * 1) Put the instruction pointer into the IP buffer * and the new code into the "code" buffer. * 2) Set a flag that says we are modifying code * 3) Wait for any running NMIs to finish. @@ -85,14 +85,14 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) * are the same as what exists. */ -static atomic_t in_nmi; -static int mod_code_status; -static int mod_code_write; -static void *mod_code_ip; -static void *mod_code_newcode; +static atomic_t in_nmi = ATOMIC_INIT(0); +static int mod_code_status; /* holds return value of text write */ +static int mod_code_write; /* set when NMI should do the write */ +static void *mod_code_ip; /* holds the IP to write to */ +static void *mod_code_newcode; /* holds the text to write to the IP */ -static int nmi_wait_count; -static atomic_t nmi_update_count; +static unsigned nmi_wait_count; +static atomic_t nmi_update_count = ATOMIC_INIT(0); int ftrace_arch_read_dyn_info(char *buf, int size) { diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 703eb53cfa2b..22240dfe912e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -74,6 +74,9 @@ extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); +/* May be defined in arch */ +extern int ftrace_arch_read_dyn_info(char *buf, int size); + /** * ftrace_modify_code - modify code segment * @ip: the address of the code segment diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bc36febc0771..7f86067d760c 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2815,10 +2815,6 @@ static struct file_operations tracing_mark_fops = { #ifdef CONFIG_DYNAMIC_FTRACE -#define DYN_INFO_BUF_SIZE 1023 -static char ftrace_dyn_info_buffer[DYN_INFO_BUF_SIZE+1]; -static DEFINE_MUTEX(dyn_info_mutex); - int __weak ftrace_arch_read_dyn_info(char *buf, int size) { return 0; @@ -2828,14 +2824,17 @@ static ssize_t tracing_read_dyn_info(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { + static char ftrace_dyn_info_buffer[1024]; + static DEFINE_MUTEX(dyn_info_mutex); unsigned long *p = filp->private_data; char *buf = ftrace_dyn_info_buffer; + int size = ARRAY_SIZE(ftrace_dyn_info_buffer); int r; mutex_lock(&dyn_info_mutex); r = sprintf(buf, "%ld ", *p); - r += ftrace_arch_read_dyn_info(buf+r, DYN_INFO_BUF_SIZE-r); + r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r); buf[r++] = '\n'; r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); -- cgit v1.2.3 From 3db594380b8452eda4d88b12844077809607caaa Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 7 Oct 2008 12:04:33 +0200 Subject: mac80211: remove wiphy_to_hw This isn't used by anyone, if we ever need it we can add it back, until then it's useless. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 -- net/mac80211/cfg.c | 7 ------- 2 files changed, 9 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8856e2d60e9f..e41b9a8d186f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -868,8 +868,6 @@ struct ieee80211_hw { u8 max_altrate_tries; }; -struct ieee80211_hw *wiphy_to_hw(struct wiphy *wiphy); - /** * SET_IEEE80211_DEV - set device for 802.11 hardware * diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 855126a3039d..8dc5e46cea68 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -17,13 +17,6 @@ #include "rate.h" #include "mesh.h" -struct ieee80211_hw *wiphy_to_hw(struct wiphy *wiphy) -{ - struct ieee80211_local *local = wiphy_priv(wiphy); - return &local->hw; -} -EXPORT_SYMBOL(wiphy_to_hw); - static bool nl80211_type_check(enum nl80211_iftype type) { switch (type) { -- cgit v1.2.3 From e87a2feea75e3cba7af43ed9317b56b282d87742 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 7 Oct 2008 12:04:35 +0200 Subject: mac80211: remove max_antenna_gain config The antenna gain isn't exactly configurable, despite the belief of some unnamed individual who thinks that the EEPROM might influence it. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 -- net/mac80211/main.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e41b9a8d186f..2fab5a7e5db7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -470,7 +470,6 @@ enum ieee80211_conf_flags { * @listen_interval: listen interval in units of beacon interval * @flags: configuration flags defined above * @power_level: requested transmit power (in dBm) - * @max_antenna_gain: maximum antenna gain (in dBi) * @antenna_sel_tx: transmit antenna selection, 0: default/diversity, * 1/2: antenna 0/1 * @antenna_sel_rx: receive antenna selection, like @antenna_sel_tx @@ -485,7 +484,6 @@ struct ieee80211_conf { u16 listen_interval; u32 flags; int power_level; - int max_antenna_gain; u8 antenna_sel_tx; u8 antenna_sel_rx; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 2ff26d03cd50..d1c4e86b215c 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -215,8 +215,6 @@ int ieee80211_hw_config(struct ieee80211_local *local) local->hw.conf.power_level = min(chan->max_power, local->hw.conf.power_level); - local->hw.conf.max_antenna_gain = chan->max_antenna_gain; - #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: HW CONFIG: freq=%d\n", wiphy_name(local->hw.wiphy), chan->center_freq); -- cgit v1.2.3 From 7a5158ef8da70fdedeb0530faaa8128aa645be3c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 8 Oct 2008 10:59:33 +0200 Subject: mac80211: fix short slot handling This patch makes mac80211 handle short slot requests from the AP properly. Also warn about uses of IEEE80211_CONF_SHORT_SLOT_TIME and optimise out the code since it cannot ever be hit anyway. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 28 +++++++++++-------- net/mac80211/main.c | 9 ++++-- net/mac80211/mlme.c | 74 +++++++++++++++++++++++++++----------------------- 3 files changed, 62 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2fab5a7e5db7..d1466e7a47b8 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -180,8 +180,12 @@ enum ieee80211_bss_change { * @assoc: association status * @aid: association ID number, valid only when @assoc is true * @use_cts_prot: use CTS protection - * @use_short_preamble: use 802.11b short preamble - * @use_short_slot: use short slot time (only relevant for ERP) + * @use_short_preamble: use 802.11b short preamble; + * if the hardware cannot handle this it must set the + * IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE hardware flag + * @use_short_slot: use short slot time (only relevant for ERP); + * if the hardware cannot handle this it must set the + * IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE hardware flag * @dtim_period: num of beacons before the next DTIM, for PSM * @timestamp: beacon timestamp * @beacon_int: beacon interval @@ -442,23 +446,23 @@ struct ieee80211_rx_status { * * Flags to define PHY configuration options * - * @IEEE80211_CONF_SHORT_SLOT_TIME: use 802.11g short slot time * @IEEE80211_CONF_RADIOTAP: add radiotap header at receive time (if supported) * @IEEE80211_CONF_SUPPORT_HT_MODE: use 802.11n HT capabilities (if supported) * @IEEE80211_CONF_PS: Enable 802.11 power save mode */ enum ieee80211_conf_flags { - /* - * TODO: IEEE80211_CONF_SHORT_SLOT_TIME will be removed once drivers - * have been converted to use bss_info_changed() for slot time - * configuration - */ - IEEE80211_CONF_SHORT_SLOT_TIME = (1<<0), - IEEE80211_CONF_RADIOTAP = (1<<1), - IEEE80211_CONF_SUPPORT_HT_MODE = (1<<2), - IEEE80211_CONF_PS = (1<<3), + IEEE80211_CONF_RADIOTAP = (1<<0), + IEEE80211_CONF_SUPPORT_HT_MODE = (1<<1), + IEEE80211_CONF_PS = (1<<2), }; +/* XXX: remove all this once drivers stop trying to use it */ +static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void) +{ + return 0; +} +#define IEEE80211_CONF_SHORT_SLOT_TIME (__IEEE80211_CONF_SHORT_SLOT_TIME()) + /** * struct ieee80211_conf - configuration of the device * diff --git a/net/mac80211/main.c b/net/mac80211/main.c index d1c4e86b215c..c427954fe8e8 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -346,9 +346,12 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata) { - sdata->bss_conf.use_cts_prot = 0; - sdata->bss_conf.use_short_preamble = 0; - return BSS_CHANGED_ERP_CTS_PROT | BSS_CHANGED_ERP_PREAMBLE; + sdata->bss_conf.use_cts_prot = false; + sdata->bss_conf.use_short_preamble = false; + sdata->bss_conf.use_short_slot = false; + return BSS_CHANGED_ERP_CTS_PROT | + BSS_CHANGED_ERP_PREAMBLE | + BSS_CHANGED_ERP_SLOT; } void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 6ad2619db85f..829995e740a7 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -568,15 +568,27 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, } } -static u32 ieee80211_handle_protect_preamb(struct ieee80211_sub_if_data *sdata, - bool use_protection, - bool use_short_preamble) +static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, + u16 capab, bool erp_valid, u8 erp) { struct ieee80211_bss_conf *bss_conf = &sdata->bss_conf; #ifdef CONFIG_MAC80211_VERBOSE_DEBUG struct ieee80211_if_sta *ifsta = &sdata->u.sta; #endif u32 changed = 0; + bool use_protection; + bool use_short_preamble; + bool use_short_slot; + + if (erp_valid) { + use_protection = (erp & WLAN_ERP_USE_PROTECTION) != 0; + use_short_preamble = (erp & WLAN_ERP_BARKER_PREAMBLE) == 0; + } else { + use_protection = false; + use_short_preamble = !!(capab & WLAN_CAPABILITY_SHORT_PREAMBLE); + } + + use_short_slot = !!(capab & WLAN_CAPABILITY_SHORT_SLOT_TIME); if (use_protection != bss_conf->use_cts_prot) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG @@ -605,30 +617,18 @@ static u32 ieee80211_handle_protect_preamb(struct ieee80211_sub_if_data *sdata, changed |= BSS_CHANGED_ERP_PREAMBLE; } - return changed; -} - -static u32 ieee80211_handle_erp_ie(struct ieee80211_sub_if_data *sdata, - u8 erp_value) -{ - bool use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0; - bool use_short_preamble = (erp_value & WLAN_ERP_BARKER_PREAMBLE) == 0; - - return ieee80211_handle_protect_preamb(sdata, - use_protection, use_short_preamble); -} - -static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, - struct ieee80211_bss *bss) -{ - u32 changed = 0; - - if (bss->has_erp_value) - changed |= ieee80211_handle_erp_ie(sdata, bss->erp_value); - else { - u16 capab = bss->capability; - changed |= ieee80211_handle_protect_preamb(sdata, false, - (capab & WLAN_CAPABILITY_SHORT_PREAMBLE) != 0); + if (use_short_slot != bss_conf->use_short_slot) { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: switched to %s slot" + " (BSSID=%s)\n", + sdata->dev->name, + use_short_slot ? "short" : "long", + ifsta->bssid); + } +#endif + bss_conf->use_short_slot = use_short_slot; + changed |= BSS_CHANGED_ERP_SLOT; } return changed; @@ -721,7 +721,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, sdata->bss_conf.timestamp = bss->timestamp; sdata->bss_conf.dtim_period = bss->dtim_period; - changed |= ieee80211_handle_bss_capability(sdata, bss); + changed |= ieee80211_handle_bss_capability(sdata, + bss->capability, bss->has_erp_value, bss->erp_value); ieee80211_rx_bss_put(local, bss); } @@ -1657,6 +1658,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_conf *conf = &local->hw.conf; u32 changed = 0; + bool erp_valid; + u8 erp_value = 0; /* Process beacon from the current BSS */ baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; @@ -1678,13 +1681,16 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param, elems.wmm_param_len); - if (elems.erp_info && elems.erp_info_len >= 1) - changed |= ieee80211_handle_erp_ie(sdata, elems.erp_info[0]); - else { - u16 capab = le16_to_cpu(mgmt->u.beacon.capab_info); - changed |= ieee80211_handle_protect_preamb(sdata, false, - (capab & WLAN_CAPABILITY_SHORT_PREAMBLE) != 0); + + if (elems.erp_info && elems.erp_info_len >= 1) { + erp_valid = true; + erp_value = elems.erp_info[0]; + } else { + erp_valid = false; } + changed |= ieee80211_handle_bss_capability(sdata, + le16_to_cpu(mgmt->u.beacon.capab_info), + erp_valid, erp_value); if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param && conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) { -- cgit v1.2.3 From d9fe60dea7779d412b34679f1177c5ca1940ea8d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 9 Oct 2008 12:13:49 +0200 Subject: 802.11: clean up/fix HT support This patch cleans up a number of things: * the unusable definition of the HT capabilities/HT information information elements * variable names that are hard to understand * mac80211: move ieee80211_handle_ht to ht.c and remove the unused enable_ht parameter * mac80211: fix bug with MCS rate 32 in ieee80211_handle_ht * mac80211: fix bug with casting the result of ieee80211_bss_get_ie to an information element _contents_ rather than the whole element, add size checking (another out-of-bounds access bug fixed!) * mac80211: remove some unused return values in favour of BUG_ON checking * a few minor other things Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/ath9k/main.c | 57 ++++++----- drivers/net/wireless/ath9k/rc.c | 10 +- drivers/net/wireless/ath9k/rc.h | 1 - drivers/net/wireless/ath9k/recv.c | 2 +- drivers/net/wireless/ath9k/xmit.c | 2 +- drivers/net/wireless/iwlwifi/iwl-agn-rs.c | 18 ++-- drivers/net/wireless/iwlwifi/iwl-agn.c | 20 ++-- drivers/net/wireless/iwlwifi/iwl-core.c | 71 +++++++------- drivers/net/wireless/iwlwifi/iwl-core.h | 2 +- drivers/net/wireless/iwlwifi/iwl-dev.h | 4 +- drivers/net/wireless/iwlwifi/iwl-scan.c | 12 +-- drivers/net/wireless/iwlwifi/iwl-sta.c | 6 +- drivers/net/wireless/mac80211_hwsim.c | 19 ++-- include/linux/ieee80211.h | 133 ++++++++++++++++++-------- include/net/mac80211.h | 12 +-- include/net/wireless.h | 15 +-- net/mac80211/cfg.c | 7 +- net/mac80211/ht.c | 151 +++++++++++++++++++++++++----- net/mac80211/ieee80211_i.h | 16 ++-- net/mac80211/main.c | 94 ------------------- net/mac80211/mlme.c | 45 ++++----- net/mac80211/util.c | 4 +- net/mac80211/wext.c | 4 +- 23 files changed, 386 insertions(+), 319 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c index 186d75acb326..5e087c92a6d9 100644 --- a/drivers/net/wireless/ath9k/main.c +++ b/drivers/net/wireless/ath9k/main.c @@ -61,24 +61,24 @@ static u32 ath_get_extchanmode(struct ath_softc *sc, switch (chan->band) { case IEEE80211_BAND_2GHZ: - if ((ext_chan_offset == IEEE80211_HT_IE_CHA_SEC_NONE) && + if ((ext_chan_offset == IEEE80211_HT_PARAM_CHA_SEC_NONE) && (tx_chan_width == ATH9K_HT_MACMODE_20)) chanmode = CHANNEL_G_HT20; - if ((ext_chan_offset == IEEE80211_HT_IE_CHA_SEC_ABOVE) && + if ((ext_chan_offset == IEEE80211_HT_PARAM_CHA_SEC_ABOVE) && (tx_chan_width == ATH9K_HT_MACMODE_2040)) chanmode = CHANNEL_G_HT40PLUS; - if ((ext_chan_offset == IEEE80211_HT_IE_CHA_SEC_BELOW) && + if ((ext_chan_offset == IEEE80211_HT_PARAM_CHA_SEC_BELOW) && (tx_chan_width == ATH9K_HT_MACMODE_2040)) chanmode = CHANNEL_G_HT40MINUS; break; case IEEE80211_BAND_5GHZ: - if ((ext_chan_offset == IEEE80211_HT_IE_CHA_SEC_NONE) && + if ((ext_chan_offset == IEEE80211_HT_PARAM_CHA_SEC_NONE) && (tx_chan_width == ATH9K_HT_MACMODE_20)) chanmode = CHANNEL_A_HT20; - if ((ext_chan_offset == IEEE80211_HT_IE_CHA_SEC_ABOVE) && + if ((ext_chan_offset == IEEE80211_HT_PARAM_CHA_SEC_ABOVE) && (tx_chan_width == ATH9K_HT_MACMODE_2040)) chanmode = CHANNEL_A_HT40PLUS; - if ((ext_chan_offset == IEEE80211_HT_IE_CHA_SEC_BELOW) && + if ((ext_chan_offset == IEEE80211_HT_PARAM_CHA_SEC_BELOW) && (tx_chan_width == ATH9K_HT_MACMODE_2040)) chanmode = CHANNEL_A_HT40MINUS; break; @@ -215,24 +215,24 @@ static void ath_key_delete(struct ath_softc *sc, struct ieee80211_key_conf *key) ath_key_reset(sc, key->keyidx, freeslot); } -static void setup_ht_cap(struct ieee80211_ht_info *ht_info) +static void setup_ht_cap(struct ieee80211_sta_ht_cap *ht_info) { #define ATH9K_HT_CAP_MAXRXAMPDU_65536 0x3 /* 2 ^ 16 */ #define ATH9K_HT_CAP_MPDUDENSITY_8 0x6 /* 8 usec */ - ht_info->ht_supported = 1; - ht_info->cap = (u16)IEEE80211_HT_CAP_SUP_WIDTH - |(u16)IEEE80211_HT_CAP_SM_PS - |(u16)IEEE80211_HT_CAP_SGI_40 - |(u16)IEEE80211_HT_CAP_DSSSCCK40; + ht_info->ht_supported = true; + ht_info->cap = IEEE80211_HT_CAP_SUP_WIDTH_20_40 | + IEEE80211_HT_CAP_SM_PS | + IEEE80211_HT_CAP_SGI_40 | + IEEE80211_HT_CAP_DSSSCCK40; ht_info->ampdu_factor = ATH9K_HT_CAP_MAXRXAMPDU_65536; ht_info->ampdu_density = ATH9K_HT_CAP_MPDUDENSITY_8; - /* setup supported mcs set */ - memset(ht_info->supp_mcs_set, 0, 16); - ht_info->supp_mcs_set[0] = 0xff; - ht_info->supp_mcs_set[1] = 0xff; - ht_info->supp_mcs_set[12] = IEEE80211_HT_CAP_MCS_TX_DEFINED; + /* set up supported mcs set */ + memset(&ht_info->mcs, 0, sizeof(ht_info->mcs)); + ht_info->mcs.rx_mask[0] = 0xff; + ht_info->mcs.rx_mask[1] = 0xff; + ht_info->mcs.tx_params = IEEE80211_HT_MCS_TX_DEFINED; } static int ath_rate2idx(struct ath_softc *sc, int rate) @@ -328,31 +328,28 @@ static u8 parse_mpdudensity(u8 mpdudensity) static void ath9k_ht_conf(struct ath_softc *sc, struct ieee80211_bss_conf *bss_conf) { -#define IEEE80211_HT_CAP_40MHZ_INTOLERANT BIT(14) struct ath_ht_info *ht_info = &sc->sc_ht_info; if (bss_conf->assoc_ht) { ht_info->ext_chan_offset = bss_conf->ht_bss_conf->bss_cap & - IEEE80211_HT_IE_CHA_SEC_OFFSET; + IEEE80211_HT_PARAM_CHA_SEC_OFFSET; - if (!(bss_conf->ht_conf->cap & + if (!(bss_conf->ht_cap->cap & IEEE80211_HT_CAP_40MHZ_INTOLERANT) && (bss_conf->ht_bss_conf->bss_cap & - IEEE80211_HT_IE_CHA_WIDTH)) + IEEE80211_HT_PARAM_CHAN_WIDTH_ANY)) ht_info->tx_chan_width = ATH9K_HT_MACMODE_2040; else ht_info->tx_chan_width = ATH9K_HT_MACMODE_20; ath9k_hw_set11nmac2040(sc->sc_ah, ht_info->tx_chan_width); ht_info->maxampdu = 1 << (IEEE80211_HTCAP_MAXRXAMPDU_FACTOR + - bss_conf->ht_conf->ampdu_factor); + bss_conf->ht_cap->ampdu_factor); ht_info->mpdudensity = - parse_mpdudensity(bss_conf->ht_conf->ampdu_density); + parse_mpdudensity(bss_conf->ht_cap->ampdu_density); } - -#undef IEEE80211_HT_CAP_40MHZ_INTOLERANT } static void ath9k_bss_assoc_info(struct ath_softc *sc, @@ -411,7 +408,7 @@ static void ath9k_bss_assoc_info(struct ath_softc *sc, return; } - if (hw->conf.ht_conf.ht_supported) + if (hw->conf.ht_cap.ht_supported) sc->sc_ah->ah_channels[pos].chanmode = ath_get_extchanmode(sc, curchan); else @@ -534,7 +531,7 @@ int _ath_rx_indicate(struct ath_softc *sc, if (an) { ath_rx_input(sc, an, - hw->conf.ht_conf.ht_supported, + hw->conf.ht_cap.ht_supported, skb, status, &st); } if (!an || (st != ATH_RX_CONSUMED)) @@ -943,7 +940,7 @@ static int ath_attach(u16 devid, if (sc->sc_ah->ah_caps.hw_caps & ATH9K_HW_CAP_HT) /* Setup HT capabilities for 2.4Ghz*/ - setup_ht_cap(&sc->sbands[IEEE80211_BAND_2GHZ].ht_info); + setup_ht_cap(&sc->sbands[IEEE80211_BAND_2GHZ].ht_cap); hw->wiphy->bands[IEEE80211_BAND_2GHZ] = &sc->sbands[IEEE80211_BAND_2GHZ]; @@ -958,7 +955,7 @@ static int ath_attach(u16 devid, if (sc->sc_ah->ah_caps.hw_caps & ATH9K_HW_CAP_HT) /* Setup HT capabilities for 5Ghz*/ - setup_ht_cap(&sc->sbands[IEEE80211_BAND_5GHZ].ht_info); + setup_ht_cap(&sc->sbands[IEEE80211_BAND_5GHZ].ht_cap); hw->wiphy->bands[IEEE80211_BAND_5GHZ] = &sc->sbands[IEEE80211_BAND_5GHZ]; @@ -1254,7 +1251,7 @@ static int ath9k_config(struct ieee80211_hw *hw, (curchan->band == IEEE80211_BAND_2GHZ) ? CHANNEL_G : CHANNEL_A; - if (sc->sc_curaid && hw->conf.ht_conf.ht_supported) + if (sc->sc_curaid && hw->conf.ht_cap.ht_supported) sc->sc_ah->ah_channels[pos].chanmode = ath_get_extchanmode(sc, curchan); diff --git a/drivers/net/wireless/ath9k/rc.c b/drivers/net/wireless/ath9k/rc.c index b1e535b8ec48..ee2dbce42b4d 100644 --- a/drivers/net/wireless/ath9k/rc.c +++ b/drivers/net/wireless/ath9k/rc.c @@ -1838,7 +1838,7 @@ void ath_rc_node_update(struct ieee80211_hw *hw, struct ath_rate_node *rc_priv) struct ath_softc *sc = hw->priv; u32 capflag = 0; - if (hw->conf.ht_conf.ht_supported) { + if (hw->conf.ht_cap.ht_supported) { capflag |= ATH_RC_HT_FLAG | ATH_RC_DS_FLAG; if (sc->sc_ht_info.tx_chan_width == ATH9K_HT_MACMODE_2040) capflag |= ATH_RC_CW40_FLAG; @@ -1910,7 +1910,7 @@ static void ath_tx_aggr_resp(struct ath_softc *sc, */ si = container_of(sta, struct sta_info, sta); buffersize = IEEE80211_MIN_AMPDU_BUF << - sband->ht_info.ampdu_factor; /* FIXME */ + sband->ht_cap.ampdu_factor; /* FIXME */ state = si->ampdu_mlme.tid_state_tx[tidno]; if (state & HT_ADDBA_RECEIVED_MSK) { @@ -1979,7 +1979,7 @@ static void ath_get_rate(void *priv, struct ieee80211_supported_band *sband, /* Check if aggregation has to be enabled for this tid */ - if (hw->conf.ht_conf.ht_supported) { + if (hw->conf.ht_cap.ht_supported) { if (ieee80211_is_data_qos(fc)) { qc = ieee80211_get_qos_ctl(hdr); tid = qc[0] & 0xf; @@ -2027,8 +2027,8 @@ static void ath_rate_init(void *priv, struct ieee80211_supported_band *sband, ath_setup_rates(sc, sband, sta, ath_rc_priv); if (sc->hw->conf.flags & IEEE80211_CONF_SUPPORT_HT_MODE) { - for (i = 0; i < MCS_SET_SIZE; i++) { - if (sc->hw->conf.ht_conf.supp_mcs_set[i/8] & (1<<(i%8))) + for (i = 0; i < 77; i++) { + if (sc->hw->conf.ht_cap.mcs.rx_mask[i/8] & (1<<(i%8))) ath_rc_priv->neg_ht_rates.rs_rates[j++] = i; if (j == ATH_RATE_MAX) break; diff --git a/drivers/net/wireless/ath9k/rc.h b/drivers/net/wireless/ath9k/rc.h index b95b41508b98..6671097fad72 100644 --- a/drivers/net/wireless/ath9k/rc.h +++ b/drivers/net/wireless/ath9k/rc.h @@ -59,7 +59,6 @@ struct ath_softc; #define FALSE 0 #define ATH_RATE_MAX 30 -#define MCS_SET_SIZE 128 enum ieee80211_fixed_rate_mode { IEEE80211_FIXED_RATE_NONE = 0, diff --git a/drivers/net/wireless/ath9k/recv.c b/drivers/net/wireless/ath9k/recv.c index 4983402af559..010fcdfec3f6 100644 --- a/drivers/net/wireless/ath9k/recv.c +++ b/drivers/net/wireless/ath9k/recv.c @@ -1119,7 +1119,7 @@ int ath_rx_aggr_start(struct ath_softc *sc, sband = hw->wiphy->bands[hw->conf.channel->band]; buffersize = IEEE80211_MIN_AMPDU_BUF << - sband->ht_info.ampdu_factor; /* FIXME */ + sband->ht_cap.ampdu_factor; /* FIXME */ rxtid = &an->an_aggr.rx.tid[tid]; diff --git a/drivers/net/wireless/ath9k/xmit.c b/drivers/net/wireless/ath9k/xmit.c index 13866043dec9..3770fbe84fce 100644 --- a/drivers/net/wireless/ath9k/xmit.c +++ b/drivers/net/wireless/ath9k/xmit.c @@ -300,7 +300,7 @@ static int ath_tx_prepare(struct ath_softc *sc, if (ieee80211_is_data(fc) && !txctl->use_minrate) { /* Enable HT only for DATA frames and not for EAPOL */ - txctl->ht = (hw->conf.ht_conf.ht_supported && + txctl->ht = (hw->conf.ht_cap.ht_supported && (tx_info->flags & IEEE80211_TX_CTL_AMPDU)); if (is_multicast_ether_addr(hdr->addr1)) { diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c index b497d40dc396..cd1bff590491 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c @@ -1134,10 +1134,10 @@ static int rs_switch_to_mimo2(struct iwl_priv *priv, s8 is_green = lq_sta->is_green; if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) || - !sta->ht_info.ht_supported) + !sta->ht_cap.ht_supported) return -1; - if (((sta->ht_info.cap & IEEE80211_HT_CAP_SM_PS) >> 2) + if (((sta->ht_cap.cap & IEEE80211_HT_CAP_SM_PS) >> 2) == WLAN_HT_CAP_SM_PS_STATIC) return -1; @@ -1202,7 +1202,7 @@ static int rs_switch_to_siso(struct iwl_priv *priv, s32 rate; if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) || - !sta->ht_info.ht_supported) + !sta->ht_cap.ht_supported) return -1; IWL_DEBUG_RATE("LQ: try to switch to SISO\n"); @@ -2238,19 +2238,19 @@ static void rs_rate_init(void *priv_r, struct ieee80211_supported_band *sband, * active_siso_rate mask includes 9 MBits (bit 5), and CCK (bits 0-3), * supp_rates[] does not; shift to convert format, force 9 MBits off. */ - lq_sta->active_siso_rate = conf->ht_conf.supp_mcs_set[0] << 1; - lq_sta->active_siso_rate |= conf->ht_conf.supp_mcs_set[0] & 0x1; + lq_sta->active_siso_rate = conf->ht_cap.mcs.rx_mask[0] << 1; + lq_sta->active_siso_rate |= conf->ht_cap.mcs.rx_mask[0] & 0x1; lq_sta->active_siso_rate &= ~((u16)0x2); lq_sta->active_siso_rate <<= IWL_FIRST_OFDM_RATE; /* Same here */ - lq_sta->active_mimo2_rate = conf->ht_conf.supp_mcs_set[1] << 1; - lq_sta->active_mimo2_rate |= conf->ht_conf.supp_mcs_set[1] & 0x1; + lq_sta->active_mimo2_rate = conf->ht_cap.mcs.rx_mask[1] << 1; + lq_sta->active_mimo2_rate |= conf->ht_cap.mcs.rx_mask[1] & 0x1; lq_sta->active_mimo2_rate &= ~((u16)0x2); lq_sta->active_mimo2_rate <<= IWL_FIRST_OFDM_RATE; - lq_sta->active_mimo3_rate = conf->ht_conf.supp_mcs_set[2] << 1; - lq_sta->active_mimo3_rate |= conf->ht_conf.supp_mcs_set[2] & 0x1; + lq_sta->active_mimo3_rate = conf->ht_cap.mcs.rx_mask[2] << 1; + lq_sta->active_mimo3_rate |= conf->ht_cap.mcs.rx_mask[2] & 0x1; lq_sta->active_mimo3_rate &= ~((u16)0x2); lq_sta->active_mimo3_rate <<= IWL_FIRST_OFDM_RATE; diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index 2cac09405afe..e6695e80fb53 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -552,7 +552,7 @@ static int iwl4965_send_beacon_cmd(struct iwl_priv *priv) static void iwl4965_ht_conf(struct iwl_priv *priv, struct ieee80211_bss_conf *bss_conf) { - struct ieee80211_ht_info *ht_conf = bss_conf->ht_conf; + struct ieee80211_sta_ht_cap *ht_conf = bss_conf->ht_cap; struct ieee80211_ht_bss_info *ht_bss_conf = bss_conf->ht_bss_conf; struct iwl_ht_info *iwl_conf = &priv->current_ht_config; @@ -573,27 +573,27 @@ static void iwl4965_ht_conf(struct iwl_priv *priv, !!(ht_conf->cap & IEEE80211_HT_CAP_MAX_AMSDU); iwl_conf->supported_chan_width = - !!(ht_conf->cap & IEEE80211_HT_CAP_SUP_WIDTH); + !!(ht_conf->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40); iwl_conf->extension_chan_offset = - ht_bss_conf->bss_cap & IEEE80211_HT_IE_CHA_SEC_OFFSET; + ht_bss_conf->bss_cap & IEEE80211_HT_PARAM_CHA_SEC_OFFSET; /* If no above or below channel supplied disable FAT channel */ - if (iwl_conf->extension_chan_offset != IEEE80211_HT_IE_CHA_SEC_ABOVE && - iwl_conf->extension_chan_offset != IEEE80211_HT_IE_CHA_SEC_BELOW) { - iwl_conf->extension_chan_offset = IEEE80211_HT_IE_CHA_SEC_NONE; + if (iwl_conf->extension_chan_offset != IEEE80211_HT_PARAM_CHA_SEC_ABOVE && + iwl_conf->extension_chan_offset != IEEE80211_HT_PARAM_CHA_SEC_BELOW) { + iwl_conf->extension_chan_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE; iwl_conf->supported_chan_width = 0; } iwl_conf->sm_ps = (u8)((ht_conf->cap & IEEE80211_HT_CAP_SM_PS) >> 2); - memcpy(iwl_conf->supp_mcs_set, ht_conf->supp_mcs_set, 16); + memcpy(&iwl_conf->mcs, &ht_conf->mcs, 16); iwl_conf->control_channel = ht_bss_conf->primary_channel; iwl_conf->tx_chan_width = - !!(ht_bss_conf->bss_cap & IEEE80211_HT_IE_CHA_WIDTH); + !!(ht_bss_conf->bss_cap & IEEE80211_HT_PARAM_CHAN_WIDTH_ANY); iwl_conf->ht_protection = - ht_bss_conf->bss_op_mode & IEEE80211_HT_IE_HT_PROTECTION; + ht_bss_conf->bss_op_mode & IEEE80211_HT_OP_MODE_PROTECTION; iwl_conf->non_GF_STA_present = - !!(ht_bss_conf->bss_op_mode & IEEE80211_HT_IE_NON_GF_STA_PRSNT); + !!(ht_bss_conf->bss_op_mode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT); IWL_DEBUG_MAC80211("control channel %d\n", iwl_conf->control_channel); IWL_DEBUG_MAC80211("leave\n"); diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index 4c312c55f90c..4678da447ff6 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -382,10 +382,10 @@ void iwl_reset_qos(struct iwl_priv *priv) } EXPORT_SYMBOL(iwl_reset_qos); -#define MAX_BIT_RATE_40_MHZ 0x96 /* 150 Mbps */ -#define MAX_BIT_RATE_20_MHZ 0x48 /* 72 Mbps */ +#define MAX_BIT_RATE_40_MHZ 150 /* Mbps */ +#define MAX_BIT_RATE_20_MHZ 72 /* Mbps */ static void iwlcore_init_ht_hw_capab(const struct iwl_priv *priv, - struct ieee80211_ht_info *ht_info, + struct ieee80211_sta_ht_cap *ht_info, enum ieee80211_band band) { u16 max_bit_rate = 0; @@ -393,45 +393,46 @@ static void iwlcore_init_ht_hw_capab(const struct iwl_priv *priv, u8 tx_chains_num = priv->hw_params.tx_chains_num; ht_info->cap = 0; - memset(ht_info->supp_mcs_set, 0, 16); + memset(&ht_info->mcs, 0, sizeof(ht_info->mcs)); - ht_info->ht_supported = 1; + ht_info->ht_supported = true; - ht_info->cap |= (u16)IEEE80211_HT_CAP_GRN_FLD; - ht_info->cap |= (u16)IEEE80211_HT_CAP_SGI_20; - ht_info->cap |= (u16)(IEEE80211_HT_CAP_SM_PS & + ht_info->cap |= IEEE80211_HT_CAP_GRN_FLD; + ht_info->cap |= IEEE80211_HT_CAP_SGI_20; + ht_info->cap |= (IEEE80211_HT_CAP_SM_PS & (WLAN_HT_CAP_SM_PS_DISABLED << 2)); max_bit_rate = MAX_BIT_RATE_20_MHZ; if (priv->hw_params.fat_channel & BIT(band)) { - ht_info->cap |= (u16)IEEE80211_HT_CAP_SUP_WIDTH; - ht_info->cap |= (u16)IEEE80211_HT_CAP_SGI_40; - ht_info->supp_mcs_set[4] = 0x01; + ht_info->cap |= IEEE80211_HT_CAP_SUP_WIDTH_20_40; + ht_info->cap |= IEEE80211_HT_CAP_SGI_40; + ht_info->mcs.rx_mask[4] = 0x01; max_bit_rate = MAX_BIT_RATE_40_MHZ; } if (priv->cfg->mod_params->amsdu_size_8K) - ht_info->cap |= (u16)IEEE80211_HT_CAP_MAX_AMSDU; + ht_info->cap |= IEEE80211_HT_CAP_MAX_AMSDU; ht_info->ampdu_factor = CFG_HT_RX_AMPDU_FACTOR_DEF; ht_info->ampdu_density = CFG_HT_MPDU_DENSITY_DEF; - ht_info->supp_mcs_set[0] = 0xFF; + ht_info->mcs.rx_mask[0] = 0xFF; if (rx_chains_num >= 2) - ht_info->supp_mcs_set[1] = 0xFF; + ht_info->mcs.rx_mask[1] = 0xFF; if (rx_chains_num >= 3) - ht_info->supp_mcs_set[2] = 0xFF; + ht_info->mcs.rx_mask[2] = 0xFF; /* Highest supported Rx data rate */ max_bit_rate *= rx_chains_num; - ht_info->supp_mcs_set[10] = (u8)(max_bit_rate & 0x00FF); - ht_info->supp_mcs_set[11] = (u8)((max_bit_rate & 0xFF00) >> 8); + WARN_ON(max_bit_rate & ~IEEE80211_HT_MCS_RX_HIGHEST_MASK); + ht_info->mcs.rx_highest = cpu_to_le16(max_bit_rate); /* Tx MCS capabilities */ - ht_info->supp_mcs_set[12] = IEEE80211_HT_CAP_MCS_TX_DEFINED; + ht_info->mcs.tx_params = IEEE80211_HT_MCS_TX_DEFINED; if (tx_chains_num != rx_chains_num) { - ht_info->supp_mcs_set[12] |= IEEE80211_HT_CAP_MCS_TX_RX_DIFF; - ht_info->supp_mcs_set[12] |= ((tx_chains_num - 1) << 2); + ht_info->mcs.tx_params |= IEEE80211_HT_MCS_TX_RX_DIFF; + ht_info->mcs.tx_params |= ((tx_chains_num - 1) << + IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT); } } @@ -495,7 +496,7 @@ static int iwlcore_init_geos(struct iwl_priv *priv) sband->n_bitrates = IWL_RATE_COUNT - IWL_FIRST_OFDM_RATE; if (priv->cfg->sku & IWL_SKU_N) - iwlcore_init_ht_hw_capab(priv, &sband->ht_info, + iwlcore_init_ht_hw_capab(priv, &sband->ht_cap, IEEE80211_BAND_5GHZ); sband = &priv->bands[IEEE80211_BAND_2GHZ]; @@ -505,7 +506,7 @@ static int iwlcore_init_geos(struct iwl_priv *priv) sband->n_bitrates = IWL_RATE_COUNT; if (priv->cfg->sku & IWL_SKU_N) - iwlcore_init_ht_hw_capab(priv, &sband->ht_info, + iwlcore_init_ht_hw_capab(priv, &sband->ht_cap, IEEE80211_BAND_2GHZ); priv->ieee_channels = channels; @@ -595,8 +596,8 @@ static void iwlcore_free_geos(struct iwl_priv *priv) static bool is_single_rx_stream(struct iwl_priv *priv) { return !priv->current_ht_config.is_ht || - ((priv->current_ht_config.supp_mcs_set[1] == 0) && - (priv->current_ht_config.supp_mcs_set[2] == 0)); + ((priv->current_ht_config.mcs.rx_mask[1] == 0) && + (priv->current_ht_config.mcs.rx_mask[2] == 0)); } static u8 iwl_is_channel_extension(struct iwl_priv *priv, @@ -609,10 +610,10 @@ static u8 iwl_is_channel_extension(struct iwl_priv *priv, if (!is_channel_valid(ch_info)) return 0; - if (extension_chan_offset == IEEE80211_HT_IE_CHA_SEC_ABOVE) + if (extension_chan_offset == IEEE80211_HT_PARAM_CHA_SEC_ABOVE) return !(ch_info->fat_extension_channel & IEEE80211_CHAN_NO_FAT_ABOVE); - else if (extension_chan_offset == IEEE80211_HT_IE_CHA_SEC_BELOW) + else if (extension_chan_offset == IEEE80211_HT_PARAM_CHA_SEC_BELOW) return !(ch_info->fat_extension_channel & IEEE80211_CHAN_NO_FAT_BELOW); @@ -620,18 +621,18 @@ static u8 iwl_is_channel_extension(struct iwl_priv *priv, } u8 iwl_is_fat_tx_allowed(struct iwl_priv *priv, - struct ieee80211_ht_info *sta_ht_inf) + struct ieee80211_sta_ht_cap *sta_ht_inf) { struct iwl_ht_info *iwl_ht_conf = &priv->current_ht_config; if ((!iwl_ht_conf->is_ht) || (iwl_ht_conf->supported_chan_width != IWL_CHANNEL_WIDTH_40MHZ) || - (iwl_ht_conf->extension_chan_offset == IEEE80211_HT_IE_CHA_SEC_NONE)) + (iwl_ht_conf->extension_chan_offset == IEEE80211_HT_PARAM_CHA_SEC_NONE)) return 0; if (sta_ht_inf) { if ((!sta_ht_inf->ht_supported) || - (!(sta_ht_inf->cap & IEEE80211_HT_CAP_SUP_WIDTH))) + (!(sta_ht_inf->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40))) return 0; } @@ -671,13 +672,13 @@ void iwl_set_rxon_ht(struct iwl_priv *priv, struct iwl_ht_info *ht_info) /* Note: control channel is opposite of extension channel */ switch (ht_info->extension_chan_offset) { - case IEEE80211_HT_IE_CHA_SEC_ABOVE: + case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: rxon->flags &= ~(RXON_FLG_CTRL_CHANNEL_LOC_HI_MSK); break; - case IEEE80211_HT_IE_CHA_SEC_BELOW: + case IEEE80211_HT_PARAM_CHA_SEC_BELOW: rxon->flags |= RXON_FLG_CTRL_CHANNEL_LOC_HI_MSK; break; - case IEEE80211_HT_IE_CHA_SEC_NONE: + case IEEE80211_HT_PARAM_CHA_SEC_NONE: default: rxon->flags &= ~RXON_FLG_CHANNEL_MODE_MIXED_MSK; break; @@ -693,9 +694,9 @@ void iwl_set_rxon_ht(struct iwl_priv *priv, struct iwl_ht_info *ht_info) "rxon flags 0x%X operation mode :0x%X " "extension channel offset 0x%x " "control chan %d\n", - ht_info->supp_mcs_set[0], - ht_info->supp_mcs_set[1], - ht_info->supp_mcs_set[2], + ht_info->mcs.rx_mask[0], + ht_info->mcs.rx_mask[1], + ht_info->mcs.rx_mask[2], le32_to_cpu(rxon->flags), ht_info->ht_protection, ht_info->extension_chan_offset, ht_info->control_channel); diff --git a/drivers/net/wireless/iwlwifi/iwl-core.h b/drivers/net/wireless/iwlwifi/iwl-core.h index 288b6a800e03..1a3ad8b1ebdb 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.h +++ b/drivers/net/wireless/iwlwifi/iwl-core.h @@ -190,7 +190,7 @@ void iwl_set_rxon_chain(struct iwl_priv *priv); int iwl_set_rxon_channel(struct iwl_priv *priv, struct ieee80211_channel *ch); void iwl_set_rxon_ht(struct iwl_priv *priv, struct iwl_ht_info *ht_info); u8 iwl_is_fat_tx_allowed(struct iwl_priv *priv, - struct ieee80211_ht_info *sta_ht_inf); + struct ieee80211_sta_ht_cap *sta_ht_inf); int iwl_hw_nic_init(struct iwl_priv *priv); int iwl_setup_mac(struct iwl_priv *priv); int iwl_set_hw_params(struct iwl_priv *priv); diff --git a/drivers/net/wireless/iwlwifi/iwl-dev.h b/drivers/net/wireless/iwlwifi/iwl-dev.h index 34306b6798e2..572250ee9d58 100644 --- a/drivers/net/wireless/iwlwifi/iwl-dev.h +++ b/drivers/net/wireless/iwlwifi/iwl-dev.h @@ -411,7 +411,7 @@ struct iwl_ht_info { u8 max_amsdu_size; u8 ampdu_factor; u8 mpdu_density; - u8 supp_mcs_set[16]; + struct ieee80211_mcs_info mcs; /* BSS related data */ u8 control_channel; u8 extension_chan_offset; @@ -585,7 +585,7 @@ struct iwl_addsta_cmd; extern int iwl_send_add_sta(struct iwl_priv *priv, struct iwl_addsta_cmd *sta, u8 flags); extern u8 iwl_add_station_flags(struct iwl_priv *priv, const u8 *addr, - int is_ap, u8 flags, struct ieee80211_ht_info *ht_info); + int is_ap, u8 flags, struct ieee80211_sta_ht_cap *ht_info); extern void iwl4965_update_chain_flags(struct iwl_priv *priv); extern int iwl4965_set_pwr_src(struct iwl_priv *priv, enum iwl_pwr_src src); extern const u8 iwl_bcast_addr[ETH_ALEN]; diff --git a/drivers/net/wireless/iwlwifi/iwl-scan.c b/drivers/net/wireless/iwlwifi/iwl-scan.c index 3b0bee331a33..78d16bd7b745 100644 --- a/drivers/net/wireless/iwlwifi/iwl-scan.c +++ b/drivers/net/wireless/iwlwifi/iwl-scan.c @@ -550,7 +550,7 @@ static void iwl_ht_cap_to_ie(const struct ieee80211_supported_band *sband, { struct ieee80211_ht_cap *ht_cap; - if (!sband || !sband->ht_info.ht_supported) + if (!sband || !sband->ht_cap.ht_supported) return; if (*left < sizeof(struct ieee80211_ht_cap)) @@ -559,12 +559,12 @@ static void iwl_ht_cap_to_ie(const struct ieee80211_supported_band *sband, *pos++ = sizeof(struct ieee80211_ht_cap); ht_cap = (struct ieee80211_ht_cap *) pos; - ht_cap->cap_info = cpu_to_le16(sband->ht_info.cap); - memcpy(ht_cap->supp_mcs_set, sband->ht_info.supp_mcs_set, 16); + ht_cap->cap_info = cpu_to_le16(sband->ht_cap.cap); + memcpy(&ht_cap->mcs, &sband->ht_cap.mcs, 16); ht_cap->ampdu_params_info = - (sband->ht_info.ampdu_factor & IEEE80211_HT_CAP_AMPDU_FACTOR) | - ((sband->ht_info.ampdu_density << 2) & - IEEE80211_HT_CAP_AMPDU_DENSITY); + (sband->ht_cap.ampdu_factor & IEEE80211_HT_AMPDU_PARM_FACTOR) | + ((sband->ht_cap.ampdu_density << 2) & + IEEE80211_HT_AMPDU_PARM_DENSITY); *left -= sizeof(struct ieee80211_ht_cap); } diff --git a/drivers/net/wireless/iwlwifi/iwl-sta.c b/drivers/net/wireless/iwlwifi/iwl-sta.c index a28a8decc79c..b9b8554433a6 100644 --- a/drivers/net/wireless/iwlwifi/iwl-sta.c +++ b/drivers/net/wireless/iwlwifi/iwl-sta.c @@ -181,7 +181,7 @@ int iwl_send_add_sta(struct iwl_priv *priv, EXPORT_SYMBOL(iwl_send_add_sta); static void iwl_set_ht_add_station(struct iwl_priv *priv, u8 index, - struct ieee80211_ht_info *sta_ht_inf) + struct ieee80211_sta_ht_cap *sta_ht_inf) { __le32 sta_flags; u8 mimo_ps_mode; @@ -229,7 +229,7 @@ static void iwl_set_ht_add_station(struct iwl_priv *priv, u8 index, * iwl_add_station_flags - Add station to tables in driver and device */ u8 iwl_add_station_flags(struct iwl_priv *priv, const u8 *addr, int is_ap, - u8 flags, struct ieee80211_ht_info *ht_info) + u8 flags, struct ieee80211_sta_ht_cap *ht_info) { int i; int sta_id = IWL_INVALID_STATION; @@ -894,7 +894,7 @@ int iwl_rxon_add_station(struct iwl_priv *priv, const u8 *addr, int is_ap) /* Add station to device's station table */ struct ieee80211_conf *conf = &priv->hw->conf; - struct ieee80211_ht_info *cur_ht_config = &conf->ht_conf; + struct ieee80211_sta_ht_cap *cur_ht_config = &conf->ht_cap; if ((is_ap) && (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) && diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index e23d9a52d083..3f236b546683 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -563,19 +563,18 @@ static int __init init_mac80211_hwsim(void) data->band.n_channels = ARRAY_SIZE(hwsim_channels); data->band.bitrates = data->rates; data->band.n_bitrates = ARRAY_SIZE(hwsim_rates); - data->band.ht_info.ht_supported = 1; - data->band.ht_info.cap = IEEE80211_HT_CAP_SUP_WIDTH | + data->band.ht_cap.ht_supported = true; + data->band.ht_cap.cap = IEEE80211_HT_CAP_SUP_WIDTH_20_40 | IEEE80211_HT_CAP_GRN_FLD | IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_DSSSCCK40; - data->band.ht_info.ampdu_factor = 0x3; - data->band.ht_info.ampdu_density = 0x6; - memset(data->band.ht_info.supp_mcs_set, 0, - sizeof(data->band.ht_info.supp_mcs_set)); - data->band.ht_info.supp_mcs_set[0] = 0xff; - data->band.ht_info.supp_mcs_set[1] = 0xff; - data->band.ht_info.supp_mcs_set[12] = - IEEE80211_HT_CAP_MCS_TX_DEFINED; + data->band.ht_cap.ampdu_factor = 0x3; + data->band.ht_cap.ampdu_density = 0x6; + memset(&data->band.ht_cap.mcs, 0, + sizeof(data->band.ht_cap.mcs)); + data->band.ht_cap.mcs.rx_mask[0] = 0xff; + data->band.ht_cap.mcs.rx_mask[1] = 0xff; + data->band.ht_cap.mcs.tx_params = IEEE80211_HT_MCS_TX_DEFINED; hw->wiphy->bands[IEEE80211_BAND_2GHZ] = &data->band; err = ieee80211_register_hw(hw); diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 14126bc36641..64a4abce6d91 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -685,28 +685,88 @@ struct ieee80211_bar { #define IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL 0x0000 #define IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA 0x0004 + +#define IEEE80211_HT_MCS_MASK_LEN 10 + +/** + * struct ieee80211_mcs_info - MCS information + * @rx_mask: RX mask + * @rx_highest: highest supported RX rate + * @tx_params: TX parameters + */ +struct ieee80211_mcs_info { + u8 rx_mask[IEEE80211_HT_MCS_MASK_LEN]; + __le16 rx_highest; + u8 tx_params; + u8 reserved[3]; +} __attribute__((packed)); + +/* 802.11n HT capability MSC set */ +#define IEEE80211_HT_MCS_RX_HIGHEST_MASK 0x3ff +#define IEEE80211_HT_MCS_TX_DEFINED 0x01 +#define IEEE80211_HT_MCS_TX_RX_DIFF 0x02 +/* value 0 == 1 stream etc */ +#define IEEE80211_HT_MCS_TX_MAX_STREAMS_MASK 0x0C +#define IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT 2 +#define IEEE80211_HT_MCS_TX_MAX_STREAMS 4 +#define IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION 0x10 + +/* + * 802.11n D5.0 20.3.5 / 20.6 says: + * - indices 0 to 7 and 32 are single spatial stream + * - 8 to 31 are multiple spatial streams using equal modulation + * [8..15 for two streams, 16..23 for three and 24..31 for four] + * - remainder are multiple spatial streams using unequal modulation + */ +#define IEEE80211_HT_MCS_UNEQUAL_MODULATION_START 33 +#define IEEE80211_HT_MCS_UNEQUAL_MODULATION_START_BYTE \ + (IEEE80211_HT_MCS_UNEQUAL_MODULATION_START / 8) + /** * struct ieee80211_ht_cap - HT capabilities * - * This structure refers to "HT capabilities element" as - * described in 802.11n draft section 7.3.2.52 + * This structure is the "HT capabilities element" as + * described in 802.11n D5.0 7.3.2.57 */ struct ieee80211_ht_cap { __le16 cap_info; u8 ampdu_params_info; - u8 supp_mcs_set[16]; + + /* 16 bytes MCS information */ + struct ieee80211_mcs_info mcs; + __le16 extended_ht_cap_info; __le32 tx_BF_cap_info; u8 antenna_selection_info; } __attribute__ ((packed)); +/* 802.11n HT capabilities masks (for cap_info) */ +#define IEEE80211_HT_CAP_LDPC_CODING 0x0001 +#define IEEE80211_HT_CAP_SUP_WIDTH_20_40 0x0002 +#define IEEE80211_HT_CAP_SM_PS 0x000C +#define IEEE80211_HT_CAP_GRN_FLD 0x0010 +#define IEEE80211_HT_CAP_SGI_20 0x0020 +#define IEEE80211_HT_CAP_SGI_40 0x0040 +#define IEEE80211_HT_CAP_TX_STBC 0x0080 +#define IEEE80211_HT_CAP_RX_STBC 0x0300 +#define IEEE80211_HT_CAP_DELAY_BA 0x0400 +#define IEEE80211_HT_CAP_MAX_AMSDU 0x0800 +#define IEEE80211_HT_CAP_DSSSCCK40 0x1000 +#define IEEE80211_HT_CAP_PSMP_SUPPORT 0x2000 +#define IEEE80211_HT_CAP_40MHZ_INTOLERANT 0x4000 +#define IEEE80211_HT_CAP_LSIG_TXOP_PROT 0x8000 + +/* 802.11n HT capability AMPDU settings (for ampdu_params_info) */ +#define IEEE80211_HT_AMPDU_PARM_FACTOR 0x03 +#define IEEE80211_HT_AMPDU_PARM_DENSITY 0x1C + /** - * struct ieee80211_ht_cap - HT additional information + * struct ieee80211_ht_info - HT information * - * This structure refers to "HT information element" as - * described in 802.11n draft section 7.3.2.53 + * This structure is the "HT information element" as + * described in 802.11n D5.0 7.3.2.58 */ -struct ieee80211_ht_addt_info { +struct ieee80211_ht_info { u8 control_chan; u8 ht_param; __le16 operation_mode; @@ -714,36 +774,33 @@ struct ieee80211_ht_addt_info { u8 basic_set[16]; } __attribute__ ((packed)); -/* 802.11n HT capabilities masks */ -#define IEEE80211_HT_CAP_SUP_WIDTH 0x0002 -#define IEEE80211_HT_CAP_SM_PS 0x000C -#define IEEE80211_HT_CAP_GRN_FLD 0x0010 -#define IEEE80211_HT_CAP_SGI_20 0x0020 -#define IEEE80211_HT_CAP_SGI_40 0x0040 -#define IEEE80211_HT_CAP_DELAY_BA 0x0400 -#define IEEE80211_HT_CAP_MAX_AMSDU 0x0800 -#define IEEE80211_HT_CAP_DSSSCCK40 0x1000 -/* 802.11n HT capability AMPDU settings */ -#define IEEE80211_HT_CAP_AMPDU_FACTOR 0x03 -#define IEEE80211_HT_CAP_AMPDU_DENSITY 0x1C -/* 802.11n HT capability MSC set */ -#define IEEE80211_SUPP_MCS_SET_UEQM 4 -#define IEEE80211_HT_CAP_MAX_STREAMS 4 -#define IEEE80211_SUPP_MCS_SET_LEN 10 -/* maximum streams the spec allows */ -#define IEEE80211_HT_CAP_MCS_TX_DEFINED 0x01 -#define IEEE80211_HT_CAP_MCS_TX_RX_DIFF 0x02 -#define IEEE80211_HT_CAP_MCS_TX_STREAMS 0x0C -#define IEEE80211_HT_CAP_MCS_TX_UEQM 0x10 -/* 802.11n HT IE masks */ -#define IEEE80211_HT_IE_CHA_SEC_OFFSET 0x03 -#define IEEE80211_HT_IE_CHA_SEC_NONE 0x00 -#define IEEE80211_HT_IE_CHA_SEC_ABOVE 0x01 -#define IEEE80211_HT_IE_CHA_SEC_BELOW 0x03 -#define IEEE80211_HT_IE_CHA_WIDTH 0x04 -#define IEEE80211_HT_IE_HT_PROTECTION 0x0003 -#define IEEE80211_HT_IE_NON_GF_STA_PRSNT 0x0004 -#define IEEE80211_HT_IE_NON_HT_STA_PRSNT 0x0010 +/* for ht_param */ +#define IEEE80211_HT_PARAM_CHA_SEC_OFFSET 0x03 +#define IEEE80211_HT_PARAM_CHA_SEC_NONE 0x00 +#define IEEE80211_HT_PARAM_CHA_SEC_ABOVE 0x01 +#define IEEE80211_HT_PARAM_CHA_SEC_BELOW 0x03 +#define IEEE80211_HT_PARAM_CHAN_WIDTH_ANY 0x04 +#define IEEE80211_HT_PARAM_RIFS_MODE 0x08 +#define IEEE80211_HT_PARAM_SPSMP_SUPPORT 0x10 +#define IEEE80211_HT_PARAM_SERV_INTERVAL_GRAN 0xE0 + +/* for operation_mode */ +#define IEEE80211_HT_OP_MODE_PROTECTION 0x0003 +#define IEEE80211_HT_OP_MODE_PROTECTION_NONE 0 +#define IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER 1 +#define IEEE80211_HT_OP_MODE_PROTECTION_20MHZ 2 +#define IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED 3 +#define IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT 0x0004 +#define IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT 0x0010 + +/* for stbc_param */ +#define IEEE80211_HT_STBC_PARAM_DUAL_BEACON 0x0040 +#define IEEE80211_HT_STBC_PARAM_DUAL_CTS_PROT 0x0080 +#define IEEE80211_HT_STBC_PARAM_STBC_BEACON 0x0100 +#define IEEE80211_HT_STBC_PARAM_LSIG_TXOP_FULLPROT 0x0200 +#define IEEE80211_HT_STBC_PARAM_PCO_ACTIVE 0x0400 +#define IEEE80211_HT_STBC_PARAM_PCO_PHASE 0x0800 + /* block-ack parameters */ #define IEEE80211_ADDBA_PARAM_POLICY_MASK 0x0002 @@ -949,7 +1006,7 @@ enum ieee80211_eid { WLAN_EID_EXT_SUPP_RATES = 50, /* 802.11n */ WLAN_EID_HT_CAPABILITY = 45, - WLAN_EID_HT_EXTRA_INFO = 61, + WLAN_EID_HT_INFORMATION = 61, /* 802.11i */ WLAN_EID_RSN = 48, WLAN_EID_WPA = 221, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d1466e7a47b8..2870f3973f1a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -191,7 +191,7 @@ enum ieee80211_bss_change { * @beacon_int: beacon interval * @assoc_capability: capabilities taken from assoc resp * @assoc_ht: association in HT mode - * @ht_conf: ht capabilities + * @ht_cap: ht capabilities * @ht_bss_conf: ht extended capabilities * @basic_rates: bitmap of basic rates, each bit stands for an * index into the rate table configured by the driver in @@ -212,7 +212,7 @@ struct ieee80211_bss_conf { u64 basic_rates; /* ht related data */ bool assoc_ht; - struct ieee80211_ht_info *ht_conf; + struct ieee80211_sta_ht_cap *ht_cap; struct ieee80211_ht_bss_info *ht_bss_conf; }; @@ -477,7 +477,7 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void) * @antenna_sel_tx: transmit antenna selection, 0: default/diversity, * 1/2: antenna 0/1 * @antenna_sel_rx: receive antenna selection, like @antenna_sel_tx - * @ht_conf: describes current self configuration of 802.11n HT capabilies + * @ht_cap: describes current self configuration of 802.11n HT capabilities * @ht_bss_conf: describes current BSS configuration of 802.11n HT parameters * @channel: the channel to tune to */ @@ -493,7 +493,7 @@ struct ieee80211_conf { struct ieee80211_channel *channel; - struct ieee80211_ht_info ht_conf; + struct ieee80211_sta_ht_cap ht_cap; struct ieee80211_ht_bss_info ht_bss_conf; }; @@ -687,7 +687,7 @@ enum set_key_cmd { * @addr: MAC address * @aid: AID we assigned to the station if we're an AP * @supp_rates: Bitmap of supported rates (per band) - * @ht_info: HT capabilities of this STA + * @ht_cap: HT capabilities of this STA * @drv_priv: data area for driver use, will always be aligned to * sizeof(void *), size is determined in hw information. */ @@ -695,7 +695,7 @@ struct ieee80211_sta { u64 supp_rates[IEEE80211_NUM_BANDS]; u8 addr[ETH_ALEN]; u16 aid; - struct ieee80211_ht_info ht_info; + struct ieee80211_sta_ht_cap ht_cap; /* must be last */ u8 drv_priv[0] __attribute__((__aligned__(sizeof(void *)))); diff --git a/include/net/wireless.h b/include/net/wireless.h index 721efb363db7..c0aa0fb8db5e 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -10,6 +10,7 @@ #include #include #include +#include #include /** @@ -133,23 +134,23 @@ struct ieee80211_rate { }; /** - * struct ieee80211_ht_info - describing STA's HT capabilities + * struct ieee80211_sta_ht_cap - STA's HT capabilities * * This structure describes most essential parameters needed * to describe 802.11n HT capabilities for an STA. * - * @ht_supported: is HT supported by STA, 0: no, 1: yes + * @ht_supported: is HT supported by the STA * @cap: HT capabilities map as described in 802.11n spec * @ampdu_factor: Maximum A-MPDU length factor * @ampdu_density: Minimum A-MPDU spacing - * @supp_mcs_set: Supported MCS set as described in 802.11n spec + * @mcs: Supported MCS rates */ -struct ieee80211_ht_info { +struct ieee80211_sta_ht_cap { u16 cap; /* use IEEE80211_HT_CAP_ */ - u8 ht_supported; + bool ht_supported; u8 ampdu_factor; u8 ampdu_density; - u8 supp_mcs_set[16]; + struct ieee80211_mcs_info mcs; }; /** @@ -173,7 +174,7 @@ struct ieee80211_supported_band { enum ieee80211_band band; int n_channels; int n_bitrates; - struct ieee80211_ht_info ht_info; + struct ieee80211_sta_ht_cap ht_cap; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index cf3fd5d60665..a5dea617aab3 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -633,10 +633,9 @@ static void sta_apply_parameters(struct ieee80211_local *local, sta->sta.supp_rates[local->oper_channel->band] = rates; } - if (params->ht_capa) { - ieee80211_ht_cap_ie_to_ht_info(params->ht_capa, - &sta->sta.ht_info); - } + if (params->ht_capa) + ieee80211_ht_cap_ie_to_sta_ht_cap(params->ht_capa, + &sta->sta.ht_cap); if (ieee80211_vif_is_mesh(&sdata->vif) && params->plink_action) { switch (params->plink_action) { diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index b854483cf23f..e2d121bf2745 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -20,37 +20,33 @@ #include "sta_info.h" #include "wme.h" -int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie, - struct ieee80211_ht_info *ht_info) +void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_ht_cap *ht_cap_ie, + struct ieee80211_sta_ht_cap *ht_cap) { - if (ht_info == NULL) - return -EINVAL; + BUG_ON(!ht_cap); - memset(ht_info, 0, sizeof(*ht_info)); + memset(ht_cap, 0, sizeof(*ht_cap)); if (ht_cap_ie) { u8 ampdu_info = ht_cap_ie->ampdu_params_info; - ht_info->ht_supported = 1; - ht_info->cap = le16_to_cpu(ht_cap_ie->cap_info); - ht_info->ampdu_factor = - ampdu_info & IEEE80211_HT_CAP_AMPDU_FACTOR; - ht_info->ampdu_density = - (ampdu_info & IEEE80211_HT_CAP_AMPDU_DENSITY) >> 2; - memcpy(ht_info->supp_mcs_set, ht_cap_ie->supp_mcs_set, 16); + ht_cap->ht_supported = true; + ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info); + ht_cap->ampdu_factor = + ampdu_info & IEEE80211_HT_AMPDU_PARM_FACTOR; + ht_cap->ampdu_density = + (ampdu_info & IEEE80211_HT_AMPDU_PARM_DENSITY) >> 2; + memcpy(&ht_cap->mcs, &ht_cap_ie->mcs, sizeof(ht_cap->mcs)); } else - ht_info->ht_supported = 0; - - return 0; + ht_cap->ht_supported = false; } -int ieee80211_ht_addt_info_ie_to_ht_bss_info( - struct ieee80211_ht_addt_info *ht_add_info_ie, +void ieee80211_ht_info_ie_to_ht_bss_info( + struct ieee80211_ht_info *ht_add_info_ie, struct ieee80211_ht_bss_info *bss_info) { - if (bss_info == NULL) - return -EINVAL; + BUG_ON(!bss_info); memset(bss_info, 0, sizeof(*bss_info)); @@ -62,8 +58,119 @@ int ieee80211_ht_addt_info_ie_to_ht_bss_info( bss_info->bss_cap = ht_add_info_ie->ht_param; bss_info->bss_op_mode = (u8)(op_mode & 0xff); } +} + +/* + * ieee80211_handle_ht should be called only after the operating band + * has been determined as ht configuration depends on the hw's + * HT abilities for a specific band. + */ +u32 ieee80211_handle_ht(struct ieee80211_local *local, + struct ieee80211_sta_ht_cap *req_ht_cap, + struct ieee80211_ht_bss_info *req_bss_cap) +{ + struct ieee80211_conf *conf = &local->hw.conf; + struct ieee80211_supported_band *sband; + struct ieee80211_sta_ht_cap ht_cap; + struct ieee80211_ht_bss_info ht_bss_conf; + u32 changed = 0; + int i; + u8 max_tx_streams; + u8 tx_mcs_set_cap; + bool enable_ht = true; + + sband = local->hw.wiphy->bands[conf->channel->band]; + + memset(&ht_cap, 0, sizeof(ht_cap)); + memset(&ht_bss_conf, 0, sizeof(struct ieee80211_ht_bss_info)); + + /* HT is not supported */ + if (!sband->ht_cap.ht_supported) + enable_ht = false; + + /* disable HT */ + if (!enable_ht) { + if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) + changed |= BSS_CHANGED_HT; + conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE; + conf->ht_cap.ht_supported = false; + return changed; + } + + + if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE)) + changed |= BSS_CHANGED_HT; + + conf->flags |= IEEE80211_CONF_SUPPORT_HT_MODE; + ht_cap.ht_supported = true; + + ht_cap.cap = req_ht_cap->cap & sband->ht_cap.cap; + ht_cap.cap &= ~IEEE80211_HT_CAP_SM_PS; + ht_cap.cap |= sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS; + + ht_bss_conf.primary_channel = req_bss_cap->primary_channel; + ht_bss_conf.bss_cap = req_bss_cap->bss_cap; + ht_bss_conf.bss_op_mode = req_bss_cap->bss_op_mode; + + ht_cap.ampdu_factor = req_ht_cap->ampdu_factor; + ht_cap.ampdu_density = req_ht_cap->ampdu_density; + + /* own MCS TX capabilities */ + tx_mcs_set_cap = sband->ht_cap.mcs.tx_params; + + /* + * configure supported Tx MCS according to requested MCS + * (based in most cases on Rx capabilities of peer) and self + * Tx MCS capabilities (as defined by low level driver HW + * Tx capabilities) + */ + + /* can we TX with MCS rates? */ + if (!(tx_mcs_set_cap & IEEE80211_HT_MCS_TX_DEFINED)) + goto check_changed; + + /* Counting from 0, therefore +1 */ + if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_RX_DIFF) + max_tx_streams = + ((tx_mcs_set_cap & IEEE80211_HT_MCS_TX_MAX_STREAMS_MASK) + >> IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT) + 1; + else + max_tx_streams = IEEE80211_HT_MCS_TX_MAX_STREAMS; + + /* + * 802.11n D5.0 20.3.5 / 20.6 says: + * - indices 0 to 7 and 32 are single spatial stream + * - 8 to 31 are multiple spatial streams using equal modulation + * [8..15 for two streams, 16..23 for three and 24..31 for four] + * - remainder are multiple spatial streams using unequal modulation + */ + for (i = 0; i < max_tx_streams; i++) + ht_cap.mcs.rx_mask[i] = + sband->ht_cap.mcs.rx_mask[i] & + req_ht_cap->mcs.rx_mask[i]; + + if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION) + for (i = IEEE80211_HT_MCS_UNEQUAL_MODULATION_START_BYTE; + i < IEEE80211_HT_MCS_MASK_LEN; i++) + ht_cap.mcs.rx_mask[i] = + sband->ht_cap.mcs.rx_mask[i] & + req_ht_cap->mcs.rx_mask[i]; + + /* handle MCS rate 32 too */ + if (sband->ht_cap.mcs.rx_mask[32/8] & + req_ht_cap->mcs.rx_mask[32/8] & 1) + ht_cap.mcs.rx_mask[32/8] |= 1; + + check_changed: + /* if bss configuration changed store the new one */ + if (memcmp(&conf->ht_cap, &ht_cap, sizeof(ht_cap)) || + memcmp(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf))) { + changed |= BSS_CHANGED_HT; + memcpy(&conf->ht_cap, &ht_cap, sizeof(ht_cap)); + memcpy(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf)); + } - return 0; + return changed; } static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, @@ -794,7 +901,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, * check if configuration can support the BA policy * and if buffer size does not exceeds max value */ if (((ba_policy != 1) - && (!(conf->ht_conf.cap & IEEE80211_HT_CAP_DELAY_BA))) + && (!(conf->ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) || (buf_size > IEEE80211_MAX_AMPDU_BUF)) { status = WLAN_STATUS_INVALID_QOS_PARAM; #ifdef CONFIG_MAC80211_HT_DEBUG @@ -812,7 +919,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, sband = local->hw.wiphy->bands[conf->channel->band]; buf_size = IEEE80211_MIN_AMPDU_BUF; - buf_size = buf_size << sband->ht_info.ampdu_factor; + buf_size = buf_size << sband->ht_cap.ampdu_factor; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f49cb9e8bb42..ae4ca3e8b443 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -817,7 +817,7 @@ struct ieee802_11_elems { u8 *wmm_info; u8 *wmm_param; struct ieee80211_ht_cap *ht_cap_elem; - struct ieee80211_ht_addt_info *ht_info_elem; + struct ieee80211_ht_info *ht_info_elem; u8 *mesh_config; u8 *mesh_id; u8 *peer_link; @@ -880,9 +880,6 @@ static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr) int ieee80211_hw_config(struct ieee80211_local *local); int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed); void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx); -u32 ieee80211_handle_ht(struct ieee80211_local *local, int enable_ht, - struct ieee80211_ht_info *req_ht_cap, - struct ieee80211_ht_bss_info *req_bss_cap); void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, u32 changed); void ieee80211_configure_filter(struct ieee80211_local *local); @@ -963,11 +960,14 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev); int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev); /* HT */ -int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie, - struct ieee80211_ht_info *ht_info); -int ieee80211_ht_addt_info_ie_to_ht_bss_info( - struct ieee80211_ht_addt_info *ht_add_info_ie, +void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_ht_cap *ht_cap_ie, + struct ieee80211_sta_ht_cap *ht_cap); +void ieee80211_ht_info_ie_to_ht_bss_info( + struct ieee80211_ht_info *ht_add_info_ie, struct ieee80211_ht_bss_info *bss_info); +u32 ieee80211_handle_ht(struct ieee80211_local *local, + struct ieee80211_sta_ht_cap *req_ht_cap, + struct ieee80211_ht_bss_info *req_bss_cap); void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn); void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da, diff --git a/net/mac80211/main.c b/net/mac80211/main.c index c427954fe8e8..07f812755e55 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -232,100 +232,6 @@ int ieee80211_hw_config(struct ieee80211_local *local) return ret; } -/** - * ieee80211_handle_ht should be used only after legacy configuration - * has been determined namely band, as ht configuration depends upon - * the hardware's HT abilities for a _specific_ band. - */ -u32 ieee80211_handle_ht(struct ieee80211_local *local, int enable_ht, - struct ieee80211_ht_info *req_ht_cap, - struct ieee80211_ht_bss_info *req_bss_cap) -{ - struct ieee80211_conf *conf = &local->hw.conf; - struct ieee80211_supported_band *sband; - struct ieee80211_ht_info ht_conf; - struct ieee80211_ht_bss_info ht_bss_conf; - u32 changed = 0; - int i; - u8 max_tx_streams = IEEE80211_HT_CAP_MAX_STREAMS; - u8 tx_mcs_set_cap; - - sband = local->hw.wiphy->bands[conf->channel->band]; - - memset(&ht_conf, 0, sizeof(struct ieee80211_ht_info)); - memset(&ht_bss_conf, 0, sizeof(struct ieee80211_ht_bss_info)); - - /* HT is not supported */ - if (!sband->ht_info.ht_supported) { - conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE; - goto out; - } - - /* disable HT */ - if (!enable_ht) { - if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) - changed |= BSS_CHANGED_HT; - conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE; - conf->ht_conf.ht_supported = 0; - goto out; - } - - - if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE)) - changed |= BSS_CHANGED_HT; - - conf->flags |= IEEE80211_CONF_SUPPORT_HT_MODE; - ht_conf.ht_supported = 1; - - ht_conf.cap = req_ht_cap->cap & sband->ht_info.cap; - ht_conf.cap &= ~(IEEE80211_HT_CAP_SM_PS); - ht_conf.cap |= sband->ht_info.cap & IEEE80211_HT_CAP_SM_PS; - ht_bss_conf.primary_channel = req_bss_cap->primary_channel; - ht_bss_conf.bss_cap = req_bss_cap->bss_cap; - ht_bss_conf.bss_op_mode = req_bss_cap->bss_op_mode; - - ht_conf.ampdu_factor = req_ht_cap->ampdu_factor; - ht_conf.ampdu_density = req_ht_cap->ampdu_density; - - /* Bits 96-100 */ - tx_mcs_set_cap = sband->ht_info.supp_mcs_set[12]; - - /* configure suppoerted Tx MCS according to requested MCS - * (based in most cases on Rx capabilities of peer) and self - * Tx MCS capabilities (as defined by low level driver HW - * Tx capabilities) */ - if (!(tx_mcs_set_cap & IEEE80211_HT_CAP_MCS_TX_DEFINED)) - goto check_changed; - - /* Counting from 0 therfore + 1 */ - if (tx_mcs_set_cap & IEEE80211_HT_CAP_MCS_TX_RX_DIFF) - max_tx_streams = ((tx_mcs_set_cap & - IEEE80211_HT_CAP_MCS_TX_STREAMS) >> 2) + 1; - - for (i = 0; i < max_tx_streams; i++) - ht_conf.supp_mcs_set[i] = - sband->ht_info.supp_mcs_set[i] & - req_ht_cap->supp_mcs_set[i]; - - if (tx_mcs_set_cap & IEEE80211_HT_CAP_MCS_TX_UEQM) - for (i = IEEE80211_SUPP_MCS_SET_UEQM; - i < IEEE80211_SUPP_MCS_SET_LEN; i++) - ht_conf.supp_mcs_set[i] = - sband->ht_info.supp_mcs_set[i] & - req_ht_cap->supp_mcs_set[i]; - -check_changed: - /* if bss configuration changed store the new one */ - if (memcmp(&conf->ht_conf, &ht_conf, sizeof(ht_conf)) || - memcmp(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf))) { - changed |= BSS_CHANGED_HT; - memcpy(&conf->ht_conf, &ht_conf, sizeof(ht_conf)); - memcpy(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf)); - } -out: - return changed; -} - void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, u32 changed) { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 829995e740a7..196dd39f6286 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -236,7 +236,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; - u8 *pos, *ies, *ht_add_ie; + u8 *pos, *ies, *ht_ie; int i, len, count, rates_len, supp_rates_len; u16 capab; struct ieee80211_bss *bss; @@ -393,24 +393,25 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, /* wmm support is a must to HT */ if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED) && - sband->ht_info.ht_supported && - (ht_add_ie = ieee80211_bss_get_ie(bss, WLAN_EID_HT_EXTRA_INFO))) { - struct ieee80211_ht_addt_info *ht_add_info = - (struct ieee80211_ht_addt_info *)ht_add_ie; - u16 cap = sband->ht_info.cap; + sband->ht_cap.ht_supported && + (ht_ie = ieee80211_bss_get_ie(bss, WLAN_EID_HT_INFORMATION)) && + ht_ie[1] >= sizeof(struct ieee80211_ht_info)) { + struct ieee80211_ht_info *ht_info = + (struct ieee80211_ht_info *)(ht_ie + 2); + u16 cap = sband->ht_cap.cap; __le16 tmp; u32 flags = local->hw.conf.channel->flags; - switch (ht_add_info->ht_param & IEEE80211_HT_IE_CHA_SEC_OFFSET) { - case IEEE80211_HT_IE_CHA_SEC_ABOVE: + switch (ht_info->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { + case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: if (flags & IEEE80211_CHAN_NO_FAT_ABOVE) { - cap &= ~IEEE80211_HT_CAP_SUP_WIDTH; + cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; cap &= ~IEEE80211_HT_CAP_SGI_40; } break; - case IEEE80211_HT_IE_CHA_SEC_BELOW: + case IEEE80211_HT_PARAM_CHA_SEC_BELOW: if (flags & IEEE80211_CHAN_NO_FAT_BELOW) { - cap &= ~IEEE80211_HT_CAP_SUP_WIDTH; + cap &= ~IEEE80211_HT_CAP_SUP_WIDTH_20_40; cap &= ~IEEE80211_HT_CAP_SGI_40; } break; @@ -424,9 +425,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, memcpy(pos, &tmp, sizeof(u16)); pos += sizeof(u16); /* TODO: needs a define here for << 2 */ - *pos++ = sband->ht_info.ampdu_factor | - (sband->ht_info.ampdu_density << 2); - memcpy(pos, sband->ht_info.supp_mcs_set, 16); + *pos++ = sband->ht_cap.ampdu_factor | + (sband->ht_cap.ampdu_density << 2); + memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs)); } kfree(ifsta->assocreq_ies); @@ -730,7 +731,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) { changed |= BSS_CHANGED_HT; sdata->bss_conf.assoc_ht = 1; - sdata->bss_conf.ht_conf = &conf->ht_conf; + sdata->bss_conf.ht_cap = &conf->ht_cap; sdata->bss_conf.ht_bss_conf = &conf->ht_bss_conf; } @@ -850,7 +851,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, changed |= BSS_CHANGED_HT; sdata->bss_conf.assoc_ht = 0; - sdata->bss_conf.ht_conf = NULL; + sdata->bss_conf.ht_cap = NULL; sdata->bss_conf.ht_bss_conf = NULL; ieee80211_led_assoc(local, 0); @@ -1335,11 +1336,11 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { struct ieee80211_ht_bss_info bss_info; - ieee80211_ht_cap_ie_to_ht_info( - elems.ht_cap_elem, &sta->sta.ht_info); - ieee80211_ht_addt_info_ie_to_ht_bss_info( + ieee80211_ht_cap_ie_to_sta_ht_cap( + elems.ht_cap_elem, &sta->sta.ht_cap); + ieee80211_ht_info_ie_to_ht_bss_info( elems.ht_info_elem, &bss_info); - ieee80211_handle_ht(local, 1, &sta->sta.ht_info, &bss_info); + ieee80211_handle_ht(local, &sta->sta.ht_cap, &bss_info); } rate_control_rate_init(sta); @@ -1696,9 +1697,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, elems.wmm_param && conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) { struct ieee80211_ht_bss_info bss_info; - ieee80211_ht_addt_info_ie_to_ht_bss_info( + ieee80211_ht_info_ie_to_ht_bss_info( elems.ht_info_elem, &bss_info); - changed |= ieee80211_handle_ht(local, 1, &conf->ht_conf, + changed |= ieee80211_handle_ht(local, &conf->ht_cap, &bss_info); } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 1b605457017e..9941a60a2327 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -532,8 +532,8 @@ void ieee802_11_parse_elems(u8 *start, size_t len, if (elen >= sizeof(struct ieee80211_ht_cap)) elems->ht_cap_elem = (void *)pos; break; - case WLAN_EID_HT_EXTRA_INFO: - if (elen >= sizeof(struct ieee80211_ht_addt_info)) + case WLAN_EID_HT_INFORMATION: + if (elen >= sizeof(struct ieee80211_ht_info)) elems->ht_info_elem = (void *)pos; break; case WLAN_EID_MESH_ID: diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 29c41040c8c9..a3af15141244 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -147,7 +147,7 @@ static int ieee80211_ioctl_giwname(struct net_device *dev, sband = local->hw.wiphy->bands[IEEE80211_BAND_5GHZ]; if (sband) { is_a = 1; - is_ht |= sband->ht_info.ht_supported; + is_ht |= sband->ht_cap.ht_supported; } sband = local->hw.wiphy->bands[IEEE80211_BAND_2GHZ]; @@ -160,7 +160,7 @@ static int ieee80211_ioctl_giwname(struct net_device *dev, if (sband->bitrates[i].bitrate == 60) is_g = 1; } - is_ht |= sband->ht_info.ht_supported; + is_ht |= sband->ht_cap.ht_supported; } strcpy(name, "IEEE 802.11"); -- cgit v1.2.3 From 0f4ac38b5999c3d51adad52d61c56c1b99c247ec Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 9 Oct 2008 12:18:04 +0200 Subject: mac80211: kill hw.conf.antenna_sel_{rx,tx} Never actually used. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/b43/main.c | 25 +++---------------------- drivers/net/wireless/b43legacy/main.c | 18 ++---------------- drivers/net/wireless/p54/p54common.c | 3 +-- drivers/net/wireless/rt2x00/rt2x00config.c | 20 ++++---------------- drivers/net/wireless/rt2x00/rt2x00dev.c | 6 ++---- include/net/mac80211.h | 7 +------ net/mac80211/debugfs.c | 8 -------- net/mac80211/ieee80211_i.h | 2 -- net/mac80211/tx.c | 1 - 9 files changed, 13 insertions(+), 77 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 6e773018cba0..6b85428b0e1d 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -1339,25 +1339,6 @@ u8 b43_ieee80211_antenna_sanitize(struct b43_wldev *dev, return antenna_nr; } -static int b43_antenna_from_ieee80211(struct b43_wldev *dev, u8 antenna) -{ - antenna = b43_ieee80211_antenna_sanitize(dev, antenna); - switch (antenna) { - case 0: /* default/diversity */ - return B43_ANTENNA_DEFAULT; - case 1: /* Antenna 0 */ - return B43_ANTENNA0; - case 2: /* Antenna 1 */ - return B43_ANTENNA1; - case 3: /* Antenna 2 */ - return B43_ANTENNA2; - case 4: /* Antenna 3 */ - return B43_ANTENNA3; - default: - return B43_ANTENNA_DEFAULT; - } -} - /* Convert a b43 antenna number value to the PHY TX control value. */ static u16 b43_antenna_to_phyctl(int antenna) { @@ -1399,7 +1380,7 @@ static void b43_write_beacon_template(struct b43_wldev *dev, len, ram_offset, shm_size_offset, rate); /* Write the PHY TX control parameters. */ - antenna = b43_antenna_from_ieee80211(dev, info->antenna_sel_tx); + antenna = B43_ANTENNA_DEFAULT; antenna = b43_antenna_to_phyctl(antenna); ctl = b43_shm_read16(dev, B43_SHM_SHARED, B43_SHM_SH_BEACPHYCTL); /* We can't send beacons with short preamble. Would get PHY errors. */ @@ -3399,9 +3380,9 @@ static int b43_op_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf) } /* Antennas for RX and management frame TX. */ - antenna = b43_antenna_from_ieee80211(dev, conf->antenna_sel_tx); + antenna = B43_ANTENNA_DEFAULT; b43_mgmtframe_txantenna(dev, antenna); - antenna = b43_antenna_from_ieee80211(dev, conf->antenna_sel_rx); + antenna = B43_ANTENNA_DEFAULT; if (phy->ops->set_rx_antenna) phy->ops->set_rx_antenna(dev, antenna); diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index c66d57560e7c..867f01ce45c7 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -2556,20 +2556,6 @@ init_failure: return err; } -static int b43legacy_antenna_from_ieee80211(u8 antenna) -{ - switch (antenna) { - case 0: /* default/diversity */ - return B43legacy_ANTENNA_DEFAULT; - case 1: /* Antenna 0 */ - return B43legacy_ANTENNA0; - case 2: /* Antenna 1 */ - return B43legacy_ANTENNA1; - default: - return B43legacy_ANTENNA_DEFAULT; - } -} - static int b43legacy_op_dev_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf) { @@ -2583,8 +2569,8 @@ static int b43legacy_op_dev_config(struct ieee80211_hw *hw, int err = 0; u32 savedirqs; - antenna_tx = b43legacy_antenna_from_ieee80211(conf->antenna_sel_tx); - antenna_rx = b43legacy_antenna_from_ieee80211(conf->antenna_sel_rx); + antenna_tx = B43legacy_ANTENNA_DEFAULT; + antenna_rx = B43legacy_ANTENNA_DEFAULT; mutex_lock(&wl->mutex); dev = wl->current_dev; diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c index b1dc10ccd32c..b3e75eb4d5ba 100644 --- a/drivers/net/wireless/p54/p54common.c +++ b/drivers/net/wireless/p54/p54common.c @@ -1198,8 +1198,7 @@ static int p54_config(struct ieee80211_hw *dev, struct ieee80211_conf *conf) struct p54_common *priv = dev->priv; mutex_lock(&priv->conf_mutex); - priv->rx_antenna = (conf->antenna_sel_rx == 0) ? - 2 : conf->antenna_sel_tx - 1; + priv->rx_antenna = 2; /* automatic */ priv->output_power = conf->power_level << 2; ret = p54_set_freq(dev, cpu_to_le16(conf->channel->center_freq)); p54_set_vdcf(dev); diff --git a/drivers/net/wireless/rt2x00/rt2x00config.c b/drivers/net/wireless/rt2x00/rt2x00config.c index 4d5e87b015a0..7910147157b5 100644 --- a/drivers/net/wireless/rt2x00/rt2x00config.c +++ b/drivers/net/wireless/rt2x00/rt2x00config.c @@ -199,23 +199,15 @@ void rt2x00lib_config(struct rt2x00_dev *rt2x00dev, * to work with untill the link tuner decides that an antenna * switch should be performed. */ - if (!conf->antenna_sel_rx && - default_ant->rx != ANTENNA_SW_DIVERSITY && + if (default_ant->rx != ANTENNA_SW_DIVERSITY && default_ant->rx != active_ant->rx) flags |= CONFIG_UPDATE_ANTENNA; - else if (conf->antenna_sel_rx && - conf->antenna_sel_rx != active_ant->rx) - flags |= CONFIG_UPDATE_ANTENNA; else if (active_ant->rx == ANTENNA_SW_DIVERSITY) flags |= CONFIG_UPDATE_ANTENNA; - if (!conf->antenna_sel_tx && - default_ant->tx != ANTENNA_SW_DIVERSITY && + if (default_ant->tx != ANTENNA_SW_DIVERSITY && default_ant->tx != active_ant->tx) flags |= CONFIG_UPDATE_ANTENNA; - else if (conf->antenna_sel_tx && - conf->antenna_sel_tx != active_ant->tx) - flags |= CONFIG_UPDATE_ANTENNA; else if (active_ant->tx == ANTENNA_SW_DIVERSITY) flags |= CONFIG_UPDATE_ANTENNA; @@ -252,18 +244,14 @@ config: } if (flags & CONFIG_UPDATE_ANTENNA) { - if (conf->antenna_sel_rx) - libconf.ant.rx = conf->antenna_sel_rx; - else if (default_ant->rx != ANTENNA_SW_DIVERSITY) + if (default_ant->rx != ANTENNA_SW_DIVERSITY) libconf.ant.rx = default_ant->rx; else if (active_ant->rx == ANTENNA_SW_DIVERSITY) libconf.ant.rx = ANTENNA_B; else libconf.ant.rx = active_ant->rx; - if (conf->antenna_sel_tx) - libconf.ant.tx = conf->antenna_sel_tx; - else if (default_ant->tx != ANTENNA_SW_DIVERSITY) + if (default_ant->tx != ANTENNA_SW_DIVERSITY) libconf.ant.tx = default_ant->tx; else if (active_ant->tx == ANTENNA_SW_DIVERSITY) libconf.ant.tx = ANTENNA_B; diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c index 86840e3585e8..9e0472bd1edf 100644 --- a/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -249,11 +249,9 @@ static void rt2x00lib_evaluate_antenna(struct rt2x00_dev *rt2x00dev) rt2x00dev->link.ant.flags &= ~ANTENNA_RX_DIVERSITY; rt2x00dev->link.ant.flags &= ~ANTENNA_TX_DIVERSITY; - if (rt2x00dev->hw->conf.antenna_sel_rx == 0 && - rt2x00dev->default_ant.rx == ANTENNA_SW_DIVERSITY) + if (rt2x00dev->default_ant.rx == ANTENNA_SW_DIVERSITY) rt2x00dev->link.ant.flags |= ANTENNA_RX_DIVERSITY; - if (rt2x00dev->hw->conf.antenna_sel_tx == 0 && - rt2x00dev->default_ant.tx == ANTENNA_SW_DIVERSITY) + if (rt2x00dev->default_ant.tx == ANTENNA_SW_DIVERSITY) rt2x00dev->link.ant.flags |= ANTENNA_TX_DIVERSITY; if (!(rt2x00dev->link.ant.flags & ANTENNA_RX_DIVERSITY) && diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2870f3973f1a..5f28b7f89887 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -324,7 +324,7 @@ struct ieee80211_tx_altrate { * @flags: transmit info flags, defined above * @band: TBD * @tx_rate_idx: TBD - * @antenna_sel_tx: TBD + * @antenna_sel_tx: antenna to use, 0 for automatic diversity * @control: union for control data * @status: union for status data * @driver_data: array of driver_data pointers @@ -474,9 +474,6 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void) * @listen_interval: listen interval in units of beacon interval * @flags: configuration flags defined above * @power_level: requested transmit power (in dBm) - * @antenna_sel_tx: transmit antenna selection, 0: default/diversity, - * 1/2: antenna 0/1 - * @antenna_sel_rx: receive antenna selection, like @antenna_sel_tx * @ht_cap: describes current self configuration of 802.11n HT capabilities * @ht_bss_conf: describes current BSS configuration of 802.11n HT parameters * @channel: the channel to tune to @@ -488,8 +485,6 @@ struct ieee80211_conf { u16 listen_interval; u32 flags; int power_level; - u8 antenna_sel_tx; - u8 antenna_sel_rx; struct ieee80211_channel *channel; diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 24ce54463310..767dbca50adb 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -47,10 +47,6 @@ static const struct file_operations name## _ops = { \ DEBUGFS_READONLY_FILE(frequency, 20, "%d", local->hw.conf.channel->center_freq); -DEBUGFS_READONLY_FILE(antenna_sel_tx, 20, "%d", - local->hw.conf.antenna_sel_tx); -DEBUGFS_READONLY_FILE(antenna_sel_rx, 20, "%d", - local->hw.conf.antenna_sel_rx); DEBUGFS_READONLY_FILE(rts_threshold, 20, "%d", local->rts_threshold); DEBUGFS_READONLY_FILE(fragmentation_threshold, 20, "%d", @@ -202,8 +198,6 @@ void debugfs_hw_add(struct ieee80211_local *local) local->debugfs.keys = debugfs_create_dir("keys", phyd); DEBUGFS_ADD(frequency); - DEBUGFS_ADD(antenna_sel_tx); - DEBUGFS_ADD(antenna_sel_rx); DEBUGFS_ADD(rts_threshold); DEBUGFS_ADD(fragmentation_threshold); DEBUGFS_ADD(short_retry_limit); @@ -258,8 +252,6 @@ void debugfs_hw_add(struct ieee80211_local *local) void debugfs_hw_del(struct ieee80211_local *local) { DEBUGFS_DEL(frequency); - DEBUGFS_DEL(antenna_sel_tx); - DEBUGFS_DEL(antenna_sel_rx); DEBUGFS_DEL(rts_threshold); DEBUGFS_DEL(fragmentation_threshold); DEBUGFS_DEL(short_retry_limit); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ae4ca3e8b443..88015838a63c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -727,8 +727,6 @@ struct ieee80211_local { struct dentry *rcdir; struct dentry *rcname; struct dentry *frequency; - struct dentry *antenna_sel_tx; - struct dentry *antenna_sel_rx; struct dentry *rts_threshold; struct dentry *fragmentation_threshold; struct dentry *short_retry_limit; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 9191b510bff8..22494bcf488a 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1971,7 +1971,6 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, sband->bitrates[rsel.rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE) info->flags |= IEEE80211_TX_CTL_SHORT_PREAMBLE; - info->antenna_sel_tx = local->hw.conf.antenna_sel_tx; info->control.retry_limit = 1; out: -- cgit v1.2.3 From e8975581f63870be42ff4662b293d1b0c8c21350 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 9 Oct 2008 12:18:51 +0200 Subject: mac80211: introduce hw config change flags This makes mac80211 notify the driver which configuration actually changed, e.g. channel etc. No driver changes, this is just plumbing, driver authors are expected to act on this if they want to. Also remove the HW CONFIG debug printk, it's incorrect, often we configure something else. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/adm8211.c | 3 ++- drivers/net/wireless/ath5k/base.c | 7 +++---- drivers/net/wireless/ath9k/main.c | 4 ++-- drivers/net/wireless/b43/main.c | 3 ++- drivers/net/wireless/b43legacy/main.c | 3 ++- drivers/net/wireless/iwlwifi/iwl-agn.c | 3 ++- drivers/net/wireless/iwlwifi/iwl3945-base.c | 7 ++++--- drivers/net/wireless/libertas_tf/main.c | 4 +++- drivers/net/wireless/mac80211_hwsim.c | 4 ++-- drivers/net/wireless/p54/p54common.c | 3 ++- drivers/net/wireless/rt2x00/rt2x00.h | 2 +- drivers/net/wireless/rt2x00/rt2x00dev.c | 2 +- drivers/net/wireless/rt2x00/rt2x00mac.c | 3 ++- drivers/net/wireless/rtl8180_dev.c | 3 ++- drivers/net/wireless/rtl8187_dev.c | 3 ++- drivers/net/wireless/zd1211rw/zd_mac.c | 4 +++- include/net/mac80211.h | 30 ++++++++++++++++++++++++----- net/mac80211/cfg.c | 3 ++- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/iface.c | 25 ++++++++++++++++++------ net/mac80211/main.c | 29 +++++++++++++++------------- net/mac80211/scan.c | 12 +++++++++--- net/mac80211/util.c | 3 ++- net/mac80211/wext.c | 14 +++++++------- 24 files changed, 116 insertions(+), 60 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c index 6e18c9d36787..9a1e0c514c08 100644 --- a/drivers/net/wireless/adm8211.c +++ b/drivers/net/wireless/adm8211.c @@ -1314,9 +1314,10 @@ static int adm8211_set_ssid(struct ieee80211_hw *dev, u8 *ssid, size_t ssid_len) return 0; } -static int adm8211_config(struct ieee80211_hw *dev, struct ieee80211_conf *conf) +static int adm8211_config(struct ieee80211_hw *dev, u32 changed) { struct adm8211_priv *priv = dev->priv; + struct ieee80211_conf *conf = &dev->conf; int channel = ieee80211_frequency_to_channel(conf->channel->center_freq); if (channel != priv->channel) { diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c index 9e47d727e220..fcd688765d04 100644 --- a/drivers/net/wireless/ath5k/base.c +++ b/drivers/net/wireless/ath5k/base.c @@ -219,8 +219,7 @@ static int ath5k_add_interface(struct ieee80211_hw *hw, struct ieee80211_if_init_conf *conf); static void ath5k_remove_interface(struct ieee80211_hw *hw, struct ieee80211_if_init_conf *conf); -static int ath5k_config(struct ieee80211_hw *hw, - struct ieee80211_conf *conf); +static int ath5k_config(struct ieee80211_hw *hw, u32 changed); static int ath5k_config_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_if_conf *conf); @@ -2780,10 +2779,10 @@ end: * TODO: Phy disable/diversity etc */ static int -ath5k_config(struct ieee80211_hw *hw, - struct ieee80211_conf *conf) +ath5k_config(struct ieee80211_hw *hw, u32 changed) { struct ath5k_softc *sc = hw->priv; + struct ieee80211_conf *conf = &hw->conf; sc->bintval = conf->beacon_int; sc->power_level = conf->power_level; diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c index 5e087c92a6d9..f49910799ede 100644 --- a/drivers/net/wireless/ath9k/main.c +++ b/drivers/net/wireless/ath9k/main.c @@ -1230,11 +1230,11 @@ static void ath9k_remove_interface(struct ieee80211_hw *hw, __func__, error); } -static int ath9k_config(struct ieee80211_hw *hw, - struct ieee80211_conf *conf) +static int ath9k_config(struct ieee80211_hw *hw, u32 changed) { struct ath_softc *sc = hw->priv; struct ieee80211_channel *curchan = hw->conf.channel; + struct ieee80211_conf *conf = &hw->conf; int pos; DPRINTF(sc, ATH_DBG_CONFIG, "%s: Set channel: %d MHz\n", diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 6b85428b0e1d..2e81af1022e4 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -3320,11 +3320,12 @@ init_failure: return err; } -static int b43_op_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf) +static int b43_op_config(struct ieee80211_hw *hw, u32 changed) { struct b43_wl *wl = hw_to_b43_wl(hw); struct b43_wldev *dev; struct b43_phy *phy; + struct ieee80211_conf *conf = &hw->conf; unsigned long flags; int antenna; int err = 0; diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index 867f01ce45c7..793cc396562f 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -2557,11 +2557,12 @@ init_failure: } static int b43legacy_op_dev_config(struct ieee80211_hw *hw, - struct ieee80211_conf *conf) + u32 changed) { struct b43legacy_wl *wl = hw_to_b43legacy_wl(hw); struct b43legacy_wldev *dev; struct b43legacy_phy *phy; + struct ieee80211_conf *conf = &hw->conf; unsigned long flags; unsigned int new_phymode = 0xFFFF; int antenna_tx; diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index e6695e80fb53..79a24410dd0a 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -2734,10 +2734,11 @@ static int iwl4965_mac_add_interface(struct ieee80211_hw *hw, * be set inappropriately and the driver currently sets the hardware up to * use it whenever needed. */ -static int iwl4965_mac_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf) +static int iwl4965_mac_config(struct ieee80211_hw *hw, u32 changed) { struct iwl_priv *priv = hw->priv; const struct iwl_channel_info *ch_info; + struct ieee80211_conf *conf = &hw->conf; unsigned long flags; int ret = 0; u16 channel; diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index d3a2966d9181..b1464c71ea0a 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -6415,7 +6415,7 @@ static void iwl3945_bg_abort_scan(struct work_struct *work) mutex_unlock(&priv->mutex); } -static int iwl3945_mac_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf); +static int iwl3945_mac_config(struct ieee80211_hw *hw, u32 changed); static void iwl3945_bg_scan_completed(struct work_struct *work) { @@ -6428,7 +6428,7 @@ static void iwl3945_bg_scan_completed(struct work_struct *work) return; if (test_bit(STATUS_CONF_PENDING, &priv->status)) - iwl3945_mac_config(priv->hw, ieee80211_get_hw_conf(priv->hw)); + iwl3945_mac_config(priv->hw, 0); ieee80211_scan_completed(priv->hw); @@ -6616,10 +6616,11 @@ static int iwl3945_mac_add_interface(struct ieee80211_hw *hw, * be set inappropriately and the driver currently sets the hardware up to * use it whenever needed. */ -static int iwl3945_mac_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf) +static int iwl3945_mac_config(struct ieee80211_hw *hw, u32 changed) { struct iwl3945_priv *priv = hw->priv; const struct iwl3945_channel_info *ch_info; + struct ieee80211_conf *conf = &hw->conf; unsigned long flags; int ret = 0; diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c index feff945ad856..241ddcfa352e 100644 --- a/drivers/net/wireless/libertas_tf/main.c +++ b/drivers/net/wireless/libertas_tf/main.c @@ -354,9 +354,11 @@ static void lbtf_op_remove_interface(struct ieee80211_hw *hw, priv->vif = NULL; } -static int lbtf_op_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf) +static int lbtf_op_config(struct ieee80211_hw *hw, u32 changed) { struct lbtf_private *priv = hw->priv; + struct ieee80211_conf *conf = &hw->conf; + if (conf->channel->center_freq != priv->cur_freq) { priv->cur_freq = conf->channel->center_freq; lbtf_set_channel(priv, conf->channel->hw_value); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 3f236b546683..e2aeef8de707 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -359,10 +359,10 @@ static void mac80211_hwsim_beacon(unsigned long arg) } -static int mac80211_hwsim_config(struct ieee80211_hw *hw, - struct ieee80211_conf *conf) +static int mac80211_hwsim_config(struct ieee80211_hw *hw, u32 changed) { struct mac80211_hwsim_data *data = hw->priv; + struct ieee80211_conf *conf = &hw->conf; printk(KERN_DEBUG "%s:%s (freq=%d radio_enabled=%d beacon_int=%d)\n", wiphy_name(hw->wiphy), __func__, diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c index b3e75eb4d5ba..b3c297ed00cd 100644 --- a/drivers/net/wireless/p54/p54common.c +++ b/drivers/net/wireless/p54/p54common.c @@ -1192,10 +1192,11 @@ static void p54_remove_interface(struct ieee80211_hw *dev, p54_set_filter(dev, 0, NULL); } -static int p54_config(struct ieee80211_hw *dev, struct ieee80211_conf *conf) +static int p54_config(struct ieee80211_hw *dev, u32 changed) { int ret; struct p54_common *priv = dev->priv; + struct ieee80211_conf *conf = &dev->conf; mutex_lock(&priv->conf_mutex); priv->rx_antenna = 2; /* automatic */ diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h index 1359a3768404..8ec8f7e9ec64 100644 --- a/drivers/net/wireless/rt2x00/rt2x00.h +++ b/drivers/net/wireless/rt2x00/rt2x00.h @@ -997,7 +997,7 @@ int rt2x00mac_add_interface(struct ieee80211_hw *hw, struct ieee80211_if_init_conf *conf); void rt2x00mac_remove_interface(struct ieee80211_hw *hw, struct ieee80211_if_init_conf *conf); -int rt2x00mac_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf); +int rt2x00mac_config(struct ieee80211_hw *hw, u32 changed); int rt2x00mac_config_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_if_conf *conf); diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c index 9e0472bd1edf..697806cf94e2 100644 --- a/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -1245,7 +1245,7 @@ int rt2x00lib_resume(struct rt2x00_dev *rt2x00dev) /* * Reconfigure device. */ - retval = rt2x00mac_config(rt2x00dev->hw, &rt2x00dev->hw->conf); + retval = rt2x00mac_config(rt2x00dev->hw, ~0); if (retval) goto exit; diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c index 2c6cc5c374ff..da7b49a364ff 100644 --- a/drivers/net/wireless/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/rt2x00/rt2x00mac.c @@ -335,9 +335,10 @@ void rt2x00mac_remove_interface(struct ieee80211_hw *hw, } EXPORT_SYMBOL_GPL(rt2x00mac_remove_interface); -int rt2x00mac_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf) +int rt2x00mac_config(struct ieee80211_hw *hw, u32 changed) { struct rt2x00_dev *rt2x00dev = hw->priv; + struct ieee80211_conf *conf = &hw->conf; int radio_on; int status; diff --git a/drivers/net/wireless/rtl8180_dev.c b/drivers/net/wireless/rtl8180_dev.c index 9de8f57486df..e8d22393797f 100644 --- a/drivers/net/wireless/rtl8180_dev.c +++ b/drivers/net/wireless/rtl8180_dev.c @@ -692,9 +692,10 @@ static void rtl8180_remove_interface(struct ieee80211_hw *dev, priv->vif = NULL; } -static int rtl8180_config(struct ieee80211_hw *dev, struct ieee80211_conf *conf) +static int rtl8180_config(struct ieee80211_hw *dev, u32 changed) { struct rtl8180_priv *priv = dev->priv; + struct ieee80211_conf *conf = &dev->conf; priv->rf->set_chan(dev, conf); diff --git a/drivers/net/wireless/rtl8187_dev.c b/drivers/net/wireless/rtl8187_dev.c index 9ceae9017f86..2c69ab37c650 100644 --- a/drivers/net/wireless/rtl8187_dev.c +++ b/drivers/net/wireless/rtl8187_dev.c @@ -873,9 +873,10 @@ static void rtl8187_remove_interface(struct ieee80211_hw *dev, mutex_unlock(&priv->conf_mutex); } -static int rtl8187_config(struct ieee80211_hw *dev, struct ieee80211_conf *conf) +static int rtl8187_config(struct ieee80211_hw *dev, u32 changed) { struct rtl8187_priv *priv = dev->priv; + struct ieee80211_conf *conf = &dev->conf; u32 reg; mutex_lock(&priv->conf_mutex); diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index de45509d757e..6c3e21887fc8 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -743,9 +743,11 @@ static void zd_op_remove_interface(struct ieee80211_hw *hw, zd_write_mac_addr(&mac->chip, NULL); } -static int zd_op_config(struct ieee80211_hw *hw, struct ieee80211_conf *conf) +static int zd_op_config(struct ieee80211_hw *hw, u32 changed) { struct zd_mac *mac = zd_hw_mac(hw); + struct ieee80211_conf *conf = &hw->conf; + return zd_chip_set_channel(&mac->chip, conf->channel->hw_value); } diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5f28b7f89887..34e8569b59bb 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -463,13 +463,33 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void) } #define IEEE80211_CONF_SHORT_SLOT_TIME (__IEEE80211_CONF_SHORT_SLOT_TIME()) +/** + * enum ieee80211_conf_changed - denotes which configuration changed + * + * @IEEE80211_CONF_CHANGE_RADIO_ENABLED: the value of radio_enabled changed + * @IEEE80211_CONF_CHANGE_BEACON_INTERVAL: the beacon interval changed + * @IEEE80211_CONF_CHANGE_LISTEN_INTERVAL: the listen interval changed + * @IEEE80211_CONF_CHANGE_RADIOTAP: the radiotap flag changed + * @IEEE80211_CONF_CHANGE_PS: the PS flag changed + * @IEEE80211_CONF_CHANGE_POWER: the TX power changed + * @IEEE80211_CONF_CHANGE_CHANNEL: the channel changed + */ +enum ieee80211_conf_changed { + IEEE80211_CONF_CHANGE_RADIO_ENABLED = BIT(0), + IEEE80211_CONF_CHANGE_BEACON_INTERVAL = BIT(1), + IEEE80211_CONF_CHANGE_LISTEN_INTERVAL = BIT(2), + IEEE80211_CONF_CHANGE_RADIOTAP = BIT(3), + IEEE80211_CONF_CHANGE_PS = BIT(4), + IEEE80211_CONF_CHANGE_POWER = BIT(5), + IEEE80211_CONF_CHANGE_CHANNEL = BIT(6), +}; + /** * struct ieee80211_conf - configuration of the device * * This struct indicates how the driver shall configure the hardware. * * @radio_enabled: when zero, driver is required to switch off the radio. - * TODO make a flag * @beacon_int: beacon interval (TODO make interface config) * @listen_interval: listen interval in units of beacon interval * @flags: configuration flags defined above @@ -479,13 +499,13 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void) * @channel: the channel to tune to */ struct ieee80211_conf { - int radio_enabled; - int beacon_int; - u16 listen_interval; u32 flags; int power_level; + u16 listen_interval; + bool radio_enabled; + struct ieee80211_channel *channel; struct ieee80211_sta_ht_cap ht_cap; @@ -1214,7 +1234,7 @@ struct ieee80211_ops { struct ieee80211_if_init_conf *conf); void (*remove_interface)(struct ieee80211_hw *hw, struct ieee80211_if_init_conf *conf); - int (*config)(struct ieee80211_hw *hw, struct ieee80211_conf *conf); + int (*config)(struct ieee80211_hw *hw, u32 changed); int (*config_interface)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_if_conf *conf); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a5dea617aab3..8ea30902d5db 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -394,7 +394,8 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata, */ if (params->interval) { sdata->local->hw.conf.beacon_int = params->interval; - ieee80211_hw_config(sdata->local); + ieee80211_hw_config(sdata->local, + IEEE80211_CONF_CHANGE_BEACON_INTERVAL); /* * We updated some parameter so if below bails out * it's not an error. diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 88015838a63c..1deb787ff8dc 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -875,7 +875,7 @@ static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr) } -int ieee80211_hw_config(struct ieee80211_local *local); +int ieee80211_hw_config(struct ieee80211_local *local, u32 changed); int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed); void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx); void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 8336fee68d3e..df28c5f7c9c0 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -65,7 +65,7 @@ static int ieee80211_open(struct net_device *dev) struct ieee80211_if_init_conf conf; u32 changed = 0; int res; - bool need_hw_reconfig = 0; + u32 hw_reconf_flags = 0; u8 null_addr[ETH_ALEN] = {0}; /* fail early if user set an invalid address */ @@ -152,7 +152,8 @@ static int ieee80211_open(struct net_device *dev) res = local->ops->start(local_to_hw(local)); if (res) goto err_del_bss; - need_hw_reconfig = 1; + /* we're brought up, everything changes */ + hw_reconf_flags = ~0; ieee80211_led_radio(local, local->hw.conf.radio_enabled); } @@ -198,8 +199,10 @@ static int ieee80211_open(struct net_device *dev) /* must be before the call to ieee80211_configure_filter */ local->monitors++; - if (local->monitors == 1) + if (local->monitors == 1) { local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP; + hw_reconf_flags |= IEEE80211_CONF_CHANGE_RADIOTAP; + } if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL) local->fif_fcsfail++; @@ -279,8 +282,8 @@ static int ieee80211_open(struct net_device *dev) atomic_inc(&local->iff_promiscs); local->open_count++; - if (need_hw_reconfig) { - ieee80211_hw_config(local); + if (hw_reconf_flags) { + ieee80211_hw_config(local, hw_reconf_flags); /* * set default queue parameters so drivers don't * need to initialise the hardware if the hardware @@ -322,6 +325,7 @@ static int ieee80211_stop(struct net_device *dev) struct ieee80211_local *local = sdata->local; struct ieee80211_if_init_conf conf; struct sta_info *sta; + u32 hw_reconf_flags = 0; /* * Stop TX on this interface first. @@ -405,8 +409,10 @@ static int ieee80211_stop(struct net_device *dev) } local->monitors--; - if (local->monitors == 0) + if (local->monitors == 0) { local->hw.conf.flags &= ~IEEE80211_CONF_RADIOTAP; + hw_reconf_flags |= IEEE80211_CONF_CHANGE_RADIOTAP; + } if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL) local->fif_fcsfail--; @@ -504,8 +510,15 @@ static int ieee80211_stop(struct net_device *dev) tasklet_disable(&local->tx_pending_tasklet); tasklet_disable(&local->tasklet); + + /* no reconfiguring after stop! */ + hw_reconf_flags = 0; } + /* do after stop to avoid reconfiguring when we stop anyway */ + if (hw_reconf_flags) + ieee80211_hw_config(local, hw_reconf_flags); + return 0; } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 07f812755e55..c936017f6d48 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -197,31 +197,34 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) &sdata->vif, &conf); } -int ieee80211_hw_config(struct ieee80211_local *local) +int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) { struct ieee80211_channel *chan; int ret = 0; + int power; if (local->sw_scanning) chan = local->scan_channel; else chan = local->oper_channel; - local->hw.conf.channel = chan; + if (chan != local->hw.conf.channel) { + local->hw.conf.channel = chan; + changed |= IEEE80211_CONF_CHANGE_CHANNEL; + } + if (!local->hw.conf.power_level) - local->hw.conf.power_level = chan->max_power; + power = chan->max_power; else - local->hw.conf.power_level = min(chan->max_power, - local->hw.conf.power_level); - -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: HW CONFIG: freq=%d\n", - wiphy_name(local->hw.wiphy), chan->center_freq); -#endif + power = min(chan->max_power, local->hw.conf.power_level); + if (local->hw.conf.power_level != power) { + changed |= IEEE80211_CONF_CHANGE_POWER; + local->hw.conf.power_level = power; + } - if (local->open_count) { - ret = local->ops->config(local_to_hw(local), &local->hw.conf); + if (changed && local->open_count) { + ret = local->ops->config(local_to_hw(local), changed); /* * HW reconfiguration should never fail, the driver has told * us what it can support so it should live up to that promise. @@ -672,7 +675,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD; local->short_retry_limit = 7; local->long_retry_limit = 4; - local->hw.conf.radio_enabled = 1; + local->hw.conf.radio_enabled = true; INIT_LIST_HEAD(&local->interfaces); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 0989b1c062e3..7372d7abb8c0 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -448,12 +448,17 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw) if (local->hw_scanning) { local->hw_scanning = false; - ieee80211_hw_config(local); + /* + * Somebody might have requested channel change during scan + * that we won't have acted upon, try now. ieee80211_hw_config + * will set the flag based on actual changes. + */ + ieee80211_hw_config(local, 0); goto done; } local->sw_scanning = false; - ieee80211_hw_config(local); + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); netif_tx_lock_bh(local->mdev); netif_addr_lock(local->mdev); @@ -540,7 +545,8 @@ void ieee80211_scan_work(struct work_struct *work) if (!skip) { local->scan_channel = chan; - if (ieee80211_hw_config(local)) + if (ieee80211_hw_config(local, + IEEE80211_CONF_CHANGE_CHANNEL)) skip = 1; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 9941a60a2327..3288c3de67ca 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -645,7 +645,8 @@ int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz) if (local->sw_scanning || local->hw_scanning) ret = 0; else - ret = ieee80211_hw_config(local); + ret = ieee80211_hw_config( + local, IEEE80211_CONF_CHANGE_CHANNEL); rate_control_clear(local); } diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index a3af15141244..94c4b35eeb14 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -656,7 +656,7 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev, union iwreq_data *data, char *extra) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - bool need_reconfig = 0; + u32 reconf_flags = 0; int new_power_level; if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM) @@ -680,17 +680,17 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev, if (local->hw.conf.power_level != new_power_level) { local->hw.conf.power_level = new_power_level; - need_reconfig = 1; + reconf_flags |= IEEE80211_CONF_CHANGE_POWER; } if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) { local->hw.conf.radio_enabled = !(data->txpower.disabled); - need_reconfig = 1; + reconf_flags |= IEEE80211_CONF_CHANGE_RADIO_ENABLED; ieee80211_led_radio(local, local->hw.conf.radio_enabled); } - if (need_reconfig) - ieee80211_hw_config(local); + if (reconf_flags) + ieee80211_hw_config(local, reconf_flags); return 0; } @@ -976,7 +976,7 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev, if (wrq->disabled) { conf->flags &= ~IEEE80211_CONF_PS; - return ieee80211_hw_config(local); + return ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } switch (wrq->flags & IW_POWER_MODE) { @@ -989,7 +989,7 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev, return -EINVAL; } - return ieee80211_hw_config(local); + return ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } static int ieee80211_ioctl_giwpower(struct net_device *dev, -- cgit v1.2.3 From d51626df5747efaa8d2c00678f64cb503845effe Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 9 Oct 2008 12:20:13 +0200 Subject: nl80211: export HT capabilities This exports the local HT capabilities in nl80211. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 12 ++++++++++++ net/wireless/nl80211.c | 13 +++++++++++++ 2 files changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 9bad65400fba..41720d47d618 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -452,17 +452,29 @@ enum nl80211_mpath_info { * an array of nested frequency attributes * @NL80211_BAND_ATTR_RATES: supported bitrates in this band, * an array of nested bitrate attributes + * @NL80211_BAND_ATTR_HT_MCS_SET: 16-byte attribute containing the MCS set as + * defined in 802.11n + * @NL80211_BAND_ATTR_HT_CAPA: HT capabilities, as in the HT information IE + * @NL80211_BAND_ATTR_HT_AMPDU_FACTOR: A-MPDU factor, as in 11n + * @NL80211_BAND_ATTR_HT_AMPDU_DENSITY: A-MPDU density, as in 11n */ enum nl80211_band_attr { __NL80211_BAND_ATTR_INVALID, NL80211_BAND_ATTR_FREQS, NL80211_BAND_ATTR_RATES, + NL80211_BAND_ATTR_HT_MCS_SET, + NL80211_BAND_ATTR_HT_CAPA, + NL80211_BAND_ATTR_HT_AMPDU_FACTOR, + NL80211_BAND_ATTR_HT_AMPDU_DENSITY, + /* keep last */ __NL80211_BAND_ATTR_AFTER_LAST, NL80211_BAND_ATTR_MAX = __NL80211_BAND_ATTR_AFTER_LAST - 1 }; +#define NL80211_BAND_ATTR_HT_CAPA NL80211_BAND_ATTR_HT_CAPA + /** * enum nl80211_frequency_attr - frequency attributes * @NL80211_FREQUENCY_ATTR_FREQ: Frequency in MHz diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 572793c8c7ab..4d12e885170e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -157,6 +157,19 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (!nl_band) goto nla_put_failure; + /* add HT info */ + if (dev->wiphy.bands[band]->ht_cap.ht_supported) { + NLA_PUT(msg, NL80211_BAND_ATTR_HT_MCS_SET, + sizeof(dev->wiphy.bands[band]->ht_cap.mcs), + &dev->wiphy.bands[band]->ht_cap.mcs); + NLA_PUT_U16(msg, NL80211_BAND_ATTR_HT_CAPA, + dev->wiphy.bands[band]->ht_cap.cap); + NLA_PUT_U8(msg, NL80211_BAND_ATTR_HT_AMPDU_FACTOR, + dev->wiphy.bands[band]->ht_cap.ampdu_factor); + NLA_PUT_U8(msg, NL80211_BAND_ATTR_HT_AMPDU_DENSITY, + dev->wiphy.bands[band]->ht_cap.ampdu_density); + } + /* add frequencies */ nl_freqs = nla_nest_start(msg, NL80211_BAND_ATTR_FREQS); if (!nl_freqs) -- cgit v1.2.3 From 9124b07740c51cbc6e358dd0c4abc6ee8ded084d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 14 Oct 2008 19:17:54 +0200 Subject: mac80211: make retry limits part of hw config Instead of having a separate callback, use the HW config callback with a new flag to change retry limits. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/ath9k/main.c | 1 - drivers/net/wireless/b43/main.c | 60 +++++++++++++-------------------- drivers/net/wireless/b43legacy/main.c | 59 ++++++++++++-------------------- drivers/net/wireless/rt2x00/rt2400pci.c | 2 +- drivers/net/wireless/rt2x00/rt2500pci.c | 2 +- drivers/net/wireless/rt2x00/rt2x00.h | 3 ++ drivers/net/wireless/rt2x00/rt2x00mac.c | 9 +++++ drivers/net/wireless/rt2x00/rt61pci.c | 2 +- drivers/net/wireless/rt2x00/rt73usb.c | 2 +- include/net/mac80211.h | 14 +++++--- net/mac80211/debugfs.c | 4 +-- net/mac80211/ieee80211_i.h | 2 -- net/mac80211/main.c | 4 +-- net/mac80211/tx.c | 4 +-- net/mac80211/wext.c | 28 +++++++-------- 15 files changed, 89 insertions(+), 107 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c index f49910799ede..41cd114c438c 100644 --- a/drivers/net/wireless/ath9k/main.c +++ b/drivers/net/wireless/ath9k/main.c @@ -1659,7 +1659,6 @@ static struct ieee80211_ops ath9k_ops = { .get_tkip_seq = NULL, .set_rts_threshold = NULL, .set_frag_threshold = NULL, - .set_retry_limit = NULL, .get_tsf = ath9k_get_tsf, .reset_tsf = ath9k_reset_tsf, .tx_last_beacon = NULL, diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 2e81af1022e4..9aeeb6553a91 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -3320,6 +3320,22 @@ init_failure: return err; } +/* Write the short and long frame retry limit values. */ +static void b43_set_retry_limits(struct b43_wldev *dev, + unsigned int short_retry, + unsigned int long_retry) +{ + /* The retry limit is a 4-bit counter. Enforce this to avoid overflowing + * the chip-internal counter. */ + short_retry = min(short_retry, (unsigned int)0xF); + long_retry = min(long_retry, (unsigned int)0xF); + + b43_shm_write16(dev, B43_SHM_SCRATCH, B43_SHM_SC_SRLIMIT, + short_retry); + b43_shm_write16(dev, B43_SHM_SCRATCH, B43_SHM_SC_LRLIMIT, + long_retry); +} + static int b43_op_config(struct ieee80211_hw *hw, u32 changed) { struct b43_wl *wl = hw_to_b43_wl(hw); @@ -3340,6 +3356,13 @@ static int b43_op_config(struct ieee80211_hw *hw, u32 changed) dev = wl->current_dev; phy = &dev->phy; + if (changed & IEEE80211_CONF_CHANGE_RETRY_LIMITS) + b43_set_retry_limits(dev, conf->short_frame_max_tx_count, + conf->long_frame_max_tx_count); + changed &= ~IEEE80211_CONF_CHANGE_RETRY_LIMITS; + if (!changed) + goto out_unlock_mutex; + /* Disable IRQs while reconfiguring the device. * This makes it possible to drop the spinlock throughout * the reconfiguration process. */ @@ -3859,22 +3882,6 @@ static void b43_imcfglo_timeouts_workaround(struct b43_wldev *dev) #endif /* CONFIG_SSB_DRIVER_PCICORE */ } -/* Write the short and long frame retry limit values. */ -static void b43_set_retry_limits(struct b43_wldev *dev, - unsigned int short_retry, - unsigned int long_retry) -{ - /* The retry limit is a 4-bit counter. Enforce this to avoid overflowing - * the chip-internal counter. */ - short_retry = min(short_retry, (unsigned int)0xF); - long_retry = min(long_retry, (unsigned int)0xF); - - b43_shm_write16(dev, B43_SHM_SCRATCH, B43_SHM_SC_SRLIMIT, - short_retry); - b43_shm_write16(dev, B43_SHM_SCRATCH, B43_SHM_SC_LRLIMIT, - long_retry); -} - static void b43_set_synth_pu_delay(struct b43_wldev *dev, bool idle) { u16 pu_delay; @@ -4195,26 +4202,6 @@ static void b43_op_stop(struct ieee80211_hw *hw) cancel_work_sync(&(wl->txpower_adjust_work)); } -static int b43_op_set_retry_limit(struct ieee80211_hw *hw, - u32 short_retry_limit, u32 long_retry_limit) -{ - struct b43_wl *wl = hw_to_b43_wl(hw); - struct b43_wldev *dev; - int err = 0; - - mutex_lock(&wl->mutex); - dev = wl->current_dev; - if (unlikely(!dev || (b43_status(dev) < B43_STAT_INITIALIZED))) { - err = -ENODEV; - goto out_unlock; - } - b43_set_retry_limits(dev, short_retry_limit, long_retry_limit); -out_unlock: - mutex_unlock(&wl->mutex); - - return err; -} - static int b43_op_beacon_set_tim(struct ieee80211_hw *hw, struct ieee80211_sta *sta, bool set) { @@ -4251,7 +4238,6 @@ static const struct ieee80211_ops b43_hw_ops = { .get_tx_stats = b43_op_get_tx_stats, .start = b43_op_start, .stop = b43_op_stop, - .set_retry_limit = b43_op_set_retry_limit, .set_tim = b43_op_beacon_set_tim, .sta_notify = b43_op_sta_notify, }; diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index 793cc396562f..78e46365f69e 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -2556,6 +2556,20 @@ init_failure: return err; } +/* Write the short and long frame retry limit values. */ +static void b43legacy_set_retry_limits(struct b43legacy_wldev *dev, + unsigned int short_retry, + unsigned int long_retry) +{ + /* The retry limit is a 4-bit counter. Enforce this to avoid overflowing + * the chip-internal counter. */ + short_retry = min(short_retry, (unsigned int)0xF); + long_retry = min(long_retry, (unsigned int)0xF); + + b43legacy_shm_write16(dev, B43legacy_SHM_WIRELESS, 0x0006, short_retry); + b43legacy_shm_write16(dev, B43legacy_SHM_WIRELESS, 0x0007, long_retry); +} + static int b43legacy_op_dev_config(struct ieee80211_hw *hw, u32 changed) { @@ -2577,6 +2591,14 @@ static int b43legacy_op_dev_config(struct ieee80211_hw *hw, dev = wl->current_dev; phy = &dev->phy; + if (changed & IEEE80211_CONF_CHANGE_RETRY_LIMITS) + b43legacy_set_retry_limits(dev, + conf->short_frame_max_tx_count, + conf->long_frame_max_tx_count); + changed &= ~IEEE80211_CONF_CHANGE_RETRY_LIMITS; + if (!changed) + goto out_unlock_mutex; + /* Switch the PHY mode (if necessary). */ switch (conf->channel->band) { case IEEE80211_BAND_2GHZ: @@ -2989,20 +3011,6 @@ static void b43legacy_imcfglo_timeouts_workaround(struct b43legacy_wldev *dev) #endif /* CONFIG_SSB_DRIVER_PCICORE */ } -/* Write the short and long frame retry limit values. */ -static void b43legacy_set_retry_limits(struct b43legacy_wldev *dev, - unsigned int short_retry, - unsigned int long_retry) -{ - /* The retry limit is a 4-bit counter. Enforce this to avoid overflowing - * the chip-internal counter. */ - short_retry = min(short_retry, (unsigned int)0xF); - long_retry = min(long_retry, (unsigned int)0xF); - - b43legacy_shm_write16(dev, B43legacy_SHM_WIRELESS, 0x0006, short_retry); - b43legacy_shm_write16(dev, B43legacy_SHM_WIRELESS, 0x0007, long_retry); -} - static void b43legacy_set_synth_pu_delay(struct b43legacy_wldev *dev, bool idle) { u16 pu_delay = 1050; @@ -3367,28 +3375,6 @@ static void b43legacy_op_stop(struct ieee80211_hw *hw) mutex_unlock(&wl->mutex); } -static int b43legacy_op_set_retry_limit(struct ieee80211_hw *hw, - u32 short_retry_limit, - u32 long_retry_limit) -{ - struct b43legacy_wl *wl = hw_to_b43legacy_wl(hw); - struct b43legacy_wldev *dev; - int err = 0; - - mutex_lock(&wl->mutex); - dev = wl->current_dev; - if (unlikely(!dev || - (b43legacy_status(dev) < B43legacy_STAT_INITIALIZED))) { - err = -ENODEV; - goto out_unlock; - } - b43legacy_set_retry_limits(dev, short_retry_limit, long_retry_limit); -out_unlock: - mutex_unlock(&wl->mutex); - - return err; -} - static int b43legacy_op_beacon_set_tim(struct ieee80211_hw *hw, struct ieee80211_sta *sta, bool set) { @@ -3414,7 +3400,6 @@ static const struct ieee80211_ops b43legacy_hw_ops = { .get_tx_stats = b43legacy_op_get_tx_stats, .start = b43legacy_op_start, .stop = b43legacy_op_stop, - .set_retry_limit = b43legacy_op_set_retry_limit, .set_tim = b43legacy_op_beacon_set_tim, }; diff --git a/drivers/net/wireless/rt2x00/rt2400pci.c b/drivers/net/wireless/rt2x00/rt2400pci.c index 96eaf5f4dc68..1adca7a1b9dc 100644 --- a/drivers/net/wireless/rt2x00/rt2400pci.c +++ b/drivers/net/wireless/rt2x00/rt2400pci.c @@ -1574,7 +1574,6 @@ static const struct ieee80211_ops rt2400pci_mac80211_ops = { .config_interface = rt2x00mac_config_interface, .configure_filter = rt2x00mac_configure_filter, .get_stats = rt2x00mac_get_stats, - .set_retry_limit = rt2400pci_set_retry_limit, .bss_info_changed = rt2x00mac_bss_info_changed, .conf_tx = rt2400pci_conf_tx, .get_tx_stats = rt2x00mac_get_tx_stats, @@ -1603,6 +1602,7 @@ static const struct rt2x00lib_ops rt2400pci_rt2x00_ops = { .config_intf = rt2400pci_config_intf, .config_erp = rt2400pci_config_erp, .config = rt2400pci_config, + .set_retry_limit = rt2400pci_set_retry_limit, }; static const struct data_queue_desc rt2400pci_queue_rx = { diff --git a/drivers/net/wireless/rt2x00/rt2500pci.c b/drivers/net/wireless/rt2x00/rt2500pci.c index 8b772ab613e4..85b0387f46eb 100644 --- a/drivers/net/wireless/rt2x00/rt2500pci.c +++ b/drivers/net/wireless/rt2x00/rt2500pci.c @@ -1874,7 +1874,6 @@ static const struct ieee80211_ops rt2500pci_mac80211_ops = { .config_interface = rt2x00mac_config_interface, .configure_filter = rt2x00mac_configure_filter, .get_stats = rt2x00mac_get_stats, - .set_retry_limit = rt2500pci_set_retry_limit, .bss_info_changed = rt2x00mac_bss_info_changed, .conf_tx = rt2x00mac_conf_tx, .get_tx_stats = rt2x00mac_get_tx_stats, @@ -1903,6 +1902,7 @@ static const struct rt2x00lib_ops rt2500pci_rt2x00_ops = { .config_intf = rt2500pci_config_intf, .config_erp = rt2500pci_config_erp, .config = rt2500pci_config, + .set_retry_limit = rt2500pci_set_retry_limit, }; static const struct data_queue_desc rt2500pci_queue_rx = { diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h index 8ec8f7e9ec64..0887e895d5c1 100644 --- a/drivers/net/wireless/rt2x00/rt2x00.h +++ b/drivers/net/wireless/rt2x00/rt2x00.h @@ -599,6 +599,9 @@ struct rt2x00lib_ops { #define CONFIG_UPDATE_SLOT_TIME ( 1 << 5 ) #define CONFIG_UPDATE_BEACON_INT ( 1 << 6 ) #define CONFIG_UPDATE_ALL 0xffff + + int (*set_retry_limit) (struct ieee80211_hw *hw, + u32 short_limit, u32 long_limit); }; /* diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c index da7b49a364ff..931183369f07 100644 --- a/drivers/net/wireless/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/rt2x00/rt2x00mac.c @@ -349,6 +349,15 @@ int rt2x00mac_config(struct ieee80211_hw *hw, u32 changed) if (!test_bit(DEVICE_STATE_PRESENT, &rt2x00dev->flags)) return 0; + if (changed & IEEE80211_CONF_CHANGE_RETRY_LIMITS) { + rt2x00dev->ops->lib->set_retry_limit(hw, + conf->short_frame_max_tx_count, + conf->long_frame_max_tx_count); + } + changed &= ~IEEE80211_CONF_CHANGE_RETRY_LIMITS; + if (!changed) + return 0; + /* * Only change device state when the radio is enabled. It does not * matter what parameters we have configured when the radio is disabled diff --git a/drivers/net/wireless/rt2x00/rt61pci.c b/drivers/net/wireless/rt2x00/rt61pci.c index 45f69f83552c..08eb896ae36c 100644 --- a/drivers/net/wireless/rt2x00/rt61pci.c +++ b/drivers/net/wireless/rt2x00/rt61pci.c @@ -2724,7 +2724,6 @@ static const struct ieee80211_ops rt61pci_mac80211_ops = { .configure_filter = rt2x00mac_configure_filter, .set_key = rt2x00mac_set_key, .get_stats = rt2x00mac_get_stats, - .set_retry_limit = rt61pci_set_retry_limit, .bss_info_changed = rt2x00mac_bss_info_changed, .conf_tx = rt61pci_conf_tx, .get_tx_stats = rt2x00mac_get_tx_stats, @@ -2757,6 +2756,7 @@ static const struct rt2x00lib_ops rt61pci_rt2x00_ops = { .config_intf = rt61pci_config_intf, .config_erp = rt61pci_config_erp, .config = rt61pci_config, + .set_retry_limit = rt61pci_set_retry_limit, }; static const struct data_queue_desc rt61pci_queue_rx = { diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c index 336fecb04c46..7f89782a34e4 100644 --- a/drivers/net/wireless/rt2x00/rt73usb.c +++ b/drivers/net/wireless/rt2x00/rt73usb.c @@ -2315,7 +2315,6 @@ static const struct ieee80211_ops rt73usb_mac80211_ops = { .configure_filter = rt2x00mac_configure_filter, .set_key = rt2x00mac_set_key, .get_stats = rt2x00mac_get_stats, - .set_retry_limit = rt73usb_set_retry_limit, .bss_info_changed = rt2x00mac_bss_info_changed, .conf_tx = rt73usb_conf_tx, .get_tx_stats = rt2x00mac_get_tx_stats, @@ -2347,6 +2346,7 @@ static const struct rt2x00lib_ops rt73usb_rt2x00_ops = { .config_intf = rt73usb_config_intf, .config_erp = rt73usb_config_erp, .config = rt73usb_config, + .set_retry_limit = rt73usb_set_retry_limit, }; static const struct data_queue_desc rt73usb_queue_rx = { diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 34e8569b59bb..fd52300b96d0 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -473,6 +473,7 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void) * @IEEE80211_CONF_CHANGE_PS: the PS flag changed * @IEEE80211_CONF_CHANGE_POWER: the TX power changed * @IEEE80211_CONF_CHANGE_CHANNEL: the channel changed + * @IEEE80211_CONF_CHANGE_RETRY_LIMITS: retry limits changed */ enum ieee80211_conf_changed { IEEE80211_CONF_CHANGE_RADIO_ENABLED = BIT(0), @@ -482,6 +483,7 @@ enum ieee80211_conf_changed { IEEE80211_CONF_CHANGE_PS = BIT(4), IEEE80211_CONF_CHANGE_POWER = BIT(5), IEEE80211_CONF_CHANGE_CHANNEL = BIT(6), + IEEE80211_CONF_CHANGE_RETRY_LIMITS = BIT(7), }; /** @@ -497,6 +499,12 @@ enum ieee80211_conf_changed { * @ht_cap: describes current self configuration of 802.11n HT capabilities * @ht_bss_conf: describes current BSS configuration of 802.11n HT parameters * @channel: the channel to tune to + * @long_frame_max_tx_count: Maximum number of transmissions for a "long" frame + * (a frame not RTS protected), called "dot11LongRetryLimit" in 802.11, + * but actually means the number of transmissions not the number of retries + * @short_frame_max_tx_count: Maximum number of transmissions for a "short" + * frame, called "dot11ShortRetryLimit" in 802.11, but actually means the + * number of transmissions not the number of retries */ struct ieee80211_conf { int beacon_int; @@ -506,6 +514,8 @@ struct ieee80211_conf { u16 listen_interval; bool radio_enabled; + u8 long_frame_max_tx_count, short_frame_max_tx_count; + struct ieee80211_channel *channel; struct ieee80211_sta_ht_cap ht_cap; @@ -1190,8 +1200,6 @@ enum ieee80211_ampdu_mlme_action { * the device does fragmentation by itself; if this method is assigned then * the stack will not do fragmentation. * - * @set_retry_limit: Configuration of retry limits (if device needs it) - * * @sta_notify: Notifies low level driver about addition or removal * of assocaited station or AP. * @@ -1261,8 +1269,6 @@ struct ieee80211_ops { u32 *iv32, u16 *iv16); int (*set_rts_threshold)(struct ieee80211_hw *hw, u32 value); int (*set_frag_threshold)(struct ieee80211_hw *hw, u32 value); - int (*set_retry_limit)(struct ieee80211_hw *hw, - u32 short_retry, u32 long_retr); void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum sta_notify_cmd, struct ieee80211_sta *sta); int (*conf_tx)(struct ieee80211_hw *hw, u16 queue, diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 767dbca50adb..2697a2fe608f 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -52,9 +52,9 @@ DEBUGFS_READONLY_FILE(rts_threshold, 20, "%d", DEBUGFS_READONLY_FILE(fragmentation_threshold, 20, "%d", local->fragmentation_threshold); DEBUGFS_READONLY_FILE(short_retry_limit, 20, "%d", - local->short_retry_limit); + local->hw.conf.short_frame_max_tx_count); DEBUGFS_READONLY_FILE(long_retry_limit, 20, "%d", - local->long_retry_limit); + local->hw.conf.long_frame_max_tx_count); DEBUGFS_READONLY_FILE(total_ps_buffered, 20, "%d", local->total_ps_buffered); DEBUGFS_READONLY_FILE(wep_iv, 20, "%#06x", diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 819844726dd4..f1ef522bfe07 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -635,8 +635,6 @@ struct ieee80211_local { int rts_threshold; int fragmentation_threshold; - int short_retry_limit; /* dot11ShortRetryLimit */ - int long_retry_limit; /* dot11LongRetryLimit */ struct crypto_blkcipher *wep_tx_tfm; struct crypto_blkcipher *wep_rx_tfm; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index c936017f6d48..59be9e782699 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -673,8 +673,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD; local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD; - local->short_retry_limit = 7; - local->long_retry_limit = 4; + local->hw.conf.long_frame_max_tx_count = 4; + local->hw.conf.short_frame_max_tx_count = 7; local->hw.conf.radio_enabled = true; INIT_LIST_HEAD(&local->interfaces); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8bcabefa86e0..dd440a07634e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -505,10 +505,10 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx) info->flags |= IEEE80211_TX_CTL_LONG_RETRY_LIMIT; info->control.retry_limit = - tx->local->long_retry_limit; + tx->local->hw.conf.long_frame_max_tx_count - 1; } else { info->control.retry_limit = - tx->local->short_retry_limit; + tx->local->hw.conf.short_frame_max_tx_count - 1; } } else { info->control.retry_limit = 1; diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 94c4b35eeb14..f7e442f80a17 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -802,21 +802,16 @@ static int ieee80211_ioctl_siwretry(struct net_device *dev, (retry->flags & IW_RETRY_TYPE) != IW_RETRY_LIMIT) return -EINVAL; - if (retry->flags & IW_RETRY_MAX) - local->long_retry_limit = retry->value; - else if (retry->flags & IW_RETRY_MIN) - local->short_retry_limit = retry->value; - else { - local->long_retry_limit = retry->value; - local->short_retry_limit = retry->value; + if (retry->flags & IW_RETRY_MAX) { + local->hw.conf.long_frame_max_tx_count = retry->value; + } else if (retry->flags & IW_RETRY_MIN) { + local->hw.conf.short_frame_max_tx_count = retry->value; + } else { + local->hw.conf.long_frame_max_tx_count = retry->value; + local->hw.conf.short_frame_max_tx_count = retry->value; } - if (local->ops->set_retry_limit) { - return local->ops->set_retry_limit( - local_to_hw(local), - local->short_retry_limit, - local->long_retry_limit); - } + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_RETRY_LIMITS); return 0; } @@ -833,14 +828,15 @@ static int ieee80211_ioctl_giwretry(struct net_device *dev, /* first return min value, iwconfig will ask max value * later if needed */ retry->flags |= IW_RETRY_LIMIT; - retry->value = local->short_retry_limit; - if (local->long_retry_limit != local->short_retry_limit) + retry->value = local->hw.conf.short_frame_max_tx_count; + if (local->hw.conf.long_frame_max_tx_count != + local->hw.conf.short_frame_max_tx_count) retry->flags |= IW_RETRY_MIN; return 0; } if (retry->flags & IW_RETRY_MAX) { retry->flags = IW_RETRY_LIMIT | IW_RETRY_MAX; - retry->value = local->long_retry_limit; + retry->value = local->hw.conf.long_frame_max_tx_count; } return 0; -- cgit v1.2.3 From bda3933a8aceedd03e0dd410844bd310033ca756 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 11 Oct 2008 01:51:51 +0200 Subject: mac80211: move bss_conf into vif Move bss_conf into the vif struct so that drivers can access it during ->tx without having to store it in the private data or similar. No driver updates because this is only for when they want to start using it. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 3 +++ net/mac80211/cfg.c | 6 +++--- net/mac80211/debugfs_netdev.c | 2 +- net/mac80211/ieee80211_i.h | 3 --- net/mac80211/iface.c | 2 +- net/mac80211/main.c | 8 ++++---- net/mac80211/mlme.c | 30 +++++++++++++++--------------- net/mac80211/tx.c | 16 ++++++++-------- net/mac80211/util.c | 6 +++--- 9 files changed, 38 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index fd52300b96d0..94ff3eface49 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -529,11 +529,14 @@ struct ieee80211_conf { * use during the life of a virtual interface. * * @type: type of this virtual interface + * @bss_conf: BSS configuration for this interface, either our own + * or the BSS we're associated to * @drv_priv: data area for driver use, will always be aligned to * sizeof(void *). */ struct ieee80211_vif { enum nl80211_iftype type; + struct ieee80211_bss_conf bss_conf; /* must be last */ u8 drv_priv[0] __attribute__((__aligned__(sizeof(void *)))); }; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 8ea30902d5db..28382b5c7c25 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -964,16 +964,16 @@ static int ieee80211_change_bss(struct wiphy *wiphy, return -EINVAL; if (params->use_cts_prot >= 0) { - sdata->bss_conf.use_cts_prot = params->use_cts_prot; + sdata->vif.bss_conf.use_cts_prot = params->use_cts_prot; changed |= BSS_CHANGED_ERP_CTS_PROT; } if (params->use_short_preamble >= 0) { - sdata->bss_conf.use_short_preamble = + sdata->vif.bss_conf.use_short_preamble = params->use_short_preamble; changed |= BSS_CHANGED_ERP_PREAMBLE; } if (params->use_short_slot_time >= 0) { - sdata->bss_conf.use_short_slot = + sdata->vif.bss_conf.use_short_slot = params->use_short_slot_time; changed |= BSS_CHANGED_ERP_SLOT; } diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 56f3f29385cb..c54219301724 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -119,7 +119,7 @@ static ssize_t ieee80211_if_fmt_flags( sdata->u.sta.flags & IEEE80211_STA_AUTHENTICATED ? "AUTH\n" : "", sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED ? "ASSOC\n" : "", sdata->u.sta.flags & IEEE80211_STA_PROBEREQ_POLL ? "PROBEREQ POLL\n" : "", - sdata->bss_conf.use_cts_prot ? "CTS prot\n" : ""); + sdata->vif.bss_conf.use_cts_prot ? "CTS prot\n" : ""); } __IEEE80211_IF_FILE(flags); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f1ef522bfe07..859b5b001f22 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -440,9 +440,6 @@ struct ieee80211_sub_if_data { u16 sequence_number; - /* BSS configuration for this interface. */ - struct ieee80211_bss_conf bss_conf; - /* * AP this belongs to: self in AP mode and * corresponding AP in VLAN mode, NULL for diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index df28c5f7c9c0..cde145221b61 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -695,7 +695,7 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, ieee80211_setup_sdata(sdata, type); /* reset some values that shouldn't be kept across type changes */ - sdata->bss_conf.basic_rates = + sdata->vif.bss_conf.basic_rates = ieee80211_mandatory_rates(sdata->local, sdata->local->hw.conf.channel->band); sdata->drop_unencrypted = 0; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 59be9e782699..ab8a860444af 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -249,15 +249,15 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, if (local->ops->bss_info_changed) local->ops->bss_info_changed(local_to_hw(local), &sdata->vif, - &sdata->bss_conf, + &sdata->vif.bss_conf, changed); } u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata) { - sdata->bss_conf.use_cts_prot = false; - sdata->bss_conf.use_short_preamble = false; - sdata->bss_conf.use_short_slot = false; + sdata->vif.bss_conf.use_cts_prot = false; + sdata->vif.bss_conf.use_short_preamble = false; + sdata->vif.bss_conf.use_short_slot = false; return BSS_CHANGED_ERP_CTS_PROT | BSS_CHANGED_ERP_PREAMBLE | BSS_CHANGED_ERP_SLOT; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 196dd39f6286..9c5f5c37a49e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -572,7 +572,7 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, u16 capab, bool erp_valid, u8 erp) { - struct ieee80211_bss_conf *bss_conf = &sdata->bss_conf; + struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; #ifdef CONFIG_MAC80211_VERBOSE_DEBUG struct ieee80211_if_sta *ifsta = &sdata->u.sta; #endif @@ -718,9 +718,9 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ifsta->ssid, ifsta->ssid_len); if (bss) { /* set timing information */ - sdata->bss_conf.beacon_int = bss->beacon_int; - sdata->bss_conf.timestamp = bss->timestamp; - sdata->bss_conf.dtim_period = bss->dtim_period; + sdata->vif.bss_conf.beacon_int = bss->beacon_int; + sdata->vif.bss_conf.timestamp = bss->timestamp; + sdata->vif.bss_conf.dtim_period = bss->dtim_period; changed |= ieee80211_handle_bss_capability(sdata, bss->capability, bss->has_erp_value, bss->erp_value); @@ -730,9 +730,9 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) { changed |= BSS_CHANGED_HT; - sdata->bss_conf.assoc_ht = 1; - sdata->bss_conf.ht_cap = &conf->ht_cap; - sdata->bss_conf.ht_bss_conf = &conf->ht_bss_conf; + sdata->vif.bss_conf.assoc_ht = 1; + sdata->vif.bss_conf.ht_cap = &conf->ht_cap; + sdata->vif.bss_conf.ht_bss_conf = &conf->ht_bss_conf; } ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; @@ -742,7 +742,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ifsta->last_probe = jiffies; ieee80211_led_assoc(local, 1); - sdata->bss_conf.assoc = 1; + sdata->vif.bss_conf.assoc = 1; /* * For now just always ask the driver to update the basic rateset * when we have associated, we aren't checking whether it actually @@ -847,15 +847,15 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; changed |= ieee80211_reset_erp_info(sdata); - if (sdata->bss_conf.assoc_ht) + if (sdata->vif.bss_conf.assoc_ht) changed |= BSS_CHANGED_HT; - sdata->bss_conf.assoc_ht = 0; - sdata->bss_conf.ht_cap = NULL; - sdata->bss_conf.ht_bss_conf = NULL; + sdata->vif.bss_conf.assoc_ht = 0; + sdata->vif.bss_conf.ht_cap = NULL; + sdata->vif.bss_conf.ht_bss_conf = NULL; ieee80211_led_assoc(local, 0); - sdata->bss_conf.assoc = 0; + sdata->vif.bss_conf.assoc = 0; ieee80211_sta_send_apinfo(sdata, ifsta); @@ -1182,7 +1182,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, u64 rates, basic_rates; u16 capab_info, status_code, aid; struct ieee802_11_elems elems; - struct ieee80211_bss_conf *bss_conf = &sdata->bss_conf; + struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; u8 *pos; int i, j; bool have_higher_than_11mbit = false; @@ -1324,7 +1324,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, } sta->sta.supp_rates[local->hw.conf.channel->band] = rates; - sdata->bss_conf.basic_rates = basic_rates; + sdata->vif.bss_conf.basic_rates = basic_rates; /* cf. IEEE 802.11 9.2.12 */ if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ && diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index dd440a07634e..6f3e4be97631 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -116,7 +116,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, if (r->bitrate > txrate->bitrate) break; - if (tx->sdata->bss_conf.basic_rates & BIT(i)) + if (tx->sdata->vif.bss_conf.basic_rates & BIT(i)) rate = r->bitrate; switch (sband->band) { @@ -150,7 +150,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, * to closest integer */ dur = ieee80211_frame_duration(local, 10, rate, erp, - tx->sdata->bss_conf.use_short_preamble); + tx->sdata->vif.bss_conf.use_short_preamble); if (next_frag_len) { /* Frame is fragmented: duration increases with time needed to @@ -159,7 +159,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, /* next fragment */ dur += ieee80211_frame_duration(local, next_frag_len, txrate->bitrate, erp, - tx->sdata->bss_conf.use_short_preamble); + tx->sdata->vif.bss_conf.use_short_preamble); } return cpu_to_le16(dur); @@ -463,7 +463,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) } else info->control.retries[0].rate_idx = -1; - if (tx->sdata->bss_conf.use_cts_prot && + if (tx->sdata->vif.bss_conf.use_cts_prot && (tx->flags & IEEE80211_TX_FRAGMENTED) && (rsel.nonerp_idx >= 0)) { tx->last_frag_rate_idx = tx->rate_idx; if (rsel.probe_idx >= 0) @@ -529,7 +529,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx) if ((tx->sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) && (sband->bitrates[tx->rate_idx].flags & IEEE80211_RATE_ERP_G) && (tx->flags & IEEE80211_TX_UNICAST) && - tx->sdata->bss_conf.use_cts_prot && + tx->sdata->vif.bss_conf.use_cts_prot && !(info->flags & IEEE80211_TX_CTL_USE_RTS_CTS)) info->flags |= IEEE80211_TX_CTL_USE_CTS_PROTECT; @@ -538,7 +538,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx) * available on the network at the current point in time. */ if (ieee80211_is_data(hdr->frame_control) && (sband->bitrates[tx->rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE) && - tx->sdata->bss_conf.use_short_preamble && + tx->sdata->vif.bss_conf.use_short_preamble && (!tx->sta || test_sta_flags(tx->sta, WLAN_STA_SHORT_PREAMBLE))) { info->flags |= IEEE80211_TX_CTL_SHORT_PREAMBLE; } @@ -558,7 +558,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx) for (idx = 0; idx < sband->n_bitrates; idx++) { if (sband->bitrates[idx].bitrate > rate->bitrate) continue; - if (tx->sdata->bss_conf.basic_rates & BIT(idx) && + if (tx->sdata->vif.bss_conf.basic_rates & BIT(idx) && (baserate < 0 || (sband->bitrates[baserate].bitrate < sband->bitrates[idx].bitrate))) @@ -1977,7 +1977,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, info->flags |= IEEE80211_TX_CTL_NO_ACK; info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; - if (sdata->bss_conf.use_short_preamble && + if (sdata->vif.bss_conf.use_short_preamble && sband->bitrates[rsel.rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE) info->flags |= IEEE80211_TX_CTL_SHORT_PREAMBLE; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 3288c3de67ca..ec8b6335f0c1 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -239,7 +239,7 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, erp = 0; if (vif) { sdata = vif_to_sdata(vif); - short_preamble = sdata->bss_conf.use_short_preamble; + short_preamble = sdata->vif.bss_conf.use_short_preamble; if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) erp = rate->flags & IEEE80211_RATE_ERP_G; } @@ -272,7 +272,7 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw, erp = 0; if (vif) { sdata = vif_to_sdata(vif); - short_preamble = sdata->bss_conf.use_short_preamble; + short_preamble = sdata->vif.bss_conf.use_short_preamble; if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) erp = rate->flags & IEEE80211_RATE_ERP_G; } @@ -312,7 +312,7 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, erp = 0; if (vif) { sdata = vif_to_sdata(vif); - short_preamble = sdata->bss_conf.use_short_preamble; + short_preamble = sdata->vif.bss_conf.use_short_preamble; if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) erp = rate->flags & IEEE80211_RATE_ERP_G; } -- cgit v1.2.3 From ae5eb02641233a4e9d1b92d22090f1b1afa14466 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 14 Oct 2008 16:58:37 +0200 Subject: mac80211: rewrite HT handling The HT handling has the following deficiencies, which I've (partially) fixed: * it always uses the AP info even if there is no AP, hence has no chance of working as an AP * it pretends to be HW config, but really is per-BSS * channel sanity checking is left to the drivers * it generally lets the driver control too much HT enabling is still wrong with this patch if you have more than one virtual STA mode interface, but that never happens currently. Once WDS, IBSS or AP/VLAN gets HT capabilities, it will also be wrong, see the comment in ieee80211_enable_ht(). Additionally, this fixes a number of bugs: * mac80211: ieee80211_set_disassoc doesn't notify the driver any more since the refactoring * iwl-agn-rs: always uses the HT capabilities from the wrong stuff mac80211 gives it rather than the actual peer STA * ath9k: a number of bugs resulting from the broken HT API I'm not entirely happy with putting the HT capabilities into struct ieee80211_sta as restricted to our own HT TX capabilities, but I see no cleaner solution for now. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/ath9k/core.h | 6 +- drivers/net/wireless/ath9k/main.c | 41 ++++--- drivers/net/wireless/ath9k/rc.c | 8 +- drivers/net/wireless/ath9k/recv.c | 3 +- drivers/net/wireless/ath9k/xmit.c | 17 +-- drivers/net/wireless/iwlwifi/iwl-agn-rs.c | 23 ++-- drivers/net/wireless/iwlwifi/iwl-agn.c | 39 ++++--- drivers/net/wireless/iwlwifi/iwl-core.c | 17 +-- drivers/net/wireless/iwlwifi/iwl-dev.h | 1 - drivers/net/wireless/iwlwifi/iwl-sta.c | 33 ++++-- include/net/mac80211.h | 59 +++++----- net/mac80211/cfg.c | 6 +- net/mac80211/ht.c | 181 +++++++++++++----------------- net/mac80211/ieee80211_i.h | 12 +- net/mac80211/mlme.c | 88 ++++++++------- 15 files changed, 260 insertions(+), 274 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath9k/core.h b/drivers/net/wireless/ath9k/core.h index cb3e61e57c4d..59d835b72cd8 100644 --- a/drivers/net/wireless/ath9k/core.h +++ b/drivers/net/wireless/ath9k/core.h @@ -380,7 +380,6 @@ void ath_rx_cleanup(struct ath_softc *sc); int ath_rx_tasklet(struct ath_softc *sc, int flush); int ath_rx_input(struct ath_softc *sc, struct ath_node *node, - int is_ampdu, struct sk_buff *skb, struct ath_recv_status *rx_status, enum ATH_RX_TYPE *status); @@ -650,6 +649,9 @@ struct ath_node { u8 an_smmode; /* SM Power save mode */ u8 an_flags; u8 an_addr[ETH_ALEN]; + + u16 maxampdu; + u8 mpdudensity; }; void ath_tx_resume_tid(struct ath_softc *sc, @@ -919,8 +921,6 @@ enum RATE_TYPE { struct ath_ht_info { enum ath9k_ht_macmode tx_chan_width; - u16 maxampdu; - u8 mpdudensity; u8 ext_chan_offset; }; diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c index 41cd114c438c..7555c3413384 100644 --- a/drivers/net/wireless/ath9k/main.c +++ b/drivers/net/wireless/ath9k/main.c @@ -330,25 +330,15 @@ static void ath9k_ht_conf(struct ath_softc *sc, { struct ath_ht_info *ht_info = &sc->sc_ht_info; - if (bss_conf->assoc_ht) { - ht_info->ext_chan_offset = - bss_conf->ht_bss_conf->bss_cap & - IEEE80211_HT_PARAM_CHA_SEC_OFFSET; - - if (!(bss_conf->ht_cap->cap & - IEEE80211_HT_CAP_40MHZ_INTOLERANT) && - (bss_conf->ht_bss_conf->bss_cap & - IEEE80211_HT_PARAM_CHAN_WIDTH_ANY)) + if (sc->hw->conf.ht.enabled) { + ht_info->ext_chan_offset = bss_conf->ht.secondary_channel_offset; + + if (bss_conf->ht.width_40_ok) ht_info->tx_chan_width = ATH9K_HT_MACMODE_2040; else ht_info->tx_chan_width = ATH9K_HT_MACMODE_20; ath9k_hw_set11nmac2040(sc->sc_ah, ht_info->tx_chan_width); - ht_info->maxampdu = 1 << (IEEE80211_HTCAP_MAXRXAMPDU_FACTOR + - bss_conf->ht_cap->ampdu_factor); - ht_info->mpdudensity = - parse_mpdudensity(bss_conf->ht_cap->ampdu_density); - } } @@ -390,7 +380,7 @@ static void ath9k_bss_assoc_info(struct ath_softc *sc, sc->sc_halstats.ns_avgtxrate = ATH_RATE_DUMMY_MARKER; /* Update chainmask */ - ath_update_chainmask(sc, bss_conf->assoc_ht); + ath_update_chainmask(sc, hw->conf.ht.enabled); DPRINTF(sc, ATH_DBG_CONFIG, "%s: bssid %pM aid 0x%x\n", @@ -408,7 +398,7 @@ static void ath9k_bss_assoc_info(struct ath_softc *sc, return; } - if (hw->conf.ht_cap.ht_supported) + if (hw->conf.ht.enabled) sc->sc_ah->ah_channels[pos].chanmode = ath_get_extchanmode(sc, curchan); else @@ -531,7 +521,6 @@ int _ath_rx_indicate(struct ath_softc *sc, if (an) { ath_rx_input(sc, an, - hw->conf.ht_cap.ht_supported, skb, status, &st); } if (!an || (st != ATH_RX_CONSUMED)) @@ -1241,6 +1230,9 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed) __func__, curchan->center_freq); + /* Update chainmask */ + ath_update_chainmask(sc, conf->ht.enabled); + pos = ath_get_channel(sc, curchan); if (pos == -1) { DPRINTF(sc, ATH_DBG_FATAL, "%s: Invalid channel\n", __func__); @@ -1251,7 +1243,7 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed) (curchan->band == IEEE80211_BAND_2GHZ) ? CHANNEL_G : CHANNEL_A; - if (sc->sc_curaid && hw->conf.ht_cap.ht_supported) + if (sc->sc_curaid && hw->conf.ht.enabled) sc->sc_ah->ah_channels[pos].chanmode = ath_get_extchanmode(sc, curchan); @@ -1434,6 +1426,14 @@ static void ath9k_sta_notify(struct ieee80211_hw *hw, } else { ath_node_get(sc, sta->addr); } + + /* XXX: Is this right? Can the capabilities change? */ + an = ath_node_find(sc, sta->addr); + an->maxampdu = 1 << (IEEE80211_HTCAP_MAXRXAMPDU_FACTOR + + sta->ht_cap.ampdu_factor); + an->mpdudensity = + parse_mpdudensity(sta->ht_cap.ampdu_density); + spin_unlock_irqrestore(&sc->node_lock, flags); break; case STA_NOTIFY_REMOVE: @@ -1552,9 +1552,8 @@ static void ath9k_bss_info_changed(struct ieee80211_hw *hw, } if (changed & BSS_CHANGED_HT) { - DPRINTF(sc, ATH_DBG_CONFIG, "%s: BSS Changed HT %d\n", - __func__, - bss_conf->assoc_ht); + DPRINTF(sc, ATH_DBG_CONFIG, "%s: BSS Changed HT\n", + __func__); ath9k_ht_conf(sc, bss_conf); } diff --git a/drivers/net/wireless/ath9k/rc.c b/drivers/net/wireless/ath9k/rc.c index ee2dbce42b4d..9b2526030965 100644 --- a/drivers/net/wireless/ath9k/rc.c +++ b/drivers/net/wireless/ath9k/rc.c @@ -1838,7 +1838,7 @@ void ath_rc_node_update(struct ieee80211_hw *hw, struct ath_rate_node *rc_priv) struct ath_softc *sc = hw->priv; u32 capflag = 0; - if (hw->conf.ht_cap.ht_supported) { + if (hw->conf.ht.enabled) { capflag |= ATH_RC_HT_FLAG | ATH_RC_DS_FLAG; if (sc->sc_ht_info.tx_chan_width == ATH9K_HT_MACMODE_2040) capflag |= ATH_RC_CW40_FLAG; @@ -1979,7 +1979,7 @@ static void ath_get_rate(void *priv, struct ieee80211_supported_band *sband, /* Check if aggregation has to be enabled for this tid */ - if (hw->conf.ht_cap.ht_supported) { + if (hw->conf.ht.enabled) { if (ieee80211_is_data_qos(fc)) { qc = ieee80211_get_qos_ctl(hdr); tid = qc[0] & 0xf; @@ -2026,9 +2026,9 @@ static void ath_rate_init(void *priv, struct ieee80211_supported_band *sband, DPRINTF(sc, ATH_DBG_RATE, "%s\n", __func__); ath_setup_rates(sc, sband, sta, ath_rc_priv); - if (sc->hw->conf.flags & IEEE80211_CONF_SUPPORT_HT_MODE) { + if (sc->hw->conf.ht.enabled) { for (i = 0; i < 77; i++) { - if (sc->hw->conf.ht_cap.mcs.rx_mask[i/8] & (1<<(i%8))) + if (sta->ht_cap.mcs.rx_mask[i/8] & (1<<(i%8))) ath_rc_priv->neg_ht_rates.rs_rates[j++] = i; if (j == ATH_RATE_MAX) break; diff --git a/drivers/net/wireless/ath9k/recv.c b/drivers/net/wireless/ath9k/recv.c index 010fcdfec3f6..828322840a86 100644 --- a/drivers/net/wireless/ath9k/recv.c +++ b/drivers/net/wireless/ath9k/recv.c @@ -720,12 +720,11 @@ void ath_flushrecv(struct ath_softc *sc) int ath_rx_input(struct ath_softc *sc, struct ath_node *an, - int is_ampdu, struct sk_buff *skb, struct ath_recv_status *rx_status, enum ATH_RX_TYPE *status) { - if (is_ampdu && (sc->sc_flags & SC_OP_RXAGGR)) { + if (sc->sc_flags & SC_OP_RXAGGR) { *status = ATH_RX_CONSUMED; return ath_ampdu_input(sc, an, skb, rx_status); } else { diff --git a/drivers/net/wireless/ath9k/xmit.c b/drivers/net/wireless/ath9k/xmit.c index 3770fbe84fce..ba818cc2fb5c 100644 --- a/drivers/net/wireless/ath9k/xmit.c +++ b/drivers/net/wireless/ath9k/xmit.c @@ -300,7 +300,8 @@ static int ath_tx_prepare(struct ath_softc *sc, if (ieee80211_is_data(fc) && !txctl->use_minrate) { /* Enable HT only for DATA frames and not for EAPOL */ - txctl->ht = (hw->conf.ht_cap.ht_supported && + /* XXX why AMPDU only?? */ + txctl->ht = (hw->conf.ht.enabled && (tx_info->flags & IEEE80211_TX_CTL_AMPDU)); if (is_multicast_ether_addr(hdr->addr1)) { @@ -1450,7 +1451,8 @@ static int ath_tx_send_ampdu(struct ath_softc *sc, */ static u32 ath_lookup_rate(struct ath_softc *sc, - struct ath_buf *bf) + struct ath_buf *bf, + struct ath_atx_tid *tid) { const struct ath9k_rate_table *rt = sc->sc_currates; struct sk_buff *skb; @@ -1504,7 +1506,7 @@ static u32 ath_lookup_rate(struct ath_softc *sc, * The IE, however can hold upto 65536, which shows up here * as zero. Ignore 65536 since we are constrained by hw. */ - maxampdu = sc->sc_ht_info.maxampdu; + maxampdu = tid->an->maxampdu; if (maxampdu) aggr_limit = min(aggr_limit, maxampdu); @@ -1518,6 +1520,7 @@ static u32 ath_lookup_rate(struct ath_softc *sc, */ static int ath_compute_num_delims(struct ath_softc *sc, + struct ath_atx_tid *tid, struct ath_buf *bf, u16 frmlen) { @@ -1545,7 +1548,7 @@ static int ath_compute_num_delims(struct ath_softc *sc, * required minimum length for subframe. Take into account * whether high rate is 20 or 40Mhz and half or full GI. */ - mpdudensity = sc->sc_ht_info.mpdudensity; + mpdudensity = tid->an->mpdudensity; /* * If there is no mpdu density restriction, no further calculation @@ -1619,7 +1622,7 @@ static enum ATH_AGGR_STATUS ath_tx_form_aggr(struct ath_softc *sc, } if (!rl) { - aggr_limit = ath_lookup_rate(sc, bf); + aggr_limit = ath_lookup_rate(sc, bf, tid); rl = 1; /* * Is rate dual stream @@ -1657,7 +1660,7 @@ static enum ATH_AGGR_STATUS ath_tx_form_aggr(struct ath_softc *sc, * Get the delimiters needed to meet the MPDU * density for this node. */ - ndelim = ath_compute_num_delims(sc, bf_first, bf->bf_frmlen); + ndelim = ath_compute_num_delims(sc, tid, bf_first, bf->bf_frmlen); bpad = PADBYTES(al_delta) + (ndelim << 2); @@ -2629,7 +2632,7 @@ void ath_tx_node_init(struct ath_softc *sc, struct ath_node *an) struct ath_atx_ac *ac; int tidno, acno; - sc->sc_ht_info.maxampdu = ATH_AMPDU_LIMIT_DEFAULT; + an->maxampdu = ATH_AMPDU_LIMIT_DEFAULT; /* * Init per tid tx state diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c index cd1bff590491..e10e0ca09ce9 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c @@ -1133,8 +1133,7 @@ static int rs_switch_to_mimo2(struct iwl_priv *priv, s32 rate; s8 is_green = lq_sta->is_green; - if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) || - !sta->ht_cap.ht_supported) + if (!conf->ht.enabled || !sta->ht_cap.ht_supported) return -1; if (((sta->ht_cap.cap & IEEE80211_HT_CAP_SM_PS) >> 2) @@ -1201,8 +1200,7 @@ static int rs_switch_to_siso(struct iwl_priv *priv, u8 is_green = lq_sta->is_green; s32 rate; - if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) || - !sta->ht_cap.ht_supported) + if (!conf->ht.enabled || !sta->ht_cap.ht_supported) return -1; IWL_DEBUG_RATE("LQ: try to switch to SISO\n"); @@ -2001,9 +1999,8 @@ lq_update: * stay with best antenna legacy modulation for a while * before next round of mode comparisons. */ tbl1 = &(lq_sta->lq_info[lq_sta->active_tbl]); - if (is_legacy(tbl1->lq_type) && - (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE)) && - (lq_sta->action_counter >= 1)) { + if (is_legacy(tbl1->lq_type) && !conf->ht.enabled && + lq_sta->action_counter >= 1) { lq_sta->action_counter = 0; IWL_DEBUG_RATE("LQ: STAY in legacy table\n"); rs_set_stay_in_table(priv, 1, lq_sta); @@ -2238,19 +2235,19 @@ static void rs_rate_init(void *priv_r, struct ieee80211_supported_band *sband, * active_siso_rate mask includes 9 MBits (bit 5), and CCK (bits 0-3), * supp_rates[] does not; shift to convert format, force 9 MBits off. */ - lq_sta->active_siso_rate = conf->ht_cap.mcs.rx_mask[0] << 1; - lq_sta->active_siso_rate |= conf->ht_cap.mcs.rx_mask[0] & 0x1; + lq_sta->active_siso_rate = sta->ht_cap.mcs.rx_mask[0] << 1; + lq_sta->active_siso_rate |= sta->ht_cap.mcs.rx_mask[0] & 0x1; lq_sta->active_siso_rate &= ~((u16)0x2); lq_sta->active_siso_rate <<= IWL_FIRST_OFDM_RATE; /* Same here */ - lq_sta->active_mimo2_rate = conf->ht_cap.mcs.rx_mask[1] << 1; - lq_sta->active_mimo2_rate |= conf->ht_cap.mcs.rx_mask[1] & 0x1; + lq_sta->active_mimo2_rate = sta->ht_cap.mcs.rx_mask[1] << 1; + lq_sta->active_mimo2_rate |= sta->ht_cap.mcs.rx_mask[1] & 0x1; lq_sta->active_mimo2_rate &= ~((u16)0x2); lq_sta->active_mimo2_rate <<= IWL_FIRST_OFDM_RATE; - lq_sta->active_mimo3_rate = conf->ht_cap.mcs.rx_mask[2] << 1; - lq_sta->active_mimo3_rate |= conf->ht_cap.mcs.rx_mask[2] & 0x1; + lq_sta->active_mimo3_rate = sta->ht_cap.mcs.rx_mask[2] << 1; + lq_sta->active_mimo3_rate |= sta->ht_cap.mcs.rx_mask[2] & 0x1; lq_sta->active_mimo3_rate &= ~((u16)0x2); lq_sta->active_mimo3_rate <<= IWL_FIRST_OFDM_RATE; diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index 79a24410dd0a..7c3eb3d8f7e7 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -552,17 +552,30 @@ static int iwl4965_send_beacon_cmd(struct iwl_priv *priv) static void iwl4965_ht_conf(struct iwl_priv *priv, struct ieee80211_bss_conf *bss_conf) { - struct ieee80211_sta_ht_cap *ht_conf = bss_conf->ht_cap; - struct ieee80211_ht_bss_info *ht_bss_conf = bss_conf->ht_bss_conf; + struct ieee80211_sta_ht_cap *ht_conf; struct iwl_ht_info *iwl_conf = &priv->current_ht_config; + struct ieee80211_sta *sta; IWL_DEBUG_MAC80211("enter: \n"); - iwl_conf->is_ht = bss_conf->assoc_ht; - if (!iwl_conf->is_ht) return; + + /* + * It is totally wrong to base global information on something + * that is valid only when associated, alas, this driver works + * that way and I don't know how to fix it. + */ + + rcu_read_lock(); + sta = ieee80211_find_sta(priv->hw, priv->bssid); + if (!sta) { + rcu_read_unlock(); + return; + } + ht_conf = &sta->ht_cap; + if (ht_conf->cap & IEEE80211_HT_CAP_SGI_20) iwl_conf->sgf |= HT_SHORT_GI_20MHZ; if (ht_conf->cap & IEEE80211_HT_CAP_SGI_40) @@ -574,8 +587,8 @@ static void iwl4965_ht_conf(struct iwl_priv *priv, iwl_conf->supported_chan_width = !!(ht_conf->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40); - iwl_conf->extension_chan_offset = - ht_bss_conf->bss_cap & IEEE80211_HT_PARAM_CHA_SEC_OFFSET; + + iwl_conf->extension_chan_offset = bss_conf->ht.secondary_channel_offset; /* If no above or below channel supplied disable FAT channel */ if (iwl_conf->extension_chan_offset != IEEE80211_HT_PARAM_CHA_SEC_ABOVE && iwl_conf->extension_chan_offset != IEEE80211_HT_PARAM_CHA_SEC_BELOW) { @@ -587,15 +600,14 @@ static void iwl4965_ht_conf(struct iwl_priv *priv, memcpy(&iwl_conf->mcs, &ht_conf->mcs, 16); - iwl_conf->control_channel = ht_bss_conf->primary_channel; - iwl_conf->tx_chan_width = - !!(ht_bss_conf->bss_cap & IEEE80211_HT_PARAM_CHAN_WIDTH_ANY); + iwl_conf->tx_chan_width = bss_conf->ht.width_40_ok; iwl_conf->ht_protection = - ht_bss_conf->bss_op_mode & IEEE80211_HT_OP_MODE_PROTECTION; + bss_conf->ht.operation_mode & IEEE80211_HT_OP_MODE_PROTECTION; iwl_conf->non_GF_STA_present = - !!(ht_bss_conf->bss_op_mode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT); + !!(bss_conf->ht.operation_mode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT); + + rcu_read_unlock(); - IWL_DEBUG_MAC80211("control channel %d\n", iwl_conf->control_channel); IWL_DEBUG_MAC80211("leave\n"); } @@ -2746,6 +2758,8 @@ static int iwl4965_mac_config(struct ieee80211_hw *hw, u32 changed) mutex_lock(&priv->mutex); IWL_DEBUG_MAC80211("enter to channel %d\n", conf->channel->hw_value); + priv->current_ht_config.is_ht = conf->ht.enabled; + if (conf->radio_enabled && iwl_radio_kill_sw_enable_radio(priv)) { IWL_DEBUG_MAC80211("leave - RF-KILL - waiting for uCode\n"); goto out; @@ -3104,7 +3118,6 @@ static void iwl4965_bss_info_changed(struct ieee80211_hw *hw, } if (changes & BSS_CHANGED_HT) { - IWL_DEBUG_MAC80211("HT %d\n", bss_conf->assoc_ht); iwl4965_ht_conf(priv, bss_conf); iwl_set_rxon_chain(priv); } diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index 4678da447ff6..10f5a0a233fe 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -637,8 +637,8 @@ u8 iwl_is_fat_tx_allowed(struct iwl_priv *priv, } return iwl_is_channel_extension(priv, priv->band, - iwl_ht_conf->control_channel, - iwl_ht_conf->extension_chan_offset); + le16_to_cpu(priv->staging_rxon.channel), + iwl_ht_conf->extension_chan_offset); } EXPORT_SYMBOL(iwl_is_fat_tx_allowed); @@ -663,13 +663,6 @@ void iwl_set_rxon_ht(struct iwl_priv *priv, struct iwl_ht_info *ht_info) rxon->flags &= ~(RXON_FLG_CHANNEL_MODE_MIXED_MSK | RXON_FLG_CHANNEL_MODE_PURE_40_MSK); - if (le16_to_cpu(rxon->channel) != ht_info->control_channel) { - IWL_DEBUG_ASSOC("control diff than current %d %d\n", - le16_to_cpu(rxon->channel), - ht_info->control_channel); - return; - } - /* Note: control channel is opposite of extension channel */ switch (ht_info->extension_chan_offset) { case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: @@ -692,14 +685,12 @@ void iwl_set_rxon_ht(struct iwl_priv *priv, struct iwl_ht_info *ht_info) IWL_DEBUG_ASSOC("supported HT rate 0x%X 0x%X 0x%X " "rxon flags 0x%X operation mode :0x%X " - "extension channel offset 0x%x " - "control chan %d\n", + "extension channel offset 0x%x\n", ht_info->mcs.rx_mask[0], ht_info->mcs.rx_mask[1], ht_info->mcs.rx_mask[2], le32_to_cpu(rxon->flags), ht_info->ht_protection, - ht_info->extension_chan_offset, - ht_info->control_channel); + ht_info->extension_chan_offset); return; } EXPORT_SYMBOL(iwl_set_rxon_ht); diff --git a/drivers/net/wireless/iwlwifi/iwl-dev.h b/drivers/net/wireless/iwlwifi/iwl-dev.h index 572250ee9d58..2e9514933970 100644 --- a/drivers/net/wireless/iwlwifi/iwl-dev.h +++ b/drivers/net/wireless/iwlwifi/iwl-dev.h @@ -413,7 +413,6 @@ struct iwl_ht_info { u8 mpdu_density; struct ieee80211_mcs_info mcs; /* BSS related data */ - u8 control_channel; u8 extension_chan_offset; u8 tx_chan_width; u8 ht_protection; diff --git a/drivers/net/wireless/iwlwifi/iwl-sta.c b/drivers/net/wireless/iwlwifi/iwl-sta.c index b9b8554433a6..218483d096cc 100644 --- a/drivers/net/wireless/iwlwifi/iwl-sta.c +++ b/drivers/net/wireless/iwlwifi/iwl-sta.c @@ -890,20 +890,31 @@ static void iwl_sta_init_lq(struct iwl_priv *priv, const u8 *addr, int is_ap) */ int iwl_rxon_add_station(struct iwl_priv *priv, const u8 *addr, int is_ap) { + struct ieee80211_sta *sta; + struct ieee80211_sta_ht_cap ht_config; + struct ieee80211_sta_ht_cap *cur_ht_config = NULL; u8 sta_id; /* Add station to device's station table */ - struct ieee80211_conf *conf = &priv->hw->conf; - struct ieee80211_sta_ht_cap *cur_ht_config = &conf->ht_cap; - - if ((is_ap) && - (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) && - (priv->iw_mode == NL80211_IFTYPE_STATION)) - sta_id = iwl_add_station_flags(priv, addr, is_ap, - 0, cur_ht_config); - else - sta_id = iwl_add_station_flags(priv, addr, is_ap, - 0, NULL); + + /* + * XXX: This check is definitely not correct, if we're an AP + * it'll always be false which is not what we want, but + * it doesn't look like iwlagn is prepared to be an HT + * AP anyway. + */ + if (priv->current_ht_config.is_ht) { + rcu_read_lock(); + sta = ieee80211_find_sta(priv->hw, addr); + if (sta) { + memcpy(&ht_config, &sta->ht_cap, sizeof(ht_config)); + cur_ht_config = &ht_config; + } + rcu_read_unlock(); + } + + sta_id = iwl_add_station_flags(priv, addr, is_ap, + 0, cur_ht_config); /* Set up default rate scaling table in device's station table */ iwl_sta_init_lq(priv, addr, is_ap); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 94ff3eface49..9801afb62545 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3,7 +3,7 @@ * * Copyright 2002-2005, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc - * Copyright 2007 Johannes Berg + * Copyright 2007-2008 Johannes Berg * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -81,22 +81,6 @@ enum ieee80211_notification_types { IEEE80211_NOTIFY_RE_ASSOC, }; -/** - * struct ieee80211_ht_bss_info - describing BSS's HT characteristics - * - * This structure describes most essential parameters needed - * to describe 802.11n HT characteristics in a BSS. - * - * @primary_channel: channel number of primery channel - * @bss_cap: 802.11n's general BSS capabilities (e.g. channel width) - * @bss_op_mode: 802.11n's BSS operation modes (e.g. HT protection) - */ -struct ieee80211_ht_bss_info { - u8 primary_channel; - u8 bss_cap; /* use IEEE80211_HT_IE_CHA_ */ - u8 bss_op_mode; /* use IEEE80211_HT_IE_ */ -}; - /** * enum ieee80211_max_queues - maximum number of queues * @@ -171,6 +155,19 @@ enum ieee80211_bss_change { BSS_CHANGED_BASIC_RATES = 1<<5, }; +/** + * struct ieee80211_bss_ht_conf - BSS's changing HT configuration + * @secondary_channel_offset: secondary channel offset, uses + * %IEEE80211_HT_PARAM_CHA_SEC_ values + * @width_40_ok: indicates that 40 MHz bandwidth may be used for TX + * @operation_mode: HT operation mode (like in &struct ieee80211_ht_info) + */ +struct ieee80211_bss_ht_conf { + u8 secondary_channel_offset; + bool width_40_ok; + u16 operation_mode; +}; + /** * struct ieee80211_bss_conf - holds the BSS's changing parameters * @@ -190,9 +187,7 @@ enum ieee80211_bss_change { * @timestamp: beacon timestamp * @beacon_int: beacon interval * @assoc_capability: capabilities taken from assoc resp - * @assoc_ht: association in HT mode - * @ht_cap: ht capabilities - * @ht_bss_conf: ht extended capabilities + * @ht: BSS's HT configuration * @basic_rates: bitmap of basic rates, each bit stands for an * index into the rate table configured by the driver in * the current band. @@ -210,10 +205,7 @@ struct ieee80211_bss_conf { u16 assoc_capability; u64 timestamp; u64 basic_rates; - /* ht related data */ - bool assoc_ht; - struct ieee80211_sta_ht_cap *ht_cap; - struct ieee80211_ht_bss_info *ht_bss_conf; + struct ieee80211_bss_ht_conf ht; }; /** @@ -447,13 +439,11 @@ struct ieee80211_rx_status { * Flags to define PHY configuration options * * @IEEE80211_CONF_RADIOTAP: add radiotap header at receive time (if supported) - * @IEEE80211_CONF_SUPPORT_HT_MODE: use 802.11n HT capabilities (if supported) * @IEEE80211_CONF_PS: Enable 802.11 power save mode */ enum ieee80211_conf_flags { IEEE80211_CONF_RADIOTAP = (1<<0), - IEEE80211_CONF_SUPPORT_HT_MODE = (1<<1), - IEEE80211_CONF_PS = (1<<2), + IEEE80211_CONF_PS = (1<<1), }; /* XXX: remove all this once drivers stop trying to use it */ @@ -463,6 +453,10 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void) } #define IEEE80211_CONF_SHORT_SLOT_TIME (__IEEE80211_CONF_SHORT_SLOT_TIME()) +struct ieee80211_ht_conf { + bool enabled; +}; + /** * enum ieee80211_conf_changed - denotes which configuration changed * @@ -474,6 +468,7 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void) * @IEEE80211_CONF_CHANGE_POWER: the TX power changed * @IEEE80211_CONF_CHANGE_CHANNEL: the channel changed * @IEEE80211_CONF_CHANGE_RETRY_LIMITS: retry limits changed + * @IEEE80211_CONF_CHANGE_HT: HT configuration changed */ enum ieee80211_conf_changed { IEEE80211_CONF_CHANGE_RADIO_ENABLED = BIT(0), @@ -484,6 +479,7 @@ enum ieee80211_conf_changed { IEEE80211_CONF_CHANGE_POWER = BIT(5), IEEE80211_CONF_CHANGE_CHANNEL = BIT(6), IEEE80211_CONF_CHANGE_RETRY_LIMITS = BIT(7), + IEEE80211_CONF_CHANGE_HT = BIT(8), }; /** @@ -496,9 +492,8 @@ enum ieee80211_conf_changed { * @listen_interval: listen interval in units of beacon interval * @flags: configuration flags defined above * @power_level: requested transmit power (in dBm) - * @ht_cap: describes current self configuration of 802.11n HT capabilities - * @ht_bss_conf: describes current BSS configuration of 802.11n HT parameters * @channel: the channel to tune to + * @ht: the HT configuration for the device * @long_frame_max_tx_count: Maximum number of transmissions for a "long" frame * (a frame not RTS protected), called "dot11LongRetryLimit" in 802.11, * but actually means the number of transmissions not the number of retries @@ -517,9 +512,7 @@ struct ieee80211_conf { u8 long_frame_max_tx_count, short_frame_max_tx_count; struct ieee80211_channel *channel; - - struct ieee80211_sta_ht_cap ht_cap; - struct ieee80211_ht_bss_info ht_bss_conf; + struct ieee80211_ht_conf ht; }; /** @@ -715,7 +708,7 @@ enum set_key_cmd { * @addr: MAC address * @aid: AID we assigned to the station if we're an AP * @supp_rates: Bitmap of supported rates (per band) - * @ht_cap: HT capabilities of this STA + * @ht_cap: HT capabilities of this STA; restricted to our own TX capabilities * @drv_priv: data area for driver use, will always be aligned to * sizeof(void *), size is determined in hw information. */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 28382b5c7c25..55e3a26510ed 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -582,6 +582,8 @@ static void sta_apply_parameters(struct ieee80211_local *local, struct ieee80211_supported_band *sband; struct ieee80211_sub_if_data *sdata = sta->sdata; + sband = local->hw.wiphy->bands[local->oper_channel->band]; + /* * FIXME: updating the flags is racy when this function is * called from ieee80211_change_station(), this will @@ -622,7 +624,6 @@ static void sta_apply_parameters(struct ieee80211_local *local, if (params->supported_rates) { rates = 0; - sband = local->hw.wiphy->bands[local->oper_channel->band]; for (i = 0; i < params->supported_rates_len; i++) { int rate = (params->supported_rates[i] & 0x7f) * 5; @@ -635,7 +636,8 @@ static void sta_apply_parameters(struct ieee80211_local *local, } if (params->ht_capa) - ieee80211_ht_cap_ie_to_sta_ht_cap(params->ht_capa, + ieee80211_ht_cap_ie_to_sta_ht_cap(sband, + params->ht_capa, &sta->sta.ht_cap); if (ieee80211_vif_is_mesh(&sdata->vif) && params->plink_action) { diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index e2d121bf2745..42c3e590df98 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -20,114 +20,38 @@ #include "sta_info.h" #include "wme.h" -void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_ht_cap *ht_cap_ie, +void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, + struct ieee80211_ht_cap *ht_cap_ie, struct ieee80211_sta_ht_cap *ht_cap) { + u8 ampdu_info, tx_mcs_set_cap; + int i, max_tx_streams; BUG_ON(!ht_cap); memset(ht_cap, 0, sizeof(*ht_cap)); - if (ht_cap_ie) { - u8 ampdu_info = ht_cap_ie->ampdu_params_info; - - ht_cap->ht_supported = true; - ht_cap->cap = le16_to_cpu(ht_cap_ie->cap_info); - ht_cap->ampdu_factor = - ampdu_info & IEEE80211_HT_AMPDU_PARM_FACTOR; - ht_cap->ampdu_density = - (ampdu_info & IEEE80211_HT_AMPDU_PARM_DENSITY) >> 2; - memcpy(&ht_cap->mcs, &ht_cap_ie->mcs, sizeof(ht_cap->mcs)); - } else - ht_cap->ht_supported = false; -} - -void ieee80211_ht_info_ie_to_ht_bss_info( - struct ieee80211_ht_info *ht_add_info_ie, - struct ieee80211_ht_bss_info *bss_info) -{ - BUG_ON(!bss_info); - - memset(bss_info, 0, sizeof(*bss_info)); - - if (ht_add_info_ie) { - u16 op_mode; - op_mode = le16_to_cpu(ht_add_info_ie->operation_mode); - - bss_info->primary_channel = ht_add_info_ie->control_chan; - bss_info->bss_cap = ht_add_info_ie->ht_param; - bss_info->bss_op_mode = (u8)(op_mode & 0xff); - } -} - -/* - * ieee80211_handle_ht should be called only after the operating band - * has been determined as ht configuration depends on the hw's - * HT abilities for a specific band. - */ -u32 ieee80211_handle_ht(struct ieee80211_local *local, - struct ieee80211_sta_ht_cap *req_ht_cap, - struct ieee80211_ht_bss_info *req_bss_cap) -{ - struct ieee80211_conf *conf = &local->hw.conf; - struct ieee80211_supported_band *sband; - struct ieee80211_sta_ht_cap ht_cap; - struct ieee80211_ht_bss_info ht_bss_conf; - u32 changed = 0; - int i; - u8 max_tx_streams; - u8 tx_mcs_set_cap; - bool enable_ht = true; - - sband = local->hw.wiphy->bands[conf->channel->band]; - - memset(&ht_cap, 0, sizeof(ht_cap)); - memset(&ht_bss_conf, 0, sizeof(struct ieee80211_ht_bss_info)); - - /* HT is not supported */ - if (!sband->ht_cap.ht_supported) - enable_ht = false; - - /* disable HT */ - if (!enable_ht) { - if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) - changed |= BSS_CHANGED_HT; - conf->flags &= ~IEEE80211_CONF_SUPPORT_HT_MODE; - conf->ht_cap.ht_supported = false; - return changed; - } - - - if (!(conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE)) - changed |= BSS_CHANGED_HT; - - conf->flags |= IEEE80211_CONF_SUPPORT_HT_MODE; - ht_cap.ht_supported = true; + if (!ht_cap_ie) + return; - ht_cap.cap = req_ht_cap->cap & sband->ht_cap.cap; - ht_cap.cap &= ~IEEE80211_HT_CAP_SM_PS; - ht_cap.cap |= sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS; + ht_cap->ht_supported = true; - ht_bss_conf.primary_channel = req_bss_cap->primary_channel; - ht_bss_conf.bss_cap = req_bss_cap->bss_cap; - ht_bss_conf.bss_op_mode = req_bss_cap->bss_op_mode; + ht_cap->cap = ht_cap->cap & sband->ht_cap.cap; + ht_cap->cap &= ~IEEE80211_HT_CAP_SM_PS; + ht_cap->cap |= sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS; - ht_cap.ampdu_factor = req_ht_cap->ampdu_factor; - ht_cap.ampdu_density = req_ht_cap->ampdu_density; + ampdu_info = ht_cap_ie->ampdu_params_info; + ht_cap->ampdu_factor = + ampdu_info & IEEE80211_HT_AMPDU_PARM_FACTOR; + ht_cap->ampdu_density = + (ampdu_info & IEEE80211_HT_AMPDU_PARM_DENSITY) >> 2; /* own MCS TX capabilities */ tx_mcs_set_cap = sband->ht_cap.mcs.tx_params; - /* - * configure supported Tx MCS according to requested MCS - * (based in most cases on Rx capabilities of peer) and self - * Tx MCS capabilities (as defined by low level driver HW - * Tx capabilities) - */ - /* can we TX with MCS rates? */ if (!(tx_mcs_set_cap & IEEE80211_HT_MCS_TX_DEFINED)) - goto check_changed; + return; /* Counting from 0, therefore +1 */ if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_RX_DIFF) @@ -145,29 +69,73 @@ u32 ieee80211_handle_ht(struct ieee80211_local *local, * - remainder are multiple spatial streams using unequal modulation */ for (i = 0; i < max_tx_streams; i++) - ht_cap.mcs.rx_mask[i] = - sband->ht_cap.mcs.rx_mask[i] & - req_ht_cap->mcs.rx_mask[i]; + ht_cap->mcs.rx_mask[i] = + sband->ht_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i]; if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION) for (i = IEEE80211_HT_MCS_UNEQUAL_MODULATION_START_BYTE; i < IEEE80211_HT_MCS_MASK_LEN; i++) - ht_cap.mcs.rx_mask[i] = + ht_cap->mcs.rx_mask[i] = sband->ht_cap.mcs.rx_mask[i] & - req_ht_cap->mcs.rx_mask[i]; + ht_cap_ie->mcs.rx_mask[i]; /* handle MCS rate 32 too */ - if (sband->ht_cap.mcs.rx_mask[32/8] & - req_ht_cap->mcs.rx_mask[32/8] & 1) - ht_cap.mcs.rx_mask[32/8] |= 1; + if (sband->ht_cap.mcs.rx_mask[32/8] & ht_cap_ie->mcs.rx_mask[32/8] & 1) + ht_cap->mcs.rx_mask[32/8] |= 1; +} + +/* + * ieee80211_enable_ht should be called only after the operating band + * has been determined as ht configuration depends on the hw's + * HT abilities for a specific band. + */ +u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, + struct ieee80211_ht_info *hti, + u16 ap_ht_cap_flags) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_supported_band *sband; + struct ieee80211_bss_ht_conf ht; + u32 changed = 0; + bool enable_ht = true, ht_changed; + + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + + memset(&ht, 0, sizeof(ht)); + + /* HT is not supported */ + if (!sband->ht_cap.ht_supported) + enable_ht = false; + + /* check that channel matches the right operating channel */ + if (local->hw.conf.channel->center_freq != + ieee80211_channel_to_frequency(hti->control_chan)) + enable_ht = false; + + /* + * XXX: This is totally incorrect when there are multiple virtual + * interfaces, needs to be fixed later. + */ + ht_changed = local->hw.conf.ht.enabled != enable_ht; + local->hw.conf.ht.enabled = enable_ht; + if (ht_changed) + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_HT); + + /* disable HT */ + if (!enable_ht) + return 0; + ht.secondary_channel_offset = + hti->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET; + ht.width_40_ok = + !(ap_ht_cap_flags & IEEE80211_HT_CAP_40MHZ_INTOLERANT) && + (sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) && + (hti->ht_param & IEEE80211_HT_PARAM_CHAN_WIDTH_ANY); + ht.operation_mode = le16_to_cpu(hti->operation_mode); - check_changed: /* if bss configuration changed store the new one */ - if (memcmp(&conf->ht_cap, &ht_cap, sizeof(ht_cap)) || - memcmp(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf))) { + if (memcmp(&sdata->vif.bss_conf.ht, &ht, sizeof(ht))) { changed |= BSS_CHANGED_HT; - memcpy(&conf->ht_cap, &ht_cap, sizeof(ht_cap)); - memcpy(&conf->ht_bss_conf, &ht_bss_conf, sizeof(ht_bss_conf)); + sdata->vif.bss_conf.ht = ht; } return changed; @@ -900,8 +868,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, /* sanity check for incoming parameters: * check if configuration can support the BA policy * and if buffer size does not exceeds max value */ + /* XXX: check own ht delayed BA capability?? */ if (((ba_policy != 1) - && (!(conf->ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) + && (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) || (buf_size > IEEE80211_MAX_AMPDU_BUF)) { status = WLAN_STATUS_INVALID_QOS_PARAM; #ifdef CONFIG_MAC80211_HT_DEBUG diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 859b5b001f22..6f8756d26a93 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -955,14 +955,12 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev); int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev); /* HT */ -void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_ht_cap *ht_cap_ie, +void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, + struct ieee80211_ht_cap *ht_cap_ie, struct ieee80211_sta_ht_cap *ht_cap); -void ieee80211_ht_info_ie_to_ht_bss_info( - struct ieee80211_ht_info *ht_add_info_ie, - struct ieee80211_ht_bss_info *bss_info); -u32 ieee80211_handle_ht(struct ieee80211_local *local, - struct ieee80211_sta_ht_cap *req_ht_cap, - struct ieee80211_ht_bss_info *req_bss_cap); +u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, + struct ieee80211_ht_info *hti, + u16 ap_ht_cap_flags); void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn); void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 9c5f5c37a49e..39bc9c69893b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -700,14 +700,15 @@ static void ieee80211_sta_send_associnfo(struct ieee80211_sub_if_data *sdata, static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta) + struct ieee80211_if_sta *ifsta, + u32 bss_info_changed) { struct ieee80211_local *local = sdata->local; struct ieee80211_conf *conf = &local_to_hw(local)->conf; - u32 changed = BSS_CHANGED_ASSOC; struct ieee80211_bss *bss; + bss_info_changed |= BSS_CHANGED_ASSOC; ifsta->flags |= IEEE80211_STA_ASSOCIATED; if (sdata->vif.type != NL80211_IFTYPE_STATION) @@ -722,19 +723,12 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.timestamp = bss->timestamp; sdata->vif.bss_conf.dtim_period = bss->dtim_period; - changed |= ieee80211_handle_bss_capability(sdata, + bss_info_changed |= ieee80211_handle_bss_capability(sdata, bss->capability, bss->has_erp_value, bss->erp_value); ieee80211_rx_bss_put(local, bss); } - if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) { - changed |= BSS_CHANGED_HT; - sdata->vif.bss_conf.assoc_ht = 1; - sdata->vif.bss_conf.ht_cap = &conf->ht_cap; - sdata->vif.bss_conf.ht_bss_conf = &conf->ht_bss_conf; - } - ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN); ieee80211_sta_send_associnfo(sdata, ifsta); @@ -748,8 +742,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, * when we have associated, we aren't checking whether it actually * changed or not. */ - changed |= BSS_CHANGED_BASIC_RATES; - ieee80211_bss_info_change_notify(sdata, changed); + bss_info_changed |= BSS_CHANGED_BASIC_RATES; + ieee80211_bss_info_change_notify(sdata, bss_info_changed); netif_tx_start_all_queues(sdata->dev); netif_carrier_on(sdata->dev); @@ -813,7 +807,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct sta_info *sta; - u32 changed = BSS_CHANGED_ASSOC; + u32 changed = 0; rcu_read_lock(); @@ -847,15 +841,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; changed |= ieee80211_reset_erp_info(sdata); - if (sdata->vif.bss_conf.assoc_ht) - changed |= BSS_CHANGED_HT; - - sdata->vif.bss_conf.assoc_ht = 0; - sdata->vif.bss_conf.ht_cap = NULL; - sdata->vif.bss_conf.ht_bss_conf = NULL; - ieee80211_led_assoc(local, 0); - sdata->vif.bss_conf.assoc = 0; + changed |= BSS_CHANGED_ASSOC; + sdata->vif.bss_conf.assoc = false; ieee80211_sta_send_apinfo(sdata, ifsta); @@ -867,6 +855,11 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); sta_info_destroy(sta); + + local->hw.conf.ht.enabled = false; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_HT); + + ieee80211_bss_info_change_notify(sdata, changed); } static int ieee80211_sta_wep_configured(struct ieee80211_sub_if_data *sdata) @@ -1184,8 +1177,10 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems elems; struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; u8 *pos; + u32 changed = 0; int i, j; bool have_higher_than_11mbit = false; + u16 ap_ht_cap_flags; /* AssocResp and ReassocResp have identical structure, so process both * of them in this function. */ @@ -1333,15 +1328,11 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, else sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE; - if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param && - (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { - struct ieee80211_ht_bss_info bss_info; - ieee80211_ht_cap_ie_to_sta_ht_cap( + if (elems.ht_cap_elem) + ieee80211_ht_cap_ie_to_sta_ht_cap(sband, elems.ht_cap_elem, &sta->sta.ht_cap); - ieee80211_ht_info_ie_to_ht_bss_info( - elems.ht_info_elem, &bss_info); - ieee80211_handle_ht(local, &sta->sta.ht_cap, &bss_info); - } + + ap_ht_cap_flags = sta->sta.ht_cap.cap; rate_control_rate_init(sta); @@ -1353,11 +1344,16 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, } else rcu_read_unlock(); + if (elems.ht_info_elem && elems.wmm_param && + (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) + changed |= ieee80211_enable_ht(sdata, elems.ht_info_elem, + ap_ht_cap_flags); + /* set AID and assoc capability, * ieee80211_set_associated() will tell the driver */ bss_conf->aid = aid; bss_conf->assoc_capability = capab_info; - ieee80211_set_associated(sdata, ifsta); + ieee80211_set_associated(sdata, ifsta, changed); ieee80211_associated(sdata, ifsta); } @@ -1657,7 +1653,6 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, size_t baselen; struct ieee802_11_elems elems; struct ieee80211_local *local = sdata->local; - struct ieee80211_conf *conf = &local->hw.conf; u32 changed = 0; bool erp_valid; u8 erp_value = 0; @@ -1693,14 +1688,31 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, le16_to_cpu(mgmt->u.beacon.capab_info), erp_valid, erp_value); - if (elems.ht_cap_elem && elems.ht_info_elem && - elems.wmm_param && conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) { - struct ieee80211_ht_bss_info bss_info; - ieee80211_ht_info_ie_to_ht_bss_info( - elems.ht_info_elem, &bss_info); - changed |= ieee80211_handle_ht(local, &conf->ht_cap, - &bss_info); + if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param) { + struct sta_info *sta; + struct ieee80211_supported_band *sband; + u16 ap_ht_cap_flags; + + rcu_read_lock(); + + sta = sta_info_get(local, ifsta->bssid); + if (!sta) { + rcu_read_unlock(); + return; + } + + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + + ieee80211_ht_cap_ie_to_sta_ht_cap(sband, + elems.ht_cap_elem, &sta->sta.ht_cap); + + ap_ht_cap_flags = sta->sta.ht_cap.cap; + + rcu_read_unlock(); + + changed |= ieee80211_enable_ht(sdata, elems.ht_info_elem, + ap_ht_cap_flags); } ieee80211_bss_info_change_notify(sdata, changed); -- cgit v1.2.3 From e6a9854b05c1a6af1308fe2b8c68f35abf28a3ee Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 21 Oct 2008 12:40:02 +0200 Subject: mac80211/drivers: rewrite the rate control API So after the previous changes we were still unhappy with how convoluted the API is and decided to make things simpler for everybody. This completely changes the rate control API, now taking into account 802.11n with MCS rates and more control, most drivers don't support that though. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/adm8211.c | 21 +- drivers/net/wireless/ath5k/base.c | 34 ++- drivers/net/wireless/ath9k/main.c | 18 +- drivers/net/wireless/ath9k/rc.c | 40 +-- drivers/net/wireless/ath9k/xmit.c | 28 ++- drivers/net/wireless/b43/dma.c | 4 +- drivers/net/wireless/b43/main.c | 2 +- drivers/net/wireless/b43/pio.c | 3 +- drivers/net/wireless/b43/xmit.c | 60 ++++- drivers/net/wireless/b43/xmit.h | 5 +- drivers/net/wireless/b43legacy/dma.c | 46 ++-- drivers/net/wireless/b43legacy/main.c | 2 +- drivers/net/wireless/b43legacy/pio.c | 31 ++- drivers/net/wireless/b43legacy/xmit.c | 26 +- drivers/net/wireless/b43legacy/xmit.h | 2 +- drivers/net/wireless/iwlwifi/iwl-3945-rs.c | 57 ++--- drivers/net/wireless/iwlwifi/iwl-3945.c | 65 ++++- drivers/net/wireless/iwlwifi/iwl-3945.h | 2 + drivers/net/wireless/iwlwifi/iwl-4965.c | 8 +- drivers/net/wireless/iwlwifi/iwl-5000.c | 8 +- drivers/net/wireless/iwlwifi/iwl-agn-rs.c | 37 ++- drivers/net/wireless/iwlwifi/iwl-core.c | 19 +- drivers/net/wireless/iwlwifi/iwl3945-base.c | 5 +- drivers/net/wireless/libertas_tf/main.c | 4 +- drivers/net/wireless/mac80211_hwsim.c | 12 +- drivers/net/wireless/p54/p54common.c | 26 +- drivers/net/wireless/rt2x00/rt2x00dev.c | 14 +- drivers/net/wireless/rt2x00/rt2x00mac.c | 14 +- drivers/net/wireless/rt2x00/rt2x00queue.c | 22 +- drivers/net/wireless/rt2x00/rt2x00queue.h | 9 +- drivers/net/wireless/rtl8180_dev.c | 28 +-- drivers/net/wireless/rtl8187_dev.c | 18 +- drivers/net/wireless/zd1211rw/zd_mac.c | 32 +-- drivers/net/wireless/zd1211rw/zd_usb.c | 2 +- include/net/mac80211.h | 254 +++++++++++-------- net/mac80211/ieee80211_i.h | 8 - net/mac80211/main.c | 54 +++- net/mac80211/mesh_hwmp.c | 6 +- net/mac80211/rate.c | 52 ++-- net/mac80211/rate.h | 5 +- net/mac80211/rc80211_minstrel.c | 72 +++--- net/mac80211/rc80211_pid.h | 1 + net/mac80211/rc80211_pid_algo.c | 27 +- net/mac80211/rc80211_pid_debugfs.c | 5 +- net/mac80211/sta_info.h | 4 +- net/mac80211/tx.c | 378 +++++++++++++--------------- net/mac80211/wext.c | 4 +- 47 files changed, 889 insertions(+), 685 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c index 9a1e0c514c08..b96ebfe4ef3e 100644 --- a/drivers/net/wireless/adm8211.c +++ b/drivers/net/wireless/adm8211.c @@ -341,15 +341,14 @@ static void adm8211_interrupt_tci(struct ieee80211_hw *dev) pci_unmap_single(priv->pdev, info->mapping, info->skb->len, PCI_DMA_TODEVICE); - memset(&txi->status, 0, sizeof(txi->status)); + ieee80211_tx_info_clear_status(txi); + skb_pull(skb, sizeof(struct adm8211_tx_hdr)); memcpy(skb_push(skb, info->hdrlen), skb->cb, info->hdrlen); - if (!(txi->flags & IEEE80211_TX_CTL_NO_ACK)) { - if (status & TDES0_STATUS_ES) - txi->status.excessive_retries = 1; - else - txi->flags |= IEEE80211_TX_STAT_ACK; - } + if (!(txi->flags & IEEE80211_TX_CTL_NO_ACK) && + !(status & TDES0_STATUS_ES)) + txi->flags |= IEEE80211_TX_STAT_ACK; + ieee80211_tx_status_irqsafe(dev, skb); info->skb = NULL; @@ -1691,8 +1690,10 @@ static int adm8211_tx(struct ieee80211_hw *dev, struct sk_buff *skb) struct ieee80211_hdr *hdr; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_rate *txrate = ieee80211_get_tx_rate(dev, info); + u8 rc_flags; - short_preamble = !!(txrate->flags & IEEE80211_TX_CTL_SHORT_PREAMBLE); + rc_flags = info->control.rates[0].flags; + short_preamble = !!(rc_flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE); plcp_signal = txrate->bitrate; hdr = (struct ieee80211_hdr *)skb->data; @@ -1724,10 +1725,10 @@ static int adm8211_tx(struct ieee80211_hw *dev, struct sk_buff *skb) if (short_preamble) txhdr->header_control |= cpu_to_le16(ADM8211_TXHDRCTL_SHORT_PREAMBLE); - if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) + if (rc_flags & IEEE80211_TX_RC_USE_RTS_CTS) txhdr->header_control |= cpu_to_le16(ADM8211_TXHDRCTL_ENABLE_RTS); - txhdr->retry_limit = info->control.retry_limit; + txhdr->retry_limit = info->control.rates[0].count; adm8211_tx_raw(dev, skb, plcp_signal, hdrlen); diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c index 44401f6f578d..3773d983ea66 100644 --- a/drivers/net/wireless/ath5k/base.c +++ b/drivers/net/wireless/ath5k/base.c @@ -541,8 +541,8 @@ ath5k_pci_probe(struct pci_dev *pdev, /* set up multi-rate retry capabilities */ if (sc->ah->ah_version == AR5K_AR5212) { - hw->max_altrates = 3; - hw->max_altrate_tries = 11; + hw->max_rates = 4; + hw->max_rate_tries = 11; } /* Finish private driver data initialization */ @@ -1181,7 +1181,7 @@ ath5k_txbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf) ieee80211_get_hdrlen_from_skb(skb), AR5K_PKT_TYPE_NORMAL, (sc->power_level * 2), ieee80211_get_tx_rate(sc->hw, info)->hw_value, - info->control.retry_limit, keyidx, 0, flags, 0, 0); + info->control.rates[0].count, keyidx, 0, flags, 0, 0); if (ret) goto err_unmap; @@ -1193,7 +1193,7 @@ ath5k_txbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf) break; mrr_rate[i] = rate->hw_value; - mrr_tries[i] = info->control.retries[i].limit; + mrr_tries[i] = info->control.rates[i + 1].count; } ah->ah_setup_mrr_tx_desc(ah, ds, @@ -1849,30 +1849,26 @@ ath5k_tx_processq(struct ath5k_softc *sc, struct ath5k_txq *txq) pci_unmap_single(sc->pdev, bf->skbaddr, skb->len, PCI_DMA_TODEVICE); - memset(&info->status, 0, sizeof(info->status)); - info->tx_rate_idx = ath5k_hw_to_driver_rix(sc, - ts.ts_rate[ts.ts_final_idx]); - info->status.retry_count = ts.ts_longretry; - + ieee80211_tx_info_clear_status(info); for (i = 0; i < 4; i++) { - struct ieee80211_tx_altrate *r = - &info->status.retries[i]; + struct ieee80211_tx_rate *r = + &info->status.rates[i]; if (ts.ts_rate[i]) { - r->rate_idx = ath5k_hw_to_driver_rix(sc, ts.ts_rate[i]); - r->limit = ts.ts_retry[i]; + r->idx = ath5k_hw_to_driver_rix(sc, ts.ts_rate[i]); + r->count = ts.ts_retry[i]; } else { - r->rate_idx = -1; - r->limit = 0; + r->idx = -1; + r->count = 0; } } - info->status.excessive_retries = 0; + /* count the successful attempt as well */ + info->status.rates[ts.ts_final_idx].count++; + if (unlikely(ts.ts_status)) { sc->ll_stats.dot11ACKFailureCount++; - if (ts.ts_status & AR5K_TXERR_XRETRY) - info->status.excessive_retries = 1; - else if (ts.ts_status & AR5K_TXERR_FILT) + if (ts.ts_status & AR5K_TXERR_FILT) info->flags |= IEEE80211_TX_STAT_TX_FILTERED; } else { info->flags |= IEEE80211_TX_STAT_ACK; diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c index 32acaf7ff622..a7656a3ea1b0 100644 --- a/drivers/net/wireless/ath9k/main.c +++ b/drivers/net/wireless/ath9k/main.c @@ -457,12 +457,13 @@ void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb, DPRINTF(sc, ATH_DBG_XMIT, "%s: TX complete: skb: %p\n", __func__, skb); + ieee80211_tx_info_clear_status(tx_info); if (tx_info->flags & IEEE80211_TX_CTL_NO_ACK || tx_info->flags & IEEE80211_TX_STAT_TX_FILTERED) { - /* free driver's private data area of tx_info */ - if (tx_info->driver_data[0] != NULL) - kfree(tx_info->driver_data[0]); - tx_info->driver_data[0] = NULL; + /* free driver's private data area of tx_info, XXX: HACK! */ + if (tx_info->control.vif != NULL) + kfree(tx_info->control.vif); + tx_info->control.vif = NULL; } if (tx_status->flags & ATH_TX_BAR) { @@ -470,17 +471,12 @@ void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb, tx_status->flags &= ~ATH_TX_BAR; } - if (tx_status->flags & (ATH_TX_ERROR | ATH_TX_XRETRY)) { - if (!(tx_info->flags & IEEE80211_TX_CTL_NO_ACK)) { - /* Frame was not ACKed, but an ACK was expected */ - tx_info->status.excessive_retries = 1; - } - } else { + if (!(tx_status->flags & (ATH_TX_ERROR | ATH_TX_XRETRY))) { /* Frame was ACKed */ tx_info->flags |= IEEE80211_TX_STAT_ACK; } - tx_info->status.retry_count = tx_status->retries; + tx_info->status.rates[0].count = tx_status->retries + 1; ieee80211_tx_status(hw, skb); if (an) diff --git a/drivers/net/wireless/ath9k/rc.c b/drivers/net/wireless/ath9k/rc.c index 9b2526030965..6afafeddeda2 100644 --- a/drivers/net/wireless/ath9k/rc.c +++ b/drivers/net/wireless/ath9k/rc.c @@ -1864,24 +1864,21 @@ static void ath_tx_status(void *priv, struct ieee80211_supported_band *sband, hdr = (struct ieee80211_hdr *)skb->data; fc = hdr->frame_control; - tx_info_priv = (struct ath_tx_info_priv *)tx_info->driver_data[0]; + /* XXX: UGLY HACK!! */ + tx_info_priv = (struct ath_tx_info_priv *)tx_info->control.vif; spin_lock_bh(&sc->node_lock); an = ath_node_find(sc, hdr->addr1); spin_unlock_bh(&sc->node_lock); - if (!an || !priv_sta || !ieee80211_is_data(fc)) { - if (tx_info->driver_data[0] != NULL) { - kfree(tx_info->driver_data[0]); - tx_info->driver_data[0] = NULL; - } + if (tx_info_priv == NULL) return; - } - if (tx_info->driver_data[0] != NULL) { + + if (an && priv_sta && ieee80211_is_data(fc)) ath_rate_tx_complete(sc, an, priv_sta, tx_info_priv); - kfree(tx_info->driver_data[0]); - tx_info->driver_data[0] = NULL; - } + + kfree(tx_info_priv); + tx_info->control.vif = NULL; } static void ath_tx_aggr_resp(struct ath_softc *sc, @@ -1927,10 +1924,11 @@ static void ath_tx_aggr_resp(struct ath_softc *sc, } } -static void ath_get_rate(void *priv, struct ieee80211_supported_band *sband, - struct ieee80211_sta *sta, void *priv_sta, - struct sk_buff *skb, struct rate_selection *sel) +static void ath_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, + struct ieee80211_tx_rate_control *txrc) { + struct ieee80211_supported_band *sband = txrc->sband; + struct sk_buff *skb = txrc->skb; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; struct ath_softc *sc = priv; struct ieee80211_hw *hw = sc->hw; @@ -1945,17 +1943,17 @@ static void ath_get_rate(void *priv, struct ieee80211_supported_band *sband, DPRINTF(sc, ATH_DBG_RATE, "%s\n", __func__); - /* allocate driver private area of tx_info */ - tx_info->driver_data[0] = kzalloc(sizeof(*tx_info_priv), GFP_ATOMIC); - ASSERT(tx_info->driver_data[0] != NULL); - tx_info_priv = (struct ath_tx_info_priv *)tx_info->driver_data[0]; + /* allocate driver private area of tx_info, XXX: UGLY HACK! */ + tx_info->control.vif = kzalloc(sizeof(*tx_info_priv), GFP_ATOMIC); + tx_info_priv = (struct ath_tx_info_priv *)tx_info->control.vif; + ASSERT(tx_info_priv != NULL); lowest_idx = rate_lowest_index(sband, sta); tx_info_priv->min_rate = (sband->bitrates[lowest_idx].bitrate * 2) / 10; /* lowest rate for management and multicast/broadcast frames */ if (!ieee80211_is_data(fc) || is_multicast_ether_addr(hdr->addr1) || !sta) { - sel->rate_idx = lowest_idx; + tx_info->control.rates[0].idx = lowest_idx; return; } @@ -1966,8 +1964,10 @@ static void ath_get_rate(void *priv, struct ieee80211_supported_band *sband, tx_info_priv->rcs, &is_probe, false); +#if 0 if (is_probe) sel->probe_idx = ath_rc_priv->tx_ratectrl.probe_rate; +#endif /* Ratecontrol sometimes returns invalid rate index */ if (tx_info_priv->rcs[0].rix != 0xff) @@ -1975,7 +1975,7 @@ static void ath_get_rate(void *priv, struct ieee80211_supported_band *sband, else tx_info_priv->rcs[0].rix = ath_rc_priv->prev_data_rix; - sel->rate_idx = tx_info_priv->rcs[0].rix; + tx_info->control.rates[0].idx = tx_info_priv->rcs[0].rix; /* Check if aggregation has to be enabled for this tid */ diff --git a/drivers/net/wireless/ath9k/xmit.c b/drivers/net/wireless/ath9k/xmit.c index ba818cc2fb5c..9fa395418a6c 100644 --- a/drivers/net/wireless/ath9k/xmit.c +++ b/drivers/net/wireless/ath9k/xmit.c @@ -168,7 +168,9 @@ static void fill_min_rates(struct sk_buff *skb, struct ath_tx_control *txctl) hdr = (struct ieee80211_hdr *)skb->data; fc = hdr->frame_control; - tx_info_priv = (struct ath_tx_info_priv *)tx_info->driver_data[0]; + + /* XXX: HACK! */ + tx_info_priv = (struct ath_tx_info_priv *)tx_info->control.vif; if (ieee80211_is_mgmt(fc) || ieee80211_is_ctl(fc)) { txctl->use_minrate = 1; @@ -288,13 +290,16 @@ static int ath_tx_prepare(struct ath_softc *sc, if (tx_info->flags & IEEE80211_TX_CTL_NO_ACK) txctl->flags |= ATH9K_TXDESC_NOACK; - if (tx_info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) + + if (tx_info->control.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) txctl->flags |= ATH9K_TXDESC_RTSENA; /* * Setup for rate calculations. */ - tx_info_priv = (struct ath_tx_info_priv *)tx_info->driver_data[0]; + + /* XXX: HACK! */ + tx_info_priv = (struct ath_tx_info_priv *)tx_info->control.vif; rcs = tx_info_priv->rcs; if (ieee80211_is_data(fc) && !txctl->use_minrate) { @@ -855,7 +860,9 @@ static int ath_tx_send_normal(struct ath_softc *sc, skb = (struct sk_buff *)bf->bf_mpdu; tx_info = IEEE80211_SKB_CB(skb); - tx_info_priv = (struct ath_tx_info_priv *)tx_info->driver_data[0]; + + /* XXX: HACK! */ + tx_info_priv = (struct ath_tx_info_priv *)tx_info->control.vif; memcpy(bf->bf_rcs, tx_info_priv->rcs, 4 * sizeof(tx_info_priv->rcs[0])); /* update starting sequence number for subsequent ADDBA request */ @@ -1249,8 +1256,9 @@ static int ath_tx_processq(struct ath_softc *sc, struct ath_txq *txq) } skb = bf->bf_mpdu; tx_info = IEEE80211_SKB_CB(skb); - tx_info_priv = (struct ath_tx_info_priv *) - tx_info->driver_data[0]; + + /* XXX: HACK! */ + tx_info_priv = (struct ath_tx_info_priv *) tx_info->control.vif; if (ds->ds_txstat.ts_status & ATH9K_TXERR_FILT) tx_info->flags |= IEEE80211_TX_STAT_TX_FILTERED; if ((ds->ds_txstat.ts_status & ATH9K_TXERR_FILT) == 0 && @@ -1431,7 +1439,8 @@ static int ath_tx_send_ampdu(struct ath_softc *sc, skb = (struct sk_buff *)bf->bf_mpdu; tx_info = IEEE80211_SKB_CB(skb); - tx_info_priv = (struct ath_tx_info_priv *)tx_info->driver_data[0]; + /* XXX: HACK! */ + tx_info_priv = (struct ath_tx_info_priv *)tx_info->control.vif; memcpy(bf->bf_rcs, tx_info_priv->rcs, 4 * sizeof(tx_info_priv->rcs[0])); /* Add sub-frame to BAW */ @@ -1466,7 +1475,7 @@ static u32 ath_lookup_rate(struct ath_softc *sc, skb = (struct sk_buff *)bf->bf_mpdu; tx_info = IEEE80211_SKB_CB(skb); tx_info_priv = (struct ath_tx_info_priv *) - tx_info->driver_data[0]; + tx_info->control.vif; /* XXX: HACK! */ memcpy(bf->bf_rcs, tx_info_priv->rcs, 4 * sizeof(tx_info_priv->rcs[0])); @@ -1927,7 +1936,8 @@ static int ath_tx_start_dma(struct ath_softc *sc, bf->bf_flags = txctl->flags; bf->bf_keytype = txctl->keytype; - tx_info_priv = (struct ath_tx_info_priv *)tx_info->driver_data[0]; + /* XXX: HACK! */ + tx_info_priv = (struct ath_tx_info_priv *)tx_info->control.vif; rcs = tx_info_priv->rcs; bf->bf_rcs[0] = rcs[0]; bf->bf_rcs[1] = rcs[1]; diff --git a/drivers/net/wireless/b43/dma.c b/drivers/net/wireless/b43/dma.c index 098f886976f6..6d65a02b7052 100644 --- a/drivers/net/wireless/b43/dma.c +++ b/drivers/net/wireless/b43/dma.c @@ -1387,13 +1387,11 @@ void b43_dma_handle_txstatus(struct b43_wldev *dev, info = IEEE80211_SKB_CB(meta->skb); - memset(&info->status, 0, sizeof(info->status)); - /* * Call back to inform the ieee80211 subsystem about * the status of the transmission. */ - frame_succeed = b43_fill_txstatus_report(info, status); + frame_succeed = b43_fill_txstatus_report(dev, info, status); #ifdef CONFIG_B43_DEBUG if (frame_succeed) ring->nr_succeed_tx_packets++; diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 9aeeb6553a91..2a599fb772d9 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -4555,7 +4555,7 @@ static int b43_wireless_init(struct ssb_device *dev) BIT(NL80211_IFTYPE_ADHOC); hw->queues = b43_modparam_qos ? 4 : 1; - hw->max_altrates = 1; + hw->max_rates = 2; SET_IEEE80211_DEV(hw, dev->dev); if (is_valid_ether_addr(sprom->et1mac)) SET_IEEE80211_PERM_ADDR(hw, sprom->et1mac); diff --git a/drivers/net/wireless/b43/pio.c b/drivers/net/wireless/b43/pio.c index 401591267592..1036bef8c4cc 100644 --- a/drivers/net/wireless/b43/pio.c +++ b/drivers/net/wireless/b43/pio.c @@ -587,9 +587,8 @@ void b43_pio_handle_txstatus(struct b43_wldev *dev, spin_lock(&q->lock); /* IRQs are already disabled. */ info = IEEE80211_SKB_CB(pack->skb); - memset(&info->status, 0, sizeof(info->status)); - b43_fill_txstatus_report(info, status); + b43_fill_txstatus_report(dev, info, status); total_len = pack->skb->len + b43_txhdr_size(dev); total_len = roundup(total_len, 4); diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c index 2fabcf8f0474..adba89b816d4 100644 --- a/drivers/net/wireless/b43/xmit.c +++ b/drivers/net/wireless/b43/xmit.c @@ -185,7 +185,7 @@ int b43_generate_txhdr(struct b43_wldev *dev, u8 *_txhdr, const unsigned char *fragment_data, unsigned int fragment_len, - const struct ieee80211_tx_info *info, + struct ieee80211_tx_info *info, u16 cookie) { struct b43_txhdr *txhdr = (struct b43_txhdr *)_txhdr; @@ -202,6 +202,7 @@ int b43_generate_txhdr(struct b43_wldev *dev, u16 phy_ctl = 0; u8 extra_ft = 0; struct ieee80211_rate *txrate; + struct ieee80211_tx_rate *rates; memset(txhdr, 0, sizeof(*txhdr)); @@ -291,7 +292,7 @@ int b43_generate_txhdr(struct b43_wldev *dev, phy_ctl |= B43_TXH_PHY_ENC_OFDM; else phy_ctl |= B43_TXH_PHY_ENC_CCK; - if (info->flags & IEEE80211_TX_CTL_SHORT_PREAMBLE) + if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE) phy_ctl |= B43_TXH_PHY_SHORTPRMBL; switch (b43_ieee80211_antenna_sanitize(dev, info->antenna_sel_tx)) { @@ -314,6 +315,7 @@ int b43_generate_txhdr(struct b43_wldev *dev, B43_WARN_ON(1); } + rates = info->control.rates; /* MAC control */ if (!(info->flags & IEEE80211_TX_CTL_NO_ACK)) mac_ctl |= B43_TXH_MAC_ACK; @@ -324,12 +326,22 @@ int b43_generate_txhdr(struct b43_wldev *dev, mac_ctl |= B43_TXH_MAC_STMSDU; if (phy->type == B43_PHYTYPE_A) mac_ctl |= B43_TXH_MAC_5GHZ; - if (info->flags & IEEE80211_TX_CTL_LONG_RETRY_LIMIT) + + /* Overwrite rates[0].count to make the retry calculation + * in the tx status easier. need the actual retry limit to + * detect whether the fallback rate was used. + */ + if ((rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) || + (rates[0].count <= dev->wl->hw->conf.long_frame_max_tx_count)) { + rates[0].count = dev->wl->hw->conf.long_frame_max_tx_count; mac_ctl |= B43_TXH_MAC_LONGFRAME; + } else { + rates[0].count = dev->wl->hw->conf.short_frame_max_tx_count; + } /* Generate the RTS or CTS-to-self frame */ - if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) || - (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)) { + if ((rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) || + (rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)) { unsigned int len; struct ieee80211_hdr *hdr; int rts_rate, rts_rate_fb; @@ -344,7 +356,7 @@ int b43_generate_txhdr(struct b43_wldev *dev, rts_rate_fb = b43_calc_fallback_rate(rts_rate); rts_rate_fb_ofdm = b43_is_ofdm_rate(rts_rate_fb); - if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) { + if (rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) { struct ieee80211_cts *cts; if (b43_is_old_txhdr_format(dev)) { @@ -687,10 +699,18 @@ void b43_handle_txstatus(struct b43_wldev *dev, /* Fill out the mac80211 TXstatus report based on the b43-specific * txstatus report data. This returns a boolean whether the frame was * successfully transmitted. */ -bool b43_fill_txstatus_report(struct ieee80211_tx_info *report, +bool b43_fill_txstatus_report(struct b43_wldev *dev, + struct ieee80211_tx_info *report, const struct b43_txstatus *status) { bool frame_success = 1; + int retry_limit; + + /* preserve the confiured retry limit before clearing the status + * The xmit function has overwritten the rc's value with the actual + * retry limit done by the hardware */ + retry_limit = report->status.rates[0].count; + ieee80211_tx_info_clear_status(report); if (status->acked) { /* The frame was ACKed. */ @@ -700,14 +720,32 @@ bool b43_fill_txstatus_report(struct ieee80211_tx_info *report, if (!(report->flags & IEEE80211_TX_CTL_NO_ACK)) { /* ...but we expected an ACK. */ frame_success = 0; - report->status.excessive_retries = 1; } } if (status->frame_count == 0) { /* The frame was not transmitted at all. */ - report->status.retry_count = 0; - } else - report->status.retry_count = status->frame_count - 1; + report->status.rates[0].count = 0; + } else if (status->rts_count > dev->wl->hw->conf.short_frame_max_tx_count) { + /* + * If the short retries (RTS, not data frame) have exceeded + * the limit, the hw will not have tried the selected rate, + * but will have used the fallback rate instead. + * Don't let the rate control count attempts for the selected + * rate in this case, otherwise the statistics will be off. + */ + report->status.rates[0].count = 0; + report->status.rates[1].count = status->frame_count; + } else { + if (status->frame_count > retry_limit) { + report->status.rates[0].count = retry_limit; + report->status.rates[1].count = status->frame_count - + retry_limit; + + } else { + report->status.rates[0].count = status->frame_count; + report->status.rates[1].idx = -1; + } + } return frame_success; } diff --git a/drivers/net/wireless/b43/xmit.h b/drivers/net/wireless/b43/xmit.h index 0215faf47541..4fb2a190f7a7 100644 --- a/drivers/net/wireless/b43/xmit.h +++ b/drivers/net/wireless/b43/xmit.h @@ -178,7 +178,7 @@ int b43_generate_txhdr(struct b43_wldev *dev, u8 * txhdr, const unsigned char *fragment_data, unsigned int fragment_len, - const struct ieee80211_tx_info *txctl, u16 cookie); + struct ieee80211_tx_info *txctl, u16 cookie); /* Transmit Status */ struct b43_txstatus { @@ -294,7 +294,8 @@ void b43_rx(struct b43_wldev *dev, struct sk_buff *skb, const void *_rxhdr); void b43_handle_txstatus(struct b43_wldev *dev, const struct b43_txstatus *status); -bool b43_fill_txstatus_report(struct ieee80211_tx_info *report, +bool b43_fill_txstatus_report(struct b43_wldev *dev, + struct ieee80211_tx_info *report, const struct b43_txstatus *status); void b43_tx_suspend(struct b43_wldev *dev); diff --git a/drivers/net/wireless/b43legacy/dma.c b/drivers/net/wireless/b43legacy/dma.c index fb6819e40f38..308c2647f002 100644 --- a/drivers/net/wireless/b43legacy/dma.c +++ b/drivers/net/wireless/b43legacy/dma.c @@ -1411,6 +1411,7 @@ void b43legacy_dma_handle_txstatus(struct b43legacy_wldev *dev, struct b43legacy_dmaring *ring; struct b43legacy_dmadesc_generic *desc; struct b43legacy_dmadesc_meta *meta; + int retry_limit; int slot; ring = parse_cookie(dev, status->cookie, &slot); @@ -1437,25 +1438,42 @@ void b43legacy_dma_handle_txstatus(struct b43legacy_wldev *dev, struct ieee80211_tx_info *info; BUG_ON(!meta->skb); info = IEEE80211_SKB_CB(meta->skb); - /* Call back to inform the ieee80211 subsystem about the - * status of the transmission. - * Some fields of txstat are already filled in dma_tx(). - */ - memset(&info->status, 0, sizeof(info->status)); + /* preserve the confiured retry limit before clearing the status + * The xmit function has overwritten the rc's value with the actual + * retry limit done by the hardware */ + retry_limit = info->status.rates[0].count; + ieee80211_tx_info_clear_status(info); - if (status->acked) { + if (status->acked) info->flags |= IEEE80211_TX_STAT_ACK; + + if (status->rts_count > dev->wl->hw->conf.short_frame_max_tx_count) { + /* + * If the short retries (RTS, not data frame) have exceeded + * the limit, the hw will not have tried the selected rate, + * but will have used the fallback rate instead. + * Don't let the rate control count attempts for the selected + * rate in this case, otherwise the statistics will be off. + */ + info->status.rates[0].count = 0; + info->status.rates[1].count = status->frame_count; } else { - if (!(info->flags & IEEE80211_TX_CTL_NO_ACK)) - info->status.excessive_retries = 1; + if (status->frame_count > retry_limit) { + info->status.rates[0].count = retry_limit; + info->status.rates[1].count = status->frame_count - + retry_limit; + + } else { + info->status.rates[0].count = status->frame_count; + info->status.rates[1].idx = -1; + } } - if (status->frame_count == 0) { - /* The frame was not transmitted at all. */ - info->status.retry_count = 0; - } else - info->status.retry_count = status->frame_count - - 1; + + /* Call back to inform the ieee80211 subsystem about the + * status of the transmission. + * Some fields of txstat are already filled in dma_tx(). + */ ieee80211_tx_status_irqsafe(dev->wl->hw, meta->skb); /* skb is freed by ieee80211_tx_status_irqsafe() */ meta->skb = NULL; diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index 78e46365f69e..9edbdf9cb50f 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -3682,7 +3682,7 @@ static int b43legacy_wireless_init(struct ssb_device *dev) BIT(NL80211_IFTYPE_WDS) | BIT(NL80211_IFTYPE_ADHOC); hw->queues = 1; /* FIXME: hardware has more queues */ - hw->max_altrates = 1; + hw->max_rates = 2; SET_IEEE80211_DEV(hw, dev->dev); if (is_valid_ether_addr(sprom->et1mac)) SET_IEEE80211_PERM_ADDR(hw, sprom->et1mac); diff --git a/drivers/net/wireless/b43legacy/pio.c b/drivers/net/wireless/b43legacy/pio.c index a86c7647fa2d..746d5361bba0 100644 --- a/drivers/net/wireless/b43legacy/pio.c +++ b/drivers/net/wireless/b43legacy/pio.c @@ -491,6 +491,7 @@ void b43legacy_pio_handle_txstatus(struct b43legacy_wldev *dev, struct b43legacy_pioqueue *queue; struct b43legacy_pio_txpacket *packet; struct ieee80211_tx_info *info; + int retry_limit; queue = parse_cookie(dev, status->cookie, &packet); B43legacy_WARN_ON(!queue); @@ -503,11 +504,37 @@ void b43legacy_pio_handle_txstatus(struct b43legacy_wldev *dev, sizeof(struct b43legacy_txhdr_fw3)); info = IEEE80211_SKB_CB(packet->skb); - memset(&info->status, 0, sizeof(info->status)); + + /* preserve the confiured retry limit before clearing the status + * The xmit function has overwritten the rc's value with the actual + * retry limit done by the hardware */ + retry_limit = info->status.rates[0].count; + ieee80211_tx_info_clear_status(info); if (status->acked) info->flags |= IEEE80211_TX_STAT_ACK; - info->status.retry_count = status->frame_count - 1; + + if (status->rts_count > dev->wl->hw->conf.short_frame_max_tx_count) { + /* + * If the short retries (RTS, not data frame) have exceeded + * the limit, the hw will not have tried the selected rate, + * but will have used the fallback rate instead. + * Don't let the rate control count attempts for the selected + * rate in this case, otherwise the statistics will be off. + */ + info->status.rates[0].count = 0; + info->status.rates[1].count = status->frame_count; + } else { + if (status->frame_count > retry_limit) { + info->status.rates[0].count = retry_limit; + info->status.rates[1].count = status->frame_count - + retry_limit; + + } else { + info->status.rates[0].count = status->frame_count; + info->status.rates[1].idx = -1; + } + } ieee80211_tx_status_irqsafe(dev->wl->hw, packet->skb); packet->skb = NULL; diff --git a/drivers/net/wireless/b43legacy/xmit.c b/drivers/net/wireless/b43legacy/xmit.c index 65e833781608..12fca99f7578 100644 --- a/drivers/net/wireless/b43legacy/xmit.c +++ b/drivers/net/wireless/b43legacy/xmit.c @@ -188,7 +188,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev, struct b43legacy_txhdr_fw3 *txhdr, const unsigned char *fragment_data, unsigned int fragment_len, - const struct ieee80211_tx_info *info, + struct ieee80211_tx_info *info, u16 cookie) { const struct ieee80211_hdr *wlhdr; @@ -201,6 +201,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev, u32 mac_ctl = 0; u16 phy_ctl = 0; struct ieee80211_rate *tx_rate; + struct ieee80211_tx_rate *rates; wlhdr = (const struct ieee80211_hdr *)fragment_data; @@ -274,7 +275,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev, /* PHY TX Control word */ if (rate_ofdm) phy_ctl |= B43legacy_TX4_PHY_OFDM; - if (dev->short_preamble) + if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE) phy_ctl |= B43legacy_TX4_PHY_SHORTPRMBL; switch (info->antenna_sel_tx) { case 0: @@ -291,6 +292,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev, } /* MAC control */ + rates = info->control.rates; if (!(info->flags & IEEE80211_TX_CTL_NO_ACK)) mac_ctl |= B43legacy_TX4_MAC_ACK; if (info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ) @@ -299,12 +301,22 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev, mac_ctl |= B43legacy_TX4_MAC_STMSDU; if (rate_fb_ofdm) mac_ctl |= B43legacy_TX4_MAC_FALLBACKOFDM; - if (info->flags & IEEE80211_TX_CTL_LONG_RETRY_LIMIT) + + /* Overwrite rates[0].count to make the retry calculation + * in the tx status easier. need the actual retry limit to + * detect whether the fallback rate was used. + */ + if ((rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) || + (rates[0].count <= dev->wl->hw->conf.long_frame_max_tx_count)) { + rates[0].count = dev->wl->hw->conf.long_frame_max_tx_count; mac_ctl |= B43legacy_TX4_MAC_LONGFRAME; + } else { + rates[0].count = dev->wl->hw->conf.short_frame_max_tx_count; + } /* Generate the RTS or CTS-to-self frame */ - if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) || - (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)) { + if ((rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) || + (rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)) { unsigned int len; struct ieee80211_hdr *hdr; int rts_rate; @@ -319,7 +331,7 @@ static int generate_txhdr_fw3(struct b43legacy_wldev *dev, if (rts_rate_fb_ofdm) mac_ctl |= B43legacy_TX4_MAC_CTSFALLBACKOFDM; - if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) { + if (rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) { ieee80211_ctstoself_get(dev->wl->hw, info->control.vif, fragment_data, @@ -362,7 +374,7 @@ int b43legacy_generate_txhdr(struct b43legacy_wldev *dev, u8 *txhdr, const unsigned char *fragment_data, unsigned int fragment_len, - const struct ieee80211_tx_info *info, + struct ieee80211_tx_info *info, u16 cookie) { return generate_txhdr_fw3(dev, (struct b43legacy_txhdr_fw3 *)txhdr, diff --git a/drivers/net/wireless/b43legacy/xmit.h b/drivers/net/wireless/b43legacy/xmit.h index e56777e0feab..62e09d02788f 100644 --- a/drivers/net/wireless/b43legacy/xmit.h +++ b/drivers/net/wireless/b43legacy/xmit.h @@ -80,7 +80,7 @@ int b43legacy_generate_txhdr(struct b43legacy_wldev *dev, u8 *txhdr, const unsigned char *fragment_data, unsigned int fragment_len, - const struct ieee80211_tx_info *info, + struct ieee80211_tx_info *info, u16 cookie); diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c index c25daec4f93d..f440ed0fe543 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c +++ b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c @@ -422,34 +422,6 @@ static void rs_free_sta(void *priv, struct ieee80211_sta *sta, } -/* - * get ieee prev rate from rate scale table. - * for A and B mode we need to overright prev - * value - */ -static int rs_adjust_next_rate(struct iwl3945_priv *priv, int rate) -{ - int next_rate = iwl3945_get_prev_ieee_rate(rate); - - switch (priv->band) { - case IEEE80211_BAND_5GHZ: - if (rate == IWL_RATE_12M_INDEX) - next_rate = IWL_RATE_9M_INDEX; - else if (rate == IWL_RATE_6M_INDEX) - next_rate = IWL_RATE_6M_INDEX; - break; -/* XXX cannot be invoked in current mac80211 so not a regression - case MODE_IEEE80211B: - if (rate == IWL_RATE_11M_INDEX_TABLE) - next_rate = IWL_RATE_5M_INDEX_TABLE; - break; - */ - default: - break; - } - - return next_rate; -} /** * rs_tx_status - Update rate control values based on Tx results * @@ -460,17 +432,21 @@ static void rs_tx_status(void *priv_rate, struct ieee80211_supported_band *sband struct ieee80211_sta *sta, void *priv_sta, struct sk_buff *skb) { - u8 retries, current_count; + u8 retries = 0, current_count; int scale_rate_index, first_index, last_index; unsigned long flags; struct iwl3945_priv *priv = (struct iwl3945_priv *)priv_rate; struct iwl3945_rs_sta *rs_sta = priv_sta; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + int i; IWL_DEBUG_RATE("enter\n"); - retries = info->status.retry_count; - first_index = sband->bitrates[info->tx_rate_idx].hw_value; + for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) + retries += info->status.rates[i].count; + retries--; + + first_index = sband->bitrates[info->status.rates[0].idx].hw_value; if ((first_index < 0) || (first_index >= IWL_RATE_COUNT)) { IWL_DEBUG_RATE("leave: Rate out of bounds: %d\n", first_index); return; @@ -502,7 +478,7 @@ static void rs_tx_status(void *priv_rate, struct ieee80211_supported_band *sband last_index = scale_rate_index; } else { current_count = priv->retry_rate; - last_index = rs_adjust_next_rate(priv, + last_index = iwl3945_rs_next_rate(priv, scale_rate_index); } @@ -518,7 +494,7 @@ static void rs_tx_status(void *priv_rate, struct ieee80211_supported_band *sband if (retries) scale_rate_index = - rs_adjust_next_rate(priv, scale_rate_index); + iwl3945_rs_next_rate(priv, scale_rate_index); } @@ -630,10 +606,11 @@ static u16 iwl3945_get_adjacent_rate(struct iwl3945_rs_sta *rs_sta, * rate table and must reference the driver allocated rate table * */ -static void rs_get_rate(void *priv_r, struct ieee80211_supported_band *sband, - struct ieee80211_sta *sta, void *priv_sta, - struct sk_buff *skb, struct rate_selection *sel) +static void rs_get_rate(void *priv_r, struct ieee80211_sta *sta, + void *priv_sta, struct ieee80211_tx_rate_control *txrc) { + struct ieee80211_supported_band *sband = txrc->sband; + struct sk_buff *skb = txrc->skb; u8 low = IWL_RATE_INVALID; u8 high = IWL_RATE_INVALID; u16 high_low; @@ -649,6 +626,7 @@ static void rs_get_rate(void *priv_r, struct ieee80211_supported_band *sband, struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; u16 fc, rate_mask; struct iwl3945_priv *priv = (struct iwl3945_priv *)priv_r; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); IWL_DEBUG_RATE("enter\n"); @@ -659,7 +637,7 @@ static void rs_get_rate(void *priv_r, struct ieee80211_supported_band *sband, is_multicast_ether_addr(hdr->addr1) || !sta || !priv_sta) { IWL_DEBUG_RATE("leave: No STA priv data to update!\n"); - sel->rate_idx = rate_lowest_index(sband, sta); + info->control.rates[0].idx = rate_lowest_index(sband, sta); return; } @@ -792,9 +770,10 @@ static void rs_get_rate(void *priv_r, struct ieee80211_supported_band *sband, rs_sta->last_txrate_idx = index; if (sband->band == IEEE80211_BAND_5GHZ) - sel->rate_idx = rs_sta->last_txrate_idx - IWL_FIRST_OFDM_RATE; + info->control.rates[0].idx = rs_sta->last_txrate_idx - + IWL_FIRST_OFDM_RATE; else - sel->rate_idx = rs_sta->last_txrate_idx; + info->control.rates[0].idx = rs_sta->last_txrate_idx; IWL_DEBUG_RATE("leave: %d\n", index); } diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c index 8a00245be51e..7afafb629706 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945.c +++ b/drivers/net/wireless/iwlwifi/iwl-3945.c @@ -261,6 +261,35 @@ static inline const char *iwl3945_get_tx_fail_reason(u32 status) } #endif +/* + * get ieee prev rate from rate scale table. + * for A and B mode we need to overright prev + * value + */ +int iwl3945_rs_next_rate(struct iwl3945_priv *priv, int rate) +{ + int next_rate = iwl3945_get_prev_ieee_rate(rate); + + switch (priv->band) { + case IEEE80211_BAND_5GHZ: + if (rate == IWL_RATE_12M_INDEX) + next_rate = IWL_RATE_9M_INDEX; + else if (rate == IWL_RATE_6M_INDEX) + next_rate = IWL_RATE_6M_INDEX; + break; +/* XXX cannot be invoked in current mac80211 so not a regression + case MODE_IEEE80211B: + if (rate == IWL_RATE_11M_INDEX_TABLE) + next_rate = IWL_RATE_5M_INDEX_TABLE; + break; + */ + default: + break; + } + + return next_rate; +} + /** * iwl3945_tx_queue_reclaim - Reclaim Tx queue entries already Tx'd @@ -308,6 +337,7 @@ static void iwl3945_rx_reply_tx(struct iwl3945_priv *priv, struct iwl3945_tx_resp *tx_resp = (void *)&pkt->u.raw[0]; u32 status = le32_to_cpu(tx_resp->status); int rate_idx; + int fail, i; if ((index >= txq->q.n_bd) || (iwl3945_x2_queue_used(&txq->q, index) == 0)) { IWL_ERROR("Read index for DMA queue txq_id (%d) index %d " @@ -318,9 +348,36 @@ static void iwl3945_rx_reply_tx(struct iwl3945_priv *priv, } info = IEEE80211_SKB_CB(txq->txb[txq->q.read_ptr].skb[0]); - memset(&info->status, 0, sizeof(info->status)); + ieee80211_tx_info_clear_status(info); + + /* Fill the MRR chain with some info about on-chip retransmissions */ + rate_idx = iwl3945_hwrate_to_plcp_idx(tx_resp->rate); + if (info->band == IEEE80211_BAND_5GHZ) + rate_idx -= IWL_FIRST_OFDM_RATE; + + fail = tx_resp->failure_frame; + for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { + int next = iwl3945_rs_next_rate(priv, rate_idx); + + info->status.rates[i].idx = rate_idx; + + /* + * Put remaining into the last count as best approximation + * of saying exactly what the hardware would have done... + */ + if ((rate_idx == next) || (i == IEEE80211_TX_MAX_RATES - 1)) { + info->status.rates[i].count = fail; + break; + } + + info->status.rates[i].count = priv->retry_rate; + fail -= priv->retry_rate; + rate_idx = next; + if (fail <= 0) + break; + } + info->status.rates[i].count++; /* add final attempt */ - info->status.retry_count = tx_resp->failure_frame; /* tx_status->rts_retry_count = tx_resp->failure_rts; */ info->flags |= ((status & TX_STATUS_MSK) == TX_STATUS_SUCCESS) ? IEEE80211_TX_STAT_ACK : 0; @@ -329,10 +386,6 @@ static void iwl3945_rx_reply_tx(struct iwl3945_priv *priv, txq_id, iwl3945_get_tx_fail_reason(status), status, tx_resp->rate, tx_resp->failure_frame); - rate_idx = iwl3945_hwrate_to_plcp_idx(tx_resp->rate); - if (info->band == IEEE80211_BAND_5GHZ) - rate_idx -= IWL_FIRST_OFDM_RATE; - info->tx_rate_idx = rate_idx; IWL_DEBUG_TX_REPLY("Tx queue reclaim %d\n", index); iwl3945_tx_queue_reclaim(priv, txq_id, index); diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.h b/drivers/net/wireless/iwlwifi/iwl-3945.h index bdd32475b99c..592c5958723b 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945.h +++ b/drivers/net/wireless/iwlwifi/iwl-3945.h @@ -954,6 +954,8 @@ static inline int is_channel_ibss(const struct iwl3945_channel_info *ch) extern const struct iwl3945_channel_info *iwl3945_get_channel_info( const struct iwl3945_priv *priv, enum ieee80211_band band, u16 channel); +extern int iwl3945_rs_next_rate(struct iwl3945_priv *priv, int rate); + /* Requires full declaration of iwl3945_priv before including */ #include "iwl-3945-io.h" diff --git a/drivers/net/wireless/iwlwifi/iwl-4965.c b/drivers/net/wireless/iwlwifi/iwl-4965.c index 9838de5f4369..222c2baa95ca 100644 --- a/drivers/net/wireless/iwlwifi/iwl-4965.c +++ b/drivers/net/wireless/iwlwifi/iwl-4965.c @@ -619,10 +619,10 @@ static void iwl4965_gain_computation(struct iwl_priv *priv, static void iwl4965_rts_tx_cmd_flag(struct ieee80211_tx_info *info, __le32 *tx_flags) { - if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) { + if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) { *tx_flags |= TX_CMD_FLG_RTS_MSK; *tx_flags &= ~TX_CMD_FLG_CTS_MSK; - } else if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) { + } else if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) { *tx_flags &= ~TX_CMD_FLG_RTS_MSK; *tx_flags |= TX_CMD_FLG_CTS_MSK; } @@ -2070,7 +2070,7 @@ static int iwl4965_tx_status_reply_tx(struct iwl_priv *priv, agg->frame_count, agg->start_idx, idx); info = IEEE80211_SKB_CB(priv->txq[txq_id].txb[idx].skb[0]); - info->status.retry_count = tx_resp->failure_frame; + info->status.rates[0].count = tx_resp->failure_frame + 1; info->flags &= ~IEEE80211_TX_CTL_AMPDU; info->flags |= iwl_is_tx_success(status)? IEEE80211_TX_STAT_ACK : 0; @@ -2227,7 +2227,7 @@ static void iwl4965_rx_reply_tx(struct iwl_priv *priv, iwl_txq_check_empty(priv, sta_id, tid, txq_id); } } else { - info->status.retry_count = tx_resp->failure_frame; + info->status.rates[0].count = tx_resp->failure_frame + 1; info->flags |= iwl_is_tx_success(status) ? IEEE80211_TX_STAT_ACK : 0; iwl_hwrate_to_tx_control(priv, diff --git a/drivers/net/wireless/iwlwifi/iwl-5000.c b/drivers/net/wireless/iwlwifi/iwl-5000.c index 56a3f0c84a1e..d5282fa65084 100644 --- a/drivers/net/wireless/iwlwifi/iwl-5000.c +++ b/drivers/net/wireless/iwlwifi/iwl-5000.c @@ -390,8 +390,8 @@ static void iwl5000_chain_noise_reset(struct iwl_priv *priv) static void iwl5000_rts_tx_cmd_flag(struct ieee80211_tx_info *info, __le32 *tx_flags) { - if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) || - (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)) + if ((info->control.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) || + (info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)) *tx_flags |= TX_CMD_FLG_RTS_CTS_MSK; else *tx_flags &= ~TX_CMD_FLG_RTS_CTS_MSK; @@ -1154,7 +1154,7 @@ static int iwl5000_tx_status_reply_tx(struct iwl_priv *priv, agg->frame_count, agg->start_idx, idx); info = IEEE80211_SKB_CB(priv->txq[txq_id].txb[idx].skb[0]); - info->status.retry_count = tx_resp->failure_frame; + info->status.rates[0].count = tx_resp->failure_frame + 1; info->flags &= ~IEEE80211_TX_CTL_AMPDU; info->flags |= iwl_is_tx_success(status)? IEEE80211_TX_STAT_ACK : 0; @@ -1307,7 +1307,7 @@ static void iwl5000_rx_reply_tx(struct iwl_priv *priv, iwl_txq_check_empty(priv, sta_id, tid, txq_id); } } else { - info->status.retry_count = tx_resp->failure_frame; + info->status.rates[0].count = tx_resp->failure_frame + 1; info->flags = iwl_is_tx_success(status) ? IEEE80211_TX_STAT_ACK : 0; iwl_hwrate_to_tx_control(priv, diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c index e10e0ca09ce9..f685e5d6c281 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c @@ -798,7 +798,7 @@ static void rs_tx_status(void *priv_r, struct ieee80211_supported_band *sband, !(info->flags & IEEE80211_TX_STAT_AMPDU)) return; - retries = info->status.retry_count; + retries = info->status.rates[0].count - 1; if (retries > 15) retries = 15; @@ -830,20 +830,15 @@ static void rs_tx_status(void *priv_r, struct ieee80211_supported_band *sband, if (priv->band == IEEE80211_BAND_5GHZ) rs_index -= IWL_FIRST_OFDM_RATE; - if ((info->tx_rate_idx < 0) || - (tbl_type.is_SGI ^ - !!(info->flags & IEEE80211_TX_CTL_SHORT_GI)) || - (tbl_type.is_fat ^ - !!(info->flags & IEEE80211_TX_CTL_40_MHZ_WIDTH)) || - (tbl_type.is_dup ^ - !!(info->flags & IEEE80211_TX_CTL_DUP_DATA)) || - (tbl_type.ant_type ^ info->antenna_sel_tx) || - (!!(tx_rate & RATE_MCS_HT_MSK) ^ - !!(info->flags & IEEE80211_TX_CTL_OFDM_HT)) || - (!!(tx_rate & RATE_MCS_GF_MSK) ^ - !!(info->flags & IEEE80211_TX_CTL_GREEN_FIELD)) || + if ((info->status.rates[0].idx < 0) || + (tbl_type.is_SGI != !!(info->status.rates[0].flags & IEEE80211_TX_RC_SHORT_GI)) || + (tbl_type.is_fat != !!(info->status.rates[0].flags & IEEE80211_TX_RC_40_MHZ_WIDTH)) || + (tbl_type.is_dup != !!(info->status.rates[0].flags & IEEE80211_TX_RC_DUP_DATA)) || + (tbl_type.ant_type != info->antenna_sel_tx) || + (!!(tx_rate & RATE_MCS_HT_MSK) != !!(info->status.rates[0].flags & IEEE80211_TX_RC_MCS)) || + (!!(tx_rate & RATE_MCS_GF_MSK) != !!(info->status.rates[0].flags & IEEE80211_TX_RC_GREEN_FIELD)) || (hw->wiphy->bands[priv->band]->bitrates[rs_index].bitrate != - hw->wiphy->bands[info->band]->bitrates[info->tx_rate_idx].bitrate)) { + hw->wiphy->bands[info->band]->bitrates[info->status.rates[0].idx].bitrate)) { IWL_DEBUG_RATE("initial rate does not match 0x%x\n", tx_rate); goto out; } @@ -2098,15 +2093,17 @@ static void rs_initialize_lq(struct iwl_priv *priv, return; } -static void rs_get_rate(void *priv_r, struct ieee80211_supported_band *sband, - struct ieee80211_sta *sta, void *priv_sta, - struct sk_buff *skb, struct rate_selection *sel) +static void rs_get_rate(void *priv_r, struct ieee80211_sta *sta, void *priv_sta, + struct ieee80211_tx_rate_control *txrc) { int i; + struct sk_buff *skb = txrc->skb; + struct ieee80211_supported_band *sband = txrc->sband; struct iwl_priv *priv = (struct iwl_priv *)priv_r; struct ieee80211_conf *conf = &priv->hw->conf; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); __le16 fc; struct iwl_lq_sta *lq_sta; @@ -2117,7 +2114,7 @@ static void rs_get_rate(void *priv_r, struct ieee80211_supported_band *sband, fc = hdr->frame_control; if (!ieee80211_is_data(fc) || is_multicast_ether_addr(hdr->addr1) || !sta || !priv_sta) { - sel->rate_idx = rate_lowest_index(sband, sta); + info->control.rates[0].idx = rate_lowest_index(sband, sta); return; } @@ -2143,13 +2140,13 @@ static void rs_get_rate(void *priv_r, struct ieee80211_supported_band *sband, } if ((i < 0) || (i > IWL_RATE_COUNT)) { - sel->rate_idx = rate_lowest_index(sband, sta); + info->control.rates[0].idx = rate_lowest_index(sband, sta); return; } if (sband->band == IEEE80211_BAND_5GHZ) i -= IWL_FIRST_OFDM_RATE; - sel->rate_idx = i; + info->control.rates[0].idx = i; } static void *rs_alloc_sta(void *priv_rate, struct ieee80211_sta *sta, diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index 10f5a0a233fe..2c4162e48140 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -88,26 +88,27 @@ EXPORT_SYMBOL(iwl_rates); * translate ucode response to mac80211 tx status control values */ void iwl_hwrate_to_tx_control(struct iwl_priv *priv, u32 rate_n_flags, - struct ieee80211_tx_info *control) + struct ieee80211_tx_info *info) { int rate_index; + struct ieee80211_tx_rate *r = &info->control.rates[0]; - control->antenna_sel_tx = + info->antenna_sel_tx = ((rate_n_flags & RATE_MCS_ANT_ABC_MSK) >> RATE_MCS_ANT_POS); if (rate_n_flags & RATE_MCS_HT_MSK) - control->flags |= IEEE80211_TX_CTL_OFDM_HT; + r->flags |= IEEE80211_TX_RC_MCS; if (rate_n_flags & RATE_MCS_GF_MSK) - control->flags |= IEEE80211_TX_CTL_GREEN_FIELD; + r->flags |= IEEE80211_TX_RC_GREEN_FIELD; if (rate_n_flags & RATE_MCS_FAT_MSK) - control->flags |= IEEE80211_TX_CTL_40_MHZ_WIDTH; + r->flags |= IEEE80211_TX_RC_40_MHZ_WIDTH; if (rate_n_flags & RATE_MCS_DUP_MSK) - control->flags |= IEEE80211_TX_CTL_DUP_DATA; + r->flags |= IEEE80211_TX_RC_DUP_DATA; if (rate_n_flags & RATE_MCS_SGI_MSK) - control->flags |= IEEE80211_TX_CTL_SHORT_GI; + r->flags |= IEEE80211_TX_RC_SHORT_GI; rate_index = iwl_hwrate_to_plcp_idx(rate_n_flags); - if (control->band == IEEE80211_BAND_5GHZ) + if (info->band == IEEE80211_BAND_5GHZ) rate_index -= IWL_FIRST_OFDM_RATE; - control->tx_rate_idx = rate_index; + r->idx = rate_index; } EXPORT_SYMBOL(iwl_hwrate_to_tx_control); diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index b1464c71ea0a..2cd33b4e9e13 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -2395,6 +2395,7 @@ static void iwl3945_build_tx_cmd_basic(struct iwl3945_priv *priv, { __le16 fc = hdr->frame_control; __le32 tx_flags = cmd->cmd.tx.tx_flags; + u8 rc_flags = info->control.rates[0].flags; cmd->cmd.tx.stop_time.life_time = TX_CMD_LIFE_TIME_INFINITE; if (!(info->flags & IEEE80211_TX_CTL_NO_ACK)) { @@ -2421,10 +2422,10 @@ static void iwl3945_build_tx_cmd_basic(struct iwl3945_priv *priv, tx_flags |= TX_CMD_FLG_SEQ_CTL_MSK; } - if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) { + if (rc_flags & IEEE80211_TX_RC_USE_RTS_CTS) { tx_flags |= TX_CMD_FLG_RTS_MSK; tx_flags &= ~TX_CMD_FLG_CTS_MSK; - } else if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) { + } else if (rc_flags & IEEE80211_TX_RC_USE_CTS_PROTECT) { tx_flags &= ~TX_CMD_FLG_RTS_MSK; tx_flags |= TX_CMD_FLG_CTS_MSK; } diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c index 241ddcfa352e..d1fc305de5fe 100644 --- a/drivers/net/wireless/libertas_tf/main.c +++ b/drivers/net/wireless/libertas_tf/main.c @@ -592,14 +592,14 @@ EXPORT_SYMBOL_GPL(lbtf_remove_card); void lbtf_send_tx_feedback(struct lbtf_private *priv, u8 retrycnt, u8 fail) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(priv->tx_skb); - memset(&info->status, 0, sizeof(info->status)); + + ieee80211_tx_info_clear_status(info); /* * Commented out, otherwise we never go beyond 1Mbit/s using mac80211 * default pid rc algorithm. * * info->status.retry_count = MRVL_DEFAULT_RETRIES - retrycnt; */ - info->status.excessive_retries = fail ? 1 : 0; if (!(info->flags & IEEE80211_TX_CTL_NO_ACK) && !fail) info->flags |= IEEE80211_TX_STAT_ACK; skb_pull(priv->tx_skb, sizeof(struct txpd)); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index e2aeef8de707..c57652325286 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -209,7 +209,7 @@ static bool mac80211_hwsim_tx_frame(struct ieee80211_hw *hw, /* TODO: set mactime */ rx_status.freq = data->channel->center_freq; rx_status.band = data->channel->band; - rx_status.rate_idx = info->tx_rate_idx; + rx_status.rate_idx = info->control.rates[0].idx; /* TODO: simulate signal strength (and optional packet drop) */ /* Copy skb to all enabled radios that are on the current frequency */ @@ -269,13 +269,9 @@ static int mac80211_hwsim_tx(struct ieee80211_hw *hw, struct sk_buff *skb) if (txi->control.sta) hwsim_check_sta_magic(txi->control.sta); - memset(&txi->status, 0, sizeof(txi->status)); - if (!(txi->flags & IEEE80211_TX_CTL_NO_ACK)) { - if (ack) - txi->flags |= IEEE80211_TX_STAT_ACK; - else - txi->status.excessive_retries = 1; - } + ieee80211_tx_info_clear_status(txi); + if (!(txi->flags & IEEE80211_TX_CTL_NO_ACK) && ack) + txi->flags |= IEEE80211_TX_STAT_ACK; ieee80211_tx_status_irqsafe(hw, skb); return NETDEV_TX_OK; } diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c index 81a9756254fb..65be5eca2340 100644 --- a/drivers/net/wireless/p54/p54common.c +++ b/drivers/net/wireless/p54/p54common.c @@ -548,7 +548,7 @@ static void p54_rx_frame_sent(struct ieee80211_hw *dev, struct sk_buff *skb) spin_lock_irqsave(&priv->tx_queue.lock, flags); while (entry != (struct sk_buff *)&priv->tx_queue) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(entry); - range = (void *)info->driver_data; + range = (void *)info->rate_driver_data; if (range->start_addr == addr) { struct p54_control_hdr *entry_hdr; struct p54_tx_control_allocdata *entry_data; @@ -559,7 +559,7 @@ static void p54_rx_frame_sent(struct ieee80211_hw *dev, struct sk_buff *skb) struct memrecord *mr; ni = IEEE80211_SKB_CB(entry->next); - mr = (struct memrecord *)ni->driver_data; + mr = (struct memrecord *)ni->rate_driver_data; freed = mr->start_addr - last_addr; } else freed = priv->rx_end - last_addr; @@ -568,7 +568,7 @@ static void p54_rx_frame_sent(struct ieee80211_hw *dev, struct sk_buff *skb) __skb_unlink(entry, &priv->tx_queue); spin_unlock_irqrestore(&priv->tx_queue.lock, flags); - memset(&info->status, 0, sizeof(info->status)); + ieee80211_tx_info_clear_status(info); entry_hdr = (struct p54_control_hdr *) entry->data; entry_data = (struct p54_tx_control_allocdata *) entry_hdr->data; if ((entry_hdr->magic1 & cpu_to_le16(0x4000)) != 0) @@ -578,10 +578,8 @@ static void p54_rx_frame_sent(struct ieee80211_hw *dev, struct sk_buff *skb) if (!(info->flags & IEEE80211_TX_CTL_NO_ACK)) { if (!(payload->status & 0x01)) info->flags |= IEEE80211_TX_STAT_ACK; - else - info->status.excessive_retries = 1; } - info->status.retry_count = payload->retries - 1; + info->status.rates[0].count = payload->retries; info->status.ack_signal = p54_rssi_to_dbm(dev, le16_to_cpu(payload->ack_rssi)); skb_pull(entry, sizeof(*hdr) + pad + sizeof(*entry_data)); @@ -699,7 +697,7 @@ static void p54_assign_address(struct ieee80211_hw *dev, struct sk_buff *skb, while (left--) { u32 hole_size; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(entry); - struct memrecord *range = (void *)info->driver_data; + struct memrecord *range = (void *)info->rate_driver_data; hole_size = range->start_addr - last_addr; if (!target_skb && hole_size >= len) { target_skb = entry->prev; @@ -715,7 +713,7 @@ static void p54_assign_address(struct ieee80211_hw *dev, struct sk_buff *skb, largest_hole = max(largest_hole, priv->rx_end - last_addr - len); if (!skb_queue_empty(&priv->tx_queue)) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(target_skb); - struct memrecord *range = (void *)info->driver_data; + struct memrecord *range = (void *)info->rate_driver_data; target_addr = range->end_addr; } } else @@ -723,7 +721,7 @@ static void p54_assign_address(struct ieee80211_hw *dev, struct sk_buff *skb, if (skb) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct memrecord *range = (void *)info->driver_data; + struct memrecord *range = (void *)info->rate_driver_data; range->start_addr = target_addr; range->end_addr = target_addr + len; __skb_queue_after(&priv->tx_queue, target_skb, skb); @@ -806,6 +804,7 @@ static int p54_tx(struct ieee80211_hw *dev, struct sk_buff *skb) size_t padding, len; u8 rate; u8 cts_rate = 0x20; + u8 rc_flags; current_queue = &priv->tx_stats[skb_get_queue_mapping(skb) + 4]; if (unlikely(current_queue->len > current_queue->limit)) @@ -828,18 +827,19 @@ static int p54_tx(struct ieee80211_hw *dev, struct sk_buff *skb) hdr->magic1 = cpu_to_le16(0x0010); hdr->len = cpu_to_le16(len); hdr->type = (info->flags & IEEE80211_TX_CTL_NO_ACK) ? 0 : cpu_to_le16(1); - hdr->retry1 = hdr->retry2 = info->control.retry_limit; + hdr->retry1 = hdr->retry2 = info->control.rates[0].count; /* TODO: add support for alternate retry TX rates */ rate = ieee80211_get_tx_rate(dev, info)->hw_value; - if (info->flags & IEEE80211_TX_CTL_SHORT_PREAMBLE) { + rc_flags = info->control.rates[0].flags; + if (rc_flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE) { rate |= 0x10; cts_rate |= 0x10; } - if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) { + if (rc_flags & IEEE80211_TX_RC_USE_RTS_CTS) { rate |= 0x40; cts_rate |= ieee80211_get_rts_cts_rate(dev, info)->hw_value; - } else if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) { + } else if (rc_flags & IEEE80211_TX_RC_USE_CTS_PROTECT) { rate |= 0x20; cts_rate |= ieee80211_get_rts_cts_rate(dev, info)->hw_value; } diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c index 697806cf94e2..e1feab8b6b02 100644 --- a/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -498,7 +498,9 @@ void rt2x00lib_txdone(struct queue_entry *entry, { struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev; struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(entry->skb); + struct skb_frame_desc *skbdesc = get_skb_frame_desc(entry->skb); enum data_queue_qid qid = skb_get_queue_mapping(entry->skb); + u8 rate_idx, rate_flags; /* * Unmap the skb. @@ -528,14 +530,18 @@ void rt2x00lib_txdone(struct queue_entry *entry, rt2x00dev->link.qual.tx_failed += test_bit(TXDONE_FAILURE, &txdesc->flags); + rate_idx = skbdesc->tx_rate_idx; + rate_flags = skbdesc->tx_rate_flags; + /* * Initialize TX status */ memset(&tx_info->status, 0, sizeof(tx_info->status)); tx_info->status.ack_signal = 0; - tx_info->status.excessive_retries = - test_bit(TXDONE_EXCESSIVE_RETRY, &txdesc->flags); - tx_info->status.retry_count = txdesc->retry; + tx_info->status.rates[0].idx = rate_idx; + tx_info->status.rates[0].flags = rate_flags; + tx_info->status.rates[0].count = txdesc->retry + 1; + tx_info->status.rates[1].idx = -1; /* terminate */ if (!(tx_info->flags & IEEE80211_TX_CTL_NO_ACK)) { if (test_bit(TXDONE_SUCCESS, &txdesc->flags)) @@ -544,7 +550,7 @@ void rt2x00lib_txdone(struct queue_entry *entry, rt2x00dev->low_level_stats.dot11ACKFailureCount++; } - if (tx_info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) { + if (rate_flags & IEEE80211_TX_RC_USE_RTS_CTS) { if (test_bit(TXDONE_SUCCESS, &txdesc->flags)) rt2x00dev->low_level_stats.dot11RTSSuccessCount++; else if (test_bit(TXDONE_FAILURE, &txdesc->flags)) diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c index 931183369f07..b32d59eafaa3 100644 --- a/drivers/net/wireless/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/rt2x00/rt2x00mac.c @@ -39,7 +39,7 @@ static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev, unsigned int data_length; int retval = 0; - if (tx_info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) + if (tx_info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) data_length = sizeof(struct ieee80211_cts); else data_length = sizeof(struct ieee80211_rts); @@ -64,11 +64,11 @@ static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev, */ memcpy(skb->cb, frag_skb->cb, sizeof(skb->cb)); rts_info = IEEE80211_SKB_CB(skb); - rts_info->flags &= ~IEEE80211_TX_CTL_USE_RTS_CTS; - rts_info->flags &= ~IEEE80211_TX_CTL_USE_CTS_PROTECT; + rts_info->control.rates[0].flags &= ~IEEE80211_TX_RC_USE_RTS_CTS; + rts_info->control.rates[0].flags &= ~IEEE80211_TX_RC_USE_CTS_PROTECT; rts_info->flags &= ~IEEE80211_TX_CTL_REQ_TX_STATUS; - if (tx_info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) + if (tx_info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) rts_info->flags |= IEEE80211_TX_CTL_NO_ACK; else rts_info->flags &= ~IEEE80211_TX_CTL_NO_ACK; @@ -84,7 +84,7 @@ static int rt2x00mac_tx_rts_cts(struct rt2x00_dev *rt2x00dev, data_length += rt2x00crypto_tx_overhead(tx_info); #endif /* CONFIG_RT2X00_LIB_CRYPTO */ - if (tx_info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) + if (tx_info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) ieee80211_ctstoself_get(rt2x00dev->hw, tx_info->control.vif, frag_skb->data, data_length, tx_info, (struct ieee80211_cts *)(skb->data)); @@ -146,8 +146,8 @@ int rt2x00mac_tx(struct ieee80211_hw *hw, struct sk_buff *skb) * inside the hardware. */ frame_control = le16_to_cpu(ieee80211hdr->frame_control); - if ((tx_info->flags & (IEEE80211_TX_CTL_USE_RTS_CTS | - IEEE80211_TX_CTL_USE_CTS_PROTECT)) && + if ((tx_info->control.rates[0].flags & (IEEE80211_TX_RC_USE_RTS_CTS | + IEEE80211_TX_RC_USE_CTS_PROTECT)) && !rt2x00dev->ops->hw->set_rts_threshold) { if (rt2x00queue_available(queue) <= 1) goto exit_fail; diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.c b/drivers/net/wireless/rt2x00/rt2x00queue.c index 451d410ecdae..070786ebd076 100644 --- a/drivers/net/wireless/rt2x00/rt2x00queue.c +++ b/drivers/net/wireless/rt2x00/rt2x00queue.c @@ -230,8 +230,15 @@ static void rt2x00queue_create_tx_descriptor(struct queue_entry *entry, /* * Determine retry information. */ - txdesc->retry_limit = tx_info->control.retry_limit; - if (tx_info->flags & IEEE80211_TX_CTL_LONG_RETRY_LIMIT) + txdesc->retry_limit = tx_info->control.rates[0].count - 1; + /* + * XXX: If at this point we knew whether the HW is going to use + * the RETRY_MODE bit or the retry_limit (currently all + * use the RETRY_MODE bit) we could do something like b43 + * does, set the RETRY_MODE bit when the RC algorithm is + * requesting more than the long retry limit. + */ + if (tx_info->control.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) __set_bit(ENTRY_TXD_RETRY_MODE, &txdesc->flags); /* @@ -371,10 +378,12 @@ static void rt2x00queue_write_tx_descriptor(struct queue_entry *entry, int rt2x00queue_write_tx_frame(struct data_queue *queue, struct sk_buff *skb) { + struct ieee80211_tx_info *tx_info; struct queue_entry *entry = rt2x00queue_get_entry(queue, Q_INDEX); struct txentry_desc txdesc; struct skb_frame_desc *skbdesc; unsigned int iv_len = 0; + u8 rate_idx, rate_flags; if (unlikely(rt2x00queue_full(queue))) return -EINVAL; @@ -399,13 +408,18 @@ int rt2x00queue_write_tx_frame(struct data_queue *queue, struct sk_buff *skb) iv_len = IEEE80211_SKB_CB(skb)->control.hw_key->iv_len; /* - * All information is retreived from the skb->cb array, + * All information is retrieved from the skb->cb array, * now we should claim ownership of the driver part of that - * array. + * array, preserving the bitrate index and flags. */ + tx_info = IEEE80211_SKB_CB(skb); + rate_idx = tx_info->control.rates[0].idx; + rate_flags = tx_info->control.rates[0].flags; skbdesc = get_skb_frame_desc(entry->skb); memset(skbdesc, 0, sizeof(*skbdesc)); skbdesc->entry = entry; + skbdesc->tx_rate_idx = rate_idx; + skbdesc->tx_rate_flags = rate_flags; /* * When hardware encryption is supported, and this frame diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.h b/drivers/net/wireless/rt2x00/rt2x00queue.h index 9dbf04f0f04c..4d3c7246f9ae 100644 --- a/drivers/net/wireless/rt2x00/rt2x00queue.h +++ b/drivers/net/wireless/rt2x00/rt2x00queue.h @@ -104,6 +104,8 @@ enum skb_frame_desc_flags { * * @flags: Frame flags, see &enum skb_frame_desc_flags. * @desc_len: Length of the frame descriptor. + * @tx_rate_idx: the index of the TX rate, used for TX status reporting + * @tx_rate_flags: the TX rate flags, used for TX status reporting * @desc: Pointer to descriptor part of the frame. * Note that this pointer could point to something outside * of the scope of the skb->data pointer. @@ -113,9 +115,12 @@ enum skb_frame_desc_flags { * @entry: The entry to which this sk buffer belongs. */ struct skb_frame_desc { - unsigned int flags; + u8 flags; + + u8 desc_len; + u8 tx_rate_idx; + u8 tx_rate_flags; - unsigned int desc_len; void *desc; __le32 iv; diff --git a/drivers/net/wireless/rtl8180_dev.c b/drivers/net/wireless/rtl8180_dev.c index e8d22393797f..6c226c024dd9 100644 --- a/drivers/net/wireless/rtl8180_dev.c +++ b/drivers/net/wireless/rtl8180_dev.c @@ -182,15 +182,13 @@ static void rtl8180_handle_tx(struct ieee80211_hw *dev, unsigned int prio) skb->len, PCI_DMA_TODEVICE); info = IEEE80211_SKB_CB(skb); - memset(&info->status, 0, sizeof(info->status)); + ieee80211_tx_info_clear_status(info); - if (!(info->flags & IEEE80211_TX_CTL_NO_ACK)) { - if (flags & RTL818X_TX_DESC_FLAG_TX_OK) - info->flags |= IEEE80211_TX_STAT_ACK; - else - info->status.excessive_retries = 1; - } - info->status.retry_count = flags & 0xFF; + if (!(info->flags & IEEE80211_TX_CTL_NO_ACK) && + (flags & RTL818X_TX_DESC_FLAG_TX_OK)) + info->flags |= IEEE80211_TX_STAT_ACK; + + info->status.rates[0].count = (flags & 0xFF) + 1; ieee80211_tx_status_irqsafe(dev, skb); if (ring->entries - skb_queue_len(&ring->queue) == 2) @@ -243,6 +241,7 @@ static int rtl8180_tx(struct ieee80211_hw *dev, struct sk_buff *skb) unsigned int idx, prio; dma_addr_t mapping; u32 tx_flags; + u8 rc_flags; u16 plcp_len = 0; __le16 rts_duration = 0; @@ -261,15 +260,16 @@ static int rtl8180_tx(struct ieee80211_hw *dev, struct sk_buff *skb) tx_flags |= RTL818X_TX_DESC_FLAG_DMA | RTL818X_TX_DESC_FLAG_NO_ENC; - if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) { + rc_flags = info->control.rates[0].flags; + if (rc_flags & IEEE80211_TX_RC_USE_RTS_CTS) { tx_flags |= RTL818X_TX_DESC_FLAG_RTS; tx_flags |= ieee80211_get_rts_cts_rate(dev, info)->hw_value << 19; - } else if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) { + } else if (rc_flags & IEEE80211_TX_RC_USE_CTS_PROTECT) { tx_flags |= RTL818X_TX_DESC_FLAG_CTS; tx_flags |= ieee80211_get_rts_cts_rate(dev, info)->hw_value << 19; } - if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) + if (rc_flags & IEEE80211_TX_RC_USE_RTS_CTS) rts_duration = ieee80211_rts_duration(dev, priv->vif, skb->len, info); @@ -292,9 +292,9 @@ static int rtl8180_tx(struct ieee80211_hw *dev, struct sk_buff *skb) entry->plcp_len = cpu_to_le16(plcp_len); entry->tx_buf = cpu_to_le32(mapping); entry->frame_len = cpu_to_le32(skb->len); - entry->flags2 = info->control.retries[0].rate_idx >= 0 ? + entry->flags2 = info->control.rates[1].idx >= 0 ? ieee80211_get_alt_retry_rate(dev, info, 0)->bitrate << 4 : 0; - entry->retry_limit = info->control.retry_limit; + entry->retry_limit = info->control.rates[0].count; entry->flags = cpu_to_le32(tx_flags); __skb_queue_tail(&ring->queue, skb); if (ring->entries - skb_queue_len(&ring->queue) < 2) @@ -855,7 +855,7 @@ static int __devinit rtl8180_probe(struct pci_dev *pdev, priv = dev->priv; priv->pdev = pdev; - dev->max_altrates = 1; + dev->max_rates = 2; SET_IEEE80211_DEV(dev, &pdev->dev); pci_set_drvdata(pdev, dev); diff --git a/drivers/net/wireless/rtl8187_dev.c b/drivers/net/wireless/rtl8187_dev.c index cd839bcb6d78..6260ed8ce12b 100644 --- a/drivers/net/wireless/rtl8187_dev.c +++ b/drivers/net/wireless/rtl8187_dev.c @@ -160,13 +160,13 @@ static void rtl8187_tx_cb(struct urb *urb) { struct sk_buff *skb = (struct sk_buff *)urb->context; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_hw *hw = info->driver_data[0]; + struct ieee80211_hw *hw = info->rate_driver_data[0]; struct rtl8187_priv *priv = hw->priv; - usb_free_urb(info->driver_data[1]); + usb_free_urb(info->rate_driver_data[1]); skb_pull(skb, priv->is_rtl8187b ? sizeof(struct rtl8187b_tx_hdr) : sizeof(struct rtl8187_tx_hdr)); - memset(&info->status, 0, sizeof(info->status)); + ieee80211_tx_info_clear_status(info); info->flags |= IEEE80211_TX_STAT_ACK; ieee80211_tx_status_irqsafe(hw, skb); } @@ -194,12 +194,12 @@ static int rtl8187_tx(struct ieee80211_hw *dev, struct sk_buff *skb) flags |= ieee80211_get_tx_rate(dev, info)->hw_value << 24; if (ieee80211_has_morefrags(((struct ieee80211_hdr *)skb->data)->frame_control)) flags |= RTL818X_TX_DESC_FLAG_MOREFRAG; - if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) { + if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) { flags |= RTL818X_TX_DESC_FLAG_RTS; flags |= ieee80211_get_rts_cts_rate(dev, info)->hw_value << 19; rts_dur = ieee80211_rts_duration(dev, priv->vif, skb->len, info); - } else if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) { + } else if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) { flags |= RTL818X_TX_DESC_FLAG_CTS; flags |= ieee80211_get_rts_cts_rate(dev, info)->hw_value << 19; } @@ -210,7 +210,7 @@ static int rtl8187_tx(struct ieee80211_hw *dev, struct sk_buff *skb) hdr->flags = cpu_to_le32(flags); hdr->len = 0; hdr->rts_duration = rts_dur; - hdr->retry = cpu_to_le32(info->control.retry_limit << 8); + hdr->retry = cpu_to_le32((info->control.rates[0].count - 1) << 8); buf = hdr; ep = 2; @@ -228,7 +228,7 @@ static int rtl8187_tx(struct ieee80211_hw *dev, struct sk_buff *skb) memset(hdr, 0, sizeof(*hdr)); hdr->flags = cpu_to_le32(flags); hdr->rts_duration = rts_dur; - hdr->retry = cpu_to_le32(info->control.retry_limit << 8); + hdr->retry = cpu_to_le32((info->control.rates[0].count - 1) << 8); hdr->tx_duration = ieee80211_generic_frame_duration(dev, priv->vif, skb->len, txrate); @@ -240,8 +240,8 @@ static int rtl8187_tx(struct ieee80211_hw *dev, struct sk_buff *skb) ep = epmap[skb_get_queue_mapping(skb)]; } - info->driver_data[0] = dev; - info->driver_data[1] = urb; + info->rate_driver_data[0] = dev; + info->rate_driver_data[1] = urb; usb_fill_bulk_urb(urb, priv->udev, usb_sndbulkpipe(priv->udev, ep), buf, skb->len, rtl8187_tx_cb, skb); diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index 6c3e21887fc8..2f0802b29c4b 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -296,15 +296,14 @@ static void zd_op_stop(struct ieee80211_hw *hw) * If no status information has been requested, the skb is freed. */ static void tx_status(struct ieee80211_hw *hw, struct sk_buff *skb, - u32 flags, int ackssi, bool success) + int ackssi, bool success) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - memset(&info->status, 0, sizeof(info->status)); + ieee80211_tx_info_clear_status(info); - if (!success) - info->status.excessive_retries = 1; - info->flags |= flags; + if (success) + info->flags |= IEEE80211_TX_STAT_ACK; info->status.ack_signal = ackssi; ieee80211_tx_status_irqsafe(hw, skb); } @@ -326,7 +325,7 @@ void zd_mac_tx_failed(struct ieee80211_hw *hw) if (skb == NULL) return; - tx_status(hw, skb, 0, 0, 0); + tx_status(hw, skb, 0, 0); } /** @@ -342,12 +341,12 @@ void zd_mac_tx_failed(struct ieee80211_hw *hw) void zd_mac_tx_to_dev(struct sk_buff *skb, int error) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_hw *hw = info->driver_data[0]; + struct ieee80211_hw *hw = info->rate_driver_data[0]; skb_pull(skb, sizeof(struct zd_ctrlset)); if (unlikely(error || (info->flags & IEEE80211_TX_CTL_NO_ACK))) { - tx_status(hw, skb, 0, 0, !error); + tx_status(hw, skb, 0, !error); } else { struct sk_buff_head *q = &zd_hw_mac(hw)->ack_wait_queue; @@ -406,7 +405,8 @@ static int zd_calc_tx_length_us(u8 *service, u8 zd_rate, u16 tx_length) } static void cs_set_control(struct zd_mac *mac, struct zd_ctrlset *cs, - struct ieee80211_hdr *header, u32 flags) + struct ieee80211_hdr *header, + struct ieee80211_tx_info *info) { /* * CONTROL TODO: @@ -417,7 +417,7 @@ static void cs_set_control(struct zd_mac *mac, struct zd_ctrlset *cs, cs->control = 0; /* First fragment */ - if (flags & IEEE80211_TX_CTL_FIRST_FRAGMENT) + if (info->flags & IEEE80211_TX_CTL_FIRST_FRAGMENT) cs->control |= ZD_CS_NEED_RANDOM_BACKOFF; /* Multicast */ @@ -428,10 +428,10 @@ static void cs_set_control(struct zd_mac *mac, struct zd_ctrlset *cs, if (ieee80211_is_pspoll(header->frame_control)) cs->control |= ZD_CS_PS_POLL_FRAME; - if (flags & IEEE80211_TX_CTL_USE_RTS_CTS) + if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) cs->control |= ZD_CS_RTS; - if (flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) + if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) cs->control |= ZD_CS_SELF_CTS; /* FIXME: Management frame? */ @@ -517,12 +517,12 @@ static int fill_ctrlset(struct zd_mac *mac, txrate = ieee80211_get_tx_rate(mac->hw, info); cs->modulation = txrate->hw_value; - if (info->flags & IEEE80211_TX_CTL_SHORT_PREAMBLE) + if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE) cs->modulation = txrate->hw_value_short; cs->tx_length = cpu_to_le16(frag_len); - cs_set_control(mac, cs, hdr, info->flags); + cs_set_control(mac, cs, hdr, info); packet_length = frag_len + sizeof(struct zd_ctrlset) + 10; ZD_ASSERT(packet_length <= 0xffff); @@ -577,7 +577,7 @@ static int zd_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb) if (r) return r; - info->driver_data[0] = hw; + info->rate_driver_data[0] = hw; r = zd_usb_tx(&mac->chip.usb, skb); if (r) @@ -618,7 +618,7 @@ static int filter_ack(struct ieee80211_hw *hw, struct ieee80211_hdr *rx_hdr, if (likely(!compare_ether_addr(tx_hdr->addr2, rx_hdr->addr1))) { __skb_unlink(skb, q); - tx_status(hw, skb, IEEE80211_TX_STAT_ACK, stats->signal, 1); + tx_status(hw, skb, stats->signal, 1); goto out; } } diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c index a60ae86bd5c9..d7a2f52e40cf 100644 --- a/drivers/net/wireless/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zd1211rw/zd_usb.c @@ -907,7 +907,7 @@ free_urb: * it might be freed by zd_mac_tx_to_dev or mac80211) */ info = IEEE80211_SKB_CB(skb); - usb = &zd_hw_mac(info->driver_data[0])->chip.usb; + usb = &zd_hw_mac(info->rate_driver_data[0])->chip.usb; zd_mac_tx_to_dev(skb, urb->status); free_tx_urb(usb, urb); tx_dec_submitted_urbs(usb); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 9801afb62545..3741c0a7978a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -214,29 +214,24 @@ struct ieee80211_bss_conf { * These flags are used with the @flags member of &ieee80211_tx_info. * * @IEEE80211_TX_CTL_REQ_TX_STATUS: request TX status callback for this frame. - * @IEEE80211_TX_CTL_USE_RTS_CTS: use RTS-CTS before sending frame - * @IEEE80211_TX_CTL_USE_CTS_PROTECT: use CTS protection for the frame (e.g., - * for combined 802.11g / 802.11b networks) + * @IEEE80211_TX_CTL_ASSIGN_SEQ: The driver has to assign a sequence + * number to this frame, taking care of not overwriting the fragment + * number and increasing the sequence number only when the + * IEEE80211_TX_CTL_FIRST_FRAGMENT flag is set. mac80211 will properly + * assign sequence numbers to QoS-data frames but cannot do so correctly + * for non-QoS-data and management frames because beacons need them from + * that counter as well and mac80211 cannot guarantee proper sequencing. + * If this flag is set, the driver should instruct the hardware to + * assign a sequence number to the frame or assign one itself. Cf. IEEE + * 802.11-2007 7.1.3.4.1 paragraph 3. This flag will always be set for + * beacons and always be clear for frames without a sequence number field. * @IEEE80211_TX_CTL_NO_ACK: tell the low level not to wait for an ack - * @IEEE80211_TX_CTL_RATE_CTRL_PROBE: TBD * @IEEE80211_TX_CTL_CLEAR_PS_FILT: clear powersave filter for destination * station - * @IEEE80211_TX_CTL_REQUEUE: TBD * @IEEE80211_TX_CTL_FIRST_FRAGMENT: this is a first fragment of the frame - * @IEEE80211_TX_CTL_SHORT_PREAMBLE: TBD - * @IEEE80211_TX_CTL_LONG_RETRY_LIMIT: this frame should be send using the - * through set_retry_limit configured long retry value * @IEEE80211_TX_CTL_SEND_AFTER_DTIM: send this frame after DTIM beacon * @IEEE80211_TX_CTL_AMPDU: this frame should be sent as part of an A-MPDU - * @IEEE80211_TX_CTL_OFDM_HT: this frame can be sent in HT OFDM rates. number - * of streams when this flag is on can be extracted from antenna_sel_tx, - * so if 1 antenna is marked use SISO, 2 antennas marked use MIMO, n - * antennas marked use MIMO_n. - * @IEEE80211_TX_CTL_GREEN_FIELD: use green field protection for this frame - * @IEEE80211_TX_CTL_40_MHZ_WIDTH: send this frame using 40 Mhz channel width - * @IEEE80211_TX_CTL_DUP_DATA: duplicate data frame on both 20 Mhz channels - * @IEEE80211_TX_CTL_SHORT_GI: send this frame using short guard interval - * @IEEE80211_TX_CTL_INJECTED: TBD + * @IEEE80211_TX_CTL_INJECTED: Frame was injected, internal to mac80211. * @IEEE80211_TX_STAT_TX_FILTERED: The frame was not transmitted * because the destination STA was in powersave mode. * @IEEE80211_TX_STAT_ACK: Frame was acknowledged @@ -244,62 +239,70 @@ struct ieee80211_bss_conf { * is for the whole aggregation. * @IEEE80211_TX_STAT_AMPDU_NO_BACK: no block ack was returned, * so consider using block ack request (BAR). - * @IEEE80211_TX_CTL_ASSIGN_SEQ: The driver has to assign a sequence - * number to this frame, taking care of not overwriting the fragment - * number and increasing the sequence number only when the - * IEEE80211_TX_CTL_FIRST_FRAGMENT flags is set. mac80211 will properly - * assign sequence numbers to QoS-data frames but cannot do so correctly - * for non-QoS-data and management frames because beacons need them from - * that counter as well and mac80211 cannot guarantee proper sequencing. - * If this flag is set, the driver should instruct the hardware to - * assign a sequence number to the frame or assign one itself. Cf. IEEE - * 802.11-2007 7.1.3.4.1 paragraph 3. This flag will always be set for - * beacons always be clear for frames without a sequence number field. + * @IEEE80211_TX_CTL_RATE_CTRL_PROBE: internal to mac80211, can be + * set by rate control algorithms to indicate probe rate, will + * be cleared for fragmented frames (except on the last fragment) + * @IEEE80211_TX_CTL_REQUEUE: REMOVE THIS */ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_REQ_TX_STATUS = BIT(0), - IEEE80211_TX_CTL_USE_RTS_CTS = BIT(2), - IEEE80211_TX_CTL_USE_CTS_PROTECT = BIT(3), - IEEE80211_TX_CTL_NO_ACK = BIT(4), - IEEE80211_TX_CTL_RATE_CTRL_PROBE = BIT(5), - IEEE80211_TX_CTL_CLEAR_PS_FILT = BIT(6), - IEEE80211_TX_CTL_REQUEUE = BIT(7), - IEEE80211_TX_CTL_FIRST_FRAGMENT = BIT(8), - IEEE80211_TX_CTL_SHORT_PREAMBLE = BIT(9), - IEEE80211_TX_CTL_LONG_RETRY_LIMIT = BIT(10), - IEEE80211_TX_CTL_SEND_AFTER_DTIM = BIT(12), - IEEE80211_TX_CTL_AMPDU = BIT(13), - IEEE80211_TX_CTL_OFDM_HT = BIT(14), - IEEE80211_TX_CTL_GREEN_FIELD = BIT(15), - IEEE80211_TX_CTL_40_MHZ_WIDTH = BIT(16), - IEEE80211_TX_CTL_DUP_DATA = BIT(17), - IEEE80211_TX_CTL_SHORT_GI = BIT(18), - IEEE80211_TX_CTL_INJECTED = BIT(19), - IEEE80211_TX_STAT_TX_FILTERED = BIT(20), - IEEE80211_TX_STAT_ACK = BIT(21), - IEEE80211_TX_STAT_AMPDU = BIT(22), - IEEE80211_TX_STAT_AMPDU_NO_BACK = BIT(23), - IEEE80211_TX_CTL_ASSIGN_SEQ = BIT(24), + IEEE80211_TX_CTL_ASSIGN_SEQ = BIT(1), + IEEE80211_TX_CTL_NO_ACK = BIT(2), + IEEE80211_TX_CTL_CLEAR_PS_FILT = BIT(3), + IEEE80211_TX_CTL_FIRST_FRAGMENT = BIT(4), + IEEE80211_TX_CTL_SEND_AFTER_DTIM = BIT(5), + IEEE80211_TX_CTL_AMPDU = BIT(6), + IEEE80211_TX_CTL_INJECTED = BIT(7), + IEEE80211_TX_STAT_TX_FILTERED = BIT(8), + IEEE80211_TX_STAT_ACK = BIT(9), + IEEE80211_TX_STAT_AMPDU = BIT(10), + IEEE80211_TX_STAT_AMPDU_NO_BACK = BIT(11), + IEEE80211_TX_CTL_RATE_CTRL_PROBE = BIT(12), + + /* XXX: remove this */ + IEEE80211_TX_CTL_REQUEUE = BIT(13), }; +enum mac80211_rate_control_flags { + IEEE80211_TX_RC_USE_RTS_CTS = BIT(0), + IEEE80211_TX_RC_USE_CTS_PROTECT = BIT(1), + IEEE80211_TX_RC_USE_SHORT_PREAMBLE = BIT(2), + + /* rate index is an MCS rate number instead of an index */ + IEEE80211_TX_RC_MCS = BIT(3), + IEEE80211_TX_RC_GREEN_FIELD = BIT(4), + IEEE80211_TX_RC_40_MHZ_WIDTH = BIT(5), + IEEE80211_TX_RC_DUP_DATA = BIT(6), + IEEE80211_TX_RC_SHORT_GI = BIT(7), +}; + + +/* there are 40 bytes if you don't need the rateset to be kept */ +#define IEEE80211_TX_INFO_DRIVER_DATA_SIZE 40 -#define IEEE80211_TX_INFO_DRIVER_DATA_SIZE \ - (sizeof(((struct sk_buff *)0)->cb) - 8) -#define IEEE80211_TX_INFO_DRIVER_DATA_PTRS \ - (IEEE80211_TX_INFO_DRIVER_DATA_SIZE / sizeof(void *)) +/* if you do need the rateset, then you have less space */ +#define IEEE80211_TX_INFO_RATE_DRIVER_DATA_SIZE 24 -/* maximum number of alternate rate retry stages */ -#define IEEE80211_TX_MAX_ALTRATE 3 +/* maximum number of rate stages */ +#define IEEE80211_TX_MAX_RATES 5 /** - * struct ieee80211_tx_altrate - alternate rate selection/status + * struct ieee80211_tx_rate - rate selection/status * - * @rate_idx: rate index to attempt to send with + * @idx: rate index to attempt to send with + * @flags: rate control flags (&enum mac80211_rate_control_flags) * @limit: number of retries before fallback + * + * A value of -1 for @idx indicates an invalid rate and, if used + * in an array of retry rates, that no more rates should be tried. + * + * When used for transmit status reporting, the driver should + * always report the rate along with the flags it used. */ -struct ieee80211_tx_altrate { - s8 rate_idx; - u8 limit; +struct ieee80211_tx_rate { + s8 idx; + u8 count; + u8 flags; }; /** @@ -314,15 +317,12 @@ struct ieee80211_tx_altrate { * it may be NULL. * * @flags: transmit info flags, defined above - * @band: TBD - * @tx_rate_idx: TBD + * @band: the band to transmit on (use for checking for races) * @antenna_sel_tx: antenna to use, 0 for automatic diversity * @control: union for control data * @status: union for status data * @driver_data: array of driver_data pointers * @retry_count: number of retries - * @excessive_retries: set to 1 if the frame was retried many times - * but not acknowledged * @ampdu_ack_len: number of aggregated frames. * relevant only if IEEE80211_TX_STATUS_AMPDU was set. * @ampdu_ack_map: block ack bit map for the aggregation. @@ -333,31 +333,43 @@ struct ieee80211_tx_info { /* common information */ u32 flags; u8 band; - s8 tx_rate_idx; + u8 antenna_sel_tx; - /* 1 byte hole */ + /* 2 byte hole */ union { struct { + union { + /* rate control */ + struct { + struct ieee80211_tx_rate rates[ + IEEE80211_TX_MAX_RATES]; + s8 rts_cts_rate_idx; + }; + /* only needed before rate control */ + unsigned long jiffies; + }; /* NB: vif can be NULL for injected frames */ struct ieee80211_vif *vif; struct ieee80211_key_conf *hw_key; struct ieee80211_sta *sta; - unsigned long jiffies; - s8 rts_cts_rate_idx; - u8 retry_limit; - struct ieee80211_tx_altrate retries[IEEE80211_TX_MAX_ALTRATE]; } control; struct { + struct ieee80211_tx_rate rates[IEEE80211_TX_MAX_RATES]; + u8 ampdu_ack_len; u64 ampdu_ack_map; int ack_signal; - struct ieee80211_tx_altrate retries[IEEE80211_TX_MAX_ALTRATE + 1]; - u8 retry_count; - bool excessive_retries; - u8 ampdu_ack_len; + /* 8 bytes free */ } status; - void *driver_data[IEEE80211_TX_INFO_DRIVER_DATA_PTRS]; + struct { + struct ieee80211_tx_rate driver_rates[ + IEEE80211_TX_MAX_RATES]; + void *rate_driver_data[ + IEEE80211_TX_INFO_RATE_DRIVER_DATA_SIZE / sizeof(void *)]; + }; + void *driver_data[ + IEEE80211_TX_INFO_DRIVER_DATA_SIZE / sizeof(void *)]; }; }; @@ -366,6 +378,41 @@ static inline struct ieee80211_tx_info *IEEE80211_SKB_CB(struct sk_buff *skb) return (struct ieee80211_tx_info *)skb->cb; } +/** + * ieee80211_tx_info_clear_status - clear TX status + * + * @info: The &struct ieee80211_tx_info to be cleared. + * + * When the driver passes an skb back to mac80211, it must report + * a number of things in TX status. This function clears everything + * in the TX status but the rate control information (it does clear + * the count since you need to fill that in anyway). + * + * NOTE: You can only use this function if you do NOT use + * info->driver_data! Use info->rate_driver_data + * instead if you need only the less space that allows. + */ +static inline void +ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) +{ + int i; + + BUILD_BUG_ON(offsetof(struct ieee80211_tx_info, status.rates) != + offsetof(struct ieee80211_tx_info, control.rates)); + BUILD_BUG_ON(offsetof(struct ieee80211_tx_info, status.rates) != + offsetof(struct ieee80211_tx_info, driver_rates)); + BUILD_BUG_ON(offsetof(struct ieee80211_tx_info, status.rates) != 8); + /* clear the rate counts */ + for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) + info->status.rates[i].count = 0; + + BUILD_BUG_ON( + offsetof(struct ieee80211_tx_info, status.ampdu_ack_len) != 23); + memset(&info->status.ampdu_ack_len, 0, + sizeof(struct ieee80211_tx_info) - + offsetof(struct ieee80211_tx_info, status.ampdu_ack_len)); +} + /** * enum mac80211_rx_flags - receive flags @@ -869,8 +916,8 @@ enum ieee80211_hw_flags { * @sta_data_size: size (in bytes) of the drv_priv data area * within &struct ieee80211_sta. * - * @max_altrates: maximum number of alternate rate retry stages - * @max_altrate_tries: maximum number of tries for each stage + * @max_rates: maximum number of alternate rate retry stages + * @max_rate_tries: maximum number of tries for each stage */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -887,8 +934,8 @@ struct ieee80211_hw { u16 ampdu_queues; u16 max_listen_interval; s8 max_signal; - u8 max_altrates; - u8 max_altrate_tries; + u8 max_rates; + u8 max_rate_tries; }; /** @@ -927,9 +974,9 @@ static inline struct ieee80211_rate * ieee80211_get_tx_rate(const struct ieee80211_hw *hw, const struct ieee80211_tx_info *c) { - if (WARN_ON(c->tx_rate_idx < 0)) + if (WARN_ON(c->control.rates[0].idx < 0)) return NULL; - return &hw->wiphy->bands[c->band]->bitrates[c->tx_rate_idx]; + return &hw->wiphy->bands[c->band]->bitrates[c->control.rates[0].idx]; } static inline struct ieee80211_rate * @@ -945,9 +992,9 @@ static inline struct ieee80211_rate * ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, const struct ieee80211_tx_info *c, int idx) { - if (c->control.retries[idx].rate_idx < 0) + if (c->control.rates[idx + 1].idx < 0) return NULL; - return &hw->wiphy->bands[c->band]->bitrates[c->control.retries[idx].rate_idx]; + return &hw->wiphy->bands[c->band]->bitrates[c->control.rates[idx + 1].idx]; } /** @@ -1840,17 +1887,30 @@ struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw, /* Rate control API */ + /** - * struct rate_selection - rate information for/from rate control algorithms - * - * @rate_idx: selected transmission rate index - * @nonerp_idx: Non-ERP rate to use instead if ERP cannot be used - * @probe_idx: rate for probing (or -1) - * @max_rate_idx: maximum rate index that can be used, this is - * input to the algorithm and will be enforced - */ -struct rate_selection { - s8 rate_idx, nonerp_idx, probe_idx, max_rate_idx; + * struct ieee80211_tx_rate_control - rate control information for/from RC algo + * + * @hw: The hardware the algorithm is invoked for. + * @sband: The band this frame is being transmitted on. + * @bss_conf: the current BSS configuration + * @reported_rate: The rate control algorithm can fill this in to indicate + * which rate should be reported to userspace as the current rate and + * used for rate calculations in the mesh network. + * @rts: whether RTS will be used for this frame because it is longer than the + * RTS threshold + * @short_preamble: whether mac80211 will request short-preamble transmission + * if the selected rate supports it + * @max_rate_idx: user-requested maximum rate (not MCS for now) + */ +struct ieee80211_tx_rate_control { + struct ieee80211_hw *hw; + struct ieee80211_supported_band *sband; + struct ieee80211_bss_conf *bss_conf; + struct sk_buff *skb; + struct ieee80211_tx_rate reported_rate; + bool rts, short_preamble; + u8 max_rate_idx; }; struct rate_control_ops { @@ -1869,10 +1929,8 @@ struct rate_control_ops { void (*tx_status)(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_sta *sta, void *priv_sta, struct sk_buff *skb); - void (*get_rate)(void *priv, struct ieee80211_supported_band *sband, - struct ieee80211_sta *sta, void *priv_sta, - struct sk_buff *skb, - struct rate_selection *sel); + void (*get_rate)(void *priv, struct ieee80211_sta *sta, void *priv_sta, + struct ieee80211_tx_rate_control *txrc); void (*add_sta_debugfs)(void *priv, void *priv_sta, struct dentry *dir); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 6f8756d26a93..fe4efdd4253d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -142,7 +142,6 @@ typedef unsigned __bitwise__ ieee80211_tx_result; #define IEEE80211_TX_FRAGMENTED BIT(0) #define IEEE80211_TX_UNICAST BIT(1) #define IEEE80211_TX_PS_BUFFERED BIT(2) -#define IEEE80211_TX_PROBE_LAST_FRAG BIT(3) struct ieee80211_tx_data { struct sk_buff *skb; @@ -153,11 +152,6 @@ struct ieee80211_tx_data { struct ieee80211_key *key; struct ieee80211_channel *channel; - s8 rate_idx; - /* use this rate (if set) for last fragment; rate can - * be set to lower rate for the first fragments, e.g., - * when using CTS protection with IEEE 802.11g. */ - s8 last_frag_rate_idx; /* Extra fragments (in addition to the first fragment * in skb) */ @@ -203,9 +197,7 @@ struct ieee80211_rx_data { struct ieee80211_tx_stored_packet { struct sk_buff *skb; struct sk_buff **extra_frag; - s8 last_frag_rate_idx; int num_extra_frag; - bool last_frag_rate_ctrl_probe; }; struct beacon_data { diff --git a/net/mac80211/main.c b/net/mac80211/main.c index ffff54944f9d..88c1975a97a5 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -41,6 +41,8 @@ */ struct ieee80211_tx_status_rtap_hdr { struct ieee80211_radiotap_header hdr; + u8 rate; + u8 padding_for_rate; __le16 tx_flags; u8 data_retries; } __attribute__ ((packed)); @@ -465,13 +467,28 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_sub_if_data *sdata; struct net_device *prev_dev = NULL; struct sta_info *sta; + int retry_count = -1, i; + + for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { + /* the HW cannot have attempted that rate */ + if (i >= hw->max_rates) { + info->status.rates[i].idx = -1; + info->status.rates[i].count = 0; + } + + retry_count += info->status.rates[i].count; + } + if (retry_count < 0) + retry_count = 0; rcu_read_lock(); + sband = local->hw.wiphy->bands[info->band]; + sta = sta_info_get(local, hdr->addr1); if (sta) { - if (info->status.excessive_retries && + if (!(info->flags & IEEE80211_TX_STAT_ACK) && test_sta_flags(sta, WLAN_STA_PS)) { /* * The STA is in power save mode, so assume @@ -502,12 +519,11 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) rcu_read_unlock(); return; } else { - if (info->status.excessive_retries) + if (!(info->flags & IEEE80211_TX_STAT_ACK)) sta->tx_retry_failed++; - sta->tx_retry_count += info->status.retry_count; + sta->tx_retry_count += retry_count; } - sband = local->hw.wiphy->bands[info->band]; rate_control_tx_status(local, sband, sta, skb); } @@ -528,9 +544,9 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) local->dot11TransmittedFrameCount++; if (is_multicast_ether_addr(hdr->addr1)) local->dot11MulticastTransmittedFrameCount++; - if (info->status.retry_count > 0) + if (retry_count > 0) local->dot11RetryCount++; - if (info->status.retry_count > 1) + if (retry_count > 1) local->dot11MultipleRetryCount++; } @@ -574,19 +590,30 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) rthdr->hdr.it_len = cpu_to_le16(sizeof(*rthdr)); rthdr->hdr.it_present = cpu_to_le32((1 << IEEE80211_RADIOTAP_TX_FLAGS) | - (1 << IEEE80211_RADIOTAP_DATA_RETRIES)); + (1 << IEEE80211_RADIOTAP_DATA_RETRIES) | + (1 << IEEE80211_RADIOTAP_RATE)); if (!(info->flags & IEEE80211_TX_STAT_ACK) && !is_multicast_ether_addr(hdr->addr1)) rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_FAIL); - if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) && - (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)) + /* + * XXX: Once radiotap gets the bitmap reset thing the vendor + * extensions proposal contains, we can actually report + * the whole set of tries we did. + */ + if ((info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) || + (info->status.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)) rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_CTS); - else if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) + else if (info->status.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) rthdr->tx_flags |= cpu_to_le16(IEEE80211_RADIOTAP_F_TX_RTS); + if (info->status.rates[0].idx >= 0 && + !(info->status.rates[0].flags & IEEE80211_TX_RC_MCS)) + rthdr->rate = sband->bitrates[ + info->status.rates[0].idx].bitrate / 5; - rthdr->data_retries = info->status.retry_count; + /* for now report the total retry_count */ + rthdr->data_retries = retry_count; /* XXX: is this sufficient for BPF? */ skb_set_mac_header(skb, 0); @@ -671,8 +698,9 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, BUG_ON(!ops->configure_filter); local->ops = ops; - local->hw.queues = 1; /* default */ - + /* set up some defaults */ + local->hw.queues = 1; + local->hw.max_rates = 1; local->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD; local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD; local->hw.conf.long_frame_max_tx_count = 4; diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 501c7831adb4..e8d573d592e7 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -218,12 +218,16 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, if (sta->fail_avg >= 100) return MAX_METRIC; + + if (sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS) + return MAX_METRIC; + err = (sta->fail_avg << ARITH_SHIFT) / 100; /* bitrate is in units of 100 Kbps, while we need rate in units of * 1Mbps. This will be corrected on tx_time computation. */ - rate = sband->bitrates[sta->last_txrate_idx].bitrate; + rate = sband->bitrates[sta->last_tx_rate.idx].bitrate; tx_time = (device_constant + 10 * test_frame_len / rate); estimated_retx = ((1 << (2 * ARITH_SHIFT)) / (s_unit - err)); result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT) ; diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 5d786720d935..3fa7ab285066 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -199,48 +199,44 @@ static void rate_control_release(struct kref *kref) } void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, - struct sta_info *sta, struct sk_buff *skb, - struct rate_selection *sel) + struct sta_info *sta, + struct ieee80211_tx_rate_control *txrc) { struct rate_control_ref *ref = sdata->local->rate_ctrl; void *priv_sta = NULL; struct ieee80211_sta *ista = NULL; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); int i; - sel->rate_idx = -1; - sel->nonerp_idx = -1; - sel->probe_idx = -1; - sel->max_rate_idx = sdata->max_ratectrl_rateidx; - if (sta) { ista = &sta->sta; priv_sta = sta->rate_ctrl_priv; } + for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { + info->control.rates[i].idx = -1; + info->control.rates[i].flags = 0; + info->control.rates[i].count = 1; + } + if (sta && sdata->force_unicast_rateidx > -1) - sel->rate_idx = sdata->force_unicast_rateidx; + info->control.rates[0].idx = sdata->force_unicast_rateidx; else - ref->ops->get_rate(ref->priv, sband, ista, priv_sta, skb, sel); - - if (sdata->max_ratectrl_rateidx > -1 && - sel->rate_idx > sdata->max_ratectrl_rateidx) - sel->rate_idx = sdata->max_ratectrl_rateidx; - - BUG_ON(sel->rate_idx < 0); - - /* Select a non-ERP backup rate. */ - if (sel->nonerp_idx < 0) { - for (i = 0; i < sband->n_bitrates; i++) { - struct ieee80211_rate *rate = &sband->bitrates[i]; - if (sband->bitrates[sel->rate_idx].bitrate < rate->bitrate) - break; - - if (rate_supported(ista, sband->band, i) && - !(rate->flags & IEEE80211_RATE_ERP_G)) - sel->nonerp_idx = i; - } + ref->ops->get_rate(ref->priv, ista, priv_sta, txrc); + + /* + * try to enforce the maximum rate the user wanted + */ + if (sdata->max_ratectrl_rateidx > -1) + for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { + if (info->control.rates[i].flags & IEEE80211_TX_RC_MCS) + continue; + info->control.rates[i].idx = + min_t(s8, info->control.rates[i].idx, + sdata->max_ratectrl_rateidx); } + + BUG_ON(info->control.rates[0].idx < 0); } struct rate_control_ref *rate_control_get(struct rate_control_ref *ref) diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index d0092f847f82..7c25edf9ac55 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -31,9 +31,8 @@ struct rate_control_ref { struct rate_control_ref *rate_control_alloc(const char *name, struct ieee80211_local *local); void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, - struct sta_info *sta, struct sk_buff *skb, - struct rate_selection *sel); + struct sta_info *sta, + struct ieee80211_tx_rate_control *txrc); struct rate_control_ref *rate_control_get(struct rate_control_ref *ref); void rate_control_put(struct rate_control_ref *ref); diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index f6d69dab07a3..759ddd8bf0f4 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -169,30 +169,20 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, { struct minstrel_sta_info *mi = priv_sta; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_tx_altrate *ar = info->status.retries; - struct minstrel_priv *mp = priv; - int i, ndx, tries; - int success = 0; + struct ieee80211_tx_rate *ar = info->status.rates; + int i, ndx; + int success; - if (!info->status.excessive_retries) - success = 1; + success = !!(info->flags & IEEE80211_TX_STAT_ACK); - if (!mp->has_mrr || (ar[0].rate_idx < 0)) { - ndx = rix_to_ndx(mi, info->tx_rate_idx); - tries = info->status.retry_count + 1; - mi->r[ndx].success += success; - mi->r[ndx].attempts += tries; - return; - } - - for (i = 0; i < 4; i++) { - if (ar[i].rate_idx < 0) + for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { + if (ar[i].idx < 0) break; - ndx = rix_to_ndx(mi, ar[i].rate_idx); - mi->r[ndx].attempts += ar[i].limit + 1; + ndx = rix_to_ndx(mi, ar[i].idx); + mi->r[ndx].attempts += ar[i].count; - if ((i != 3) && (ar[i + 1].rate_idx < 0)) + if ((i != IEEE80211_TX_MAX_RATES - 1) && (ar[i + 1].idx < 0)) mi->r[ndx].success += success; } @@ -210,9 +200,9 @@ minstrel_get_retry_count(struct minstrel_rate *mr, { unsigned int retry = mr->adjusted_retry_count; - if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) + if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS) retry = max(2U, min(mr->retry_count_rtscts, retry)); - else if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) + else if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT) retry = max(2U, min(mr->retry_count_cts, retry)); return retry; } @@ -234,14 +224,15 @@ minstrel_get_next_sample(struct minstrel_sta_info *mi) } void -minstrel_get_rate(void *priv, struct ieee80211_supported_band *sband, - struct ieee80211_sta *sta, void *priv_sta, - struct sk_buff *skb, struct rate_selection *sel) +minstrel_get_rate(void *priv, struct ieee80211_sta *sta, + void *priv_sta, struct ieee80211_tx_rate_control *txrc) { + struct sk_buff *skb = txrc->skb; + struct ieee80211_supported_band *sband = txrc->sband; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct minstrel_sta_info *mi = priv_sta; struct minstrel_priv *mp = priv; - struct ieee80211_tx_altrate *ar = info->control.retries; + struct ieee80211_tx_rate *ar = info->control.rates; unsigned int ndx, sample_ndx = 0; bool mrr; bool sample_slower = false; @@ -251,16 +242,12 @@ minstrel_get_rate(void *priv, struct ieee80211_supported_band *sband, int sample_rate; if (!sta || !mi || use_low_rate(skb)) { - sel->rate_idx = rate_lowest_index(sband, sta); + ar[0].idx = rate_lowest_index(sband, sta); + ar[0].count = mp->max_retry; return; } - mrr = mp->has_mrr; - - /* mac80211 does not allow mrr for RTS/CTS */ - if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) || - (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)) - mrr = false; + mrr = mp->has_mrr && !txrc->rts && !txrc->bss_conf->use_cts_prot; if (time_after(jiffies, mi->stats_update + (mp->update_interval * HZ) / 1000)) @@ -315,13 +302,12 @@ minstrel_get_rate(void *priv, struct ieee80211_supported_band *sband, mi->sample_deferred++; } } - sel->rate_idx = mi->r[ndx].rix; - info->control.retry_limit = minstrel_get_retry_count(&mi->r[ndx], info); + ar[0].idx = mi->r[ndx].rix; + ar[0].count = minstrel_get_retry_count(&mi->r[ndx], info); if (!mrr) { - ar[0].rate_idx = mi->lowest_rix; - ar[0].limit = mp->max_retry; - ar[1].rate_idx = -1; + ar[1].idx = mi->lowest_rix; + ar[1].count = mp->max_retry; return; } @@ -336,9 +322,9 @@ minstrel_get_rate(void *priv, struct ieee80211_supported_band *sband, } mrr_ndx[1] = mi->max_prob_rate; mrr_ndx[2] = 0; - for (i = 0; i < 3; i++) { - ar[i].rate_idx = mi->r[mrr_ndx[i]].rix; - ar[i].limit = mi->r[mrr_ndx[i]].adjusted_retry_count; + for (i = 1; i < 4; i++) { + ar[i].idx = mi->r[mrr_ndx[i - 1]].rix; + ar[i].count = mi->r[mrr_ndx[i - 1]].adjusted_retry_count; } } @@ -532,13 +518,13 @@ minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) /* maximum time that the hw is allowed to stay in one MRR segment */ mp->segment_size = 6000; - if (hw->max_altrate_tries > 0) - mp->max_retry = hw->max_altrate_tries; + if (hw->max_rate_tries > 0) + mp->max_retry = hw->max_rate_tries; else /* safe default, does not necessarily have to match hw properties */ mp->max_retry = 7; - if (hw->max_altrates >= 3) + if (hw->max_rates >= 4) mp->has_mrr = true; mp->hw = hw; diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h index ce099ea1d5d3..1a873f00691a 100644 --- a/net/mac80211/rc80211_pid.h +++ b/net/mac80211/rc80211_pid.h @@ -61,6 +61,7 @@ enum rc_pid_event_type { union rc_pid_event_data { /* RC_PID_EVENT_TX_STATUS */ struct { + u32 flags; struct ieee80211_tx_info tx_status; }; /* RC_PID_EVENT_TYPE_RATE_CHANGE */ diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c index 86eb374e3b87..92caecfcee78 100644 --- a/net/mac80211/rc80211_pid_algo.c +++ b/net/mac80211/rc80211_pid_algo.c @@ -241,7 +241,7 @@ static void rate_control_pid_tx_status(void *priv, struct ieee80211_supported_ba /* Ignore all frames that were sent with a different rate than the rate * we currently advise mac80211 to use. */ - if (info->tx_rate_idx != spinfo->txrate_idx) + if (info->status.rates[0].idx != spinfo->txrate_idx) return; spinfo->tx_num_xmit++; @@ -253,10 +253,10 @@ static void rate_control_pid_tx_status(void *priv, struct ieee80211_supported_ba /* We count frames that totally failed to be transmitted as two bad * frames, those that made it out but had some retries as one good and * one bad frame. */ - if (info->status.excessive_retries) { + if (!(info->flags & IEEE80211_TX_STAT_ACK)) { spinfo->tx_num_failed += 2; spinfo->tx_num_xmit++; - } else if (info->status.retry_count) { + } else if (info->status.rates[0].count) { spinfo->tx_num_failed++; spinfo->tx_num_xmit++; } @@ -270,23 +270,32 @@ static void rate_control_pid_tx_status(void *priv, struct ieee80211_supported_ba } static void -rate_control_pid_get_rate(void *priv, struct ieee80211_supported_band *sband, - struct ieee80211_sta *sta, void *priv_sta, - struct sk_buff *skb, - struct rate_selection *sel) +rate_control_pid_get_rate(void *priv, struct ieee80211_sta *sta, + void *priv_sta, + struct ieee80211_tx_rate_control *txrc) { + struct sk_buff *skb = txrc->skb; + struct ieee80211_supported_band *sband = txrc->sband; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct rc_pid_sta_info *spinfo = priv_sta; int rateidx; u16 fc; + if (txrc->rts) + info->control.rates[0].count = + txrc->hw->conf.long_frame_max_tx_count; + else + info->control.rates[0].count = + txrc->hw->conf.short_frame_max_tx_count; + /* Send management frames and broadcast/multicast data using lowest * rate. */ fc = le16_to_cpu(hdr->frame_control); if (!sta || !spinfo || (fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA || is_multicast_ether_addr(hdr->addr1)) { - sel->rate_idx = rate_lowest_index(sband, sta); + info->control.rates[0].idx = rate_lowest_index(sband, sta); return; } @@ -295,7 +304,7 @@ rate_control_pid_get_rate(void *priv, struct ieee80211_supported_band *sband, if (rateidx >= sband->n_bitrates) rateidx = sband->n_bitrates - 1; - sel->rate_idx = rateidx; + info->control.rates[0].idx = rateidx; #ifdef CONFIG_MAC80211_DEBUGFS rate_control_pid_event_tx_rate(&spinfo->events, diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c index 8121d3bc6835..a08a9b530347 100644 --- a/net/mac80211/rc80211_pid_debugfs.c +++ b/net/mac80211/rc80211_pid_debugfs.c @@ -43,6 +43,7 @@ void rate_control_pid_event_tx_status(struct rc_pid_event_buffer *buf, { union rc_pid_event_data evd; + evd.flags = stat->flags; memcpy(&evd.tx_status, stat, sizeof(struct ieee80211_tx_info)); rate_control_pid_event(buf, RC_PID_EVENT_TYPE_TX_STATUS, &evd); } @@ -167,8 +168,8 @@ static ssize_t rate_control_pid_events_read(struct file *file, char __user *buf, switch (ev->type) { case RC_PID_EVENT_TYPE_TX_STATUS: p += snprintf(pb + p, length - p, "tx_status %u %u", - ev->data.tx_status.status.excessive_retries, - ev->data.tx_status.status.retry_count); + !(ev->data.flags & IEEE80211_TX_STAT_ACK), + ev->data.tx_status.status.rates[0].idx); break; case RC_PID_EVENT_TYPE_RATE_CHANGE: p += snprintf(pb + p, length - p, "rate_change %d %d", diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 168a39a298bd..4ac372aa75ce 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -196,7 +196,7 @@ struct sta_ampdu_mlme { * @tx_packets: number of RX/TX MSDUs * @tx_bytes: TBD * @tx_fragments: number of transmitted MPDUs - * @last_txrate_idx: Index of the last used transmit rate + * @last_txrate: description of the last used transmit rate * @tid_seq: TBD * @ampdu_mlme: TBD * @timer_to_tid: identity mapping to ID timers @@ -267,7 +267,7 @@ struct sta_info { unsigned long tx_packets; unsigned long tx_bytes; unsigned long tx_fragments; - unsigned int last_txrate_idx; + struct ieee80211_tx_rate last_tx_rate; u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1]; /* diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 6f3e4be97631..21951bac1ef7 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -46,13 +46,20 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, struct ieee80211_local *local = tx->local; struct ieee80211_supported_band *sband; struct ieee80211_hdr *hdr; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + + /* assume HW handles this */ + if (info->control.rates[0].flags & IEEE80211_TX_RC_MCS) + return 0; + + /* uh huh? */ + if (WARN_ON_ONCE(info->control.rates[0].idx < 0)) + return 0; sband = local->hw.wiphy->bands[tx->channel->band]; - txrate = &sband->bitrates[tx->rate_idx]; + txrate = &sband->bitrates[info->control.rates[0].idx]; - erp = 0; - if (tx->sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) - erp = txrate->flags & IEEE80211_RATE_ERP_G; + erp = txrate->flags & IEEE80211_RATE_ERP_G; /* * data and mgmt (except PS Poll): @@ -437,140 +444,154 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) static ieee80211_tx_result debug_noinline ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) { - struct rate_selection rsel; - struct ieee80211_supported_band *sband; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + struct ieee80211_hdr *hdr = (void *)tx->skb->data; + struct ieee80211_supported_band *sband; + struct ieee80211_rate *rate; + int i, len; + bool inval = false, rts = false, short_preamble = false; + struct ieee80211_tx_rate_control txrc; - sband = tx->local->hw.wiphy->bands[tx->channel->band]; + memset(&txrc, 0, sizeof(txrc)); - if (likely(tx->rate_idx < 0)) { - rate_control_get_rate(tx->sdata, sband, tx->sta, - tx->skb, &rsel); - if (tx->sta) - tx->sta->last_txrate_idx = rsel.rate_idx; - tx->rate_idx = rsel.rate_idx; - if (unlikely(rsel.probe_idx >= 0)) { - info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; - tx->flags |= IEEE80211_TX_PROBE_LAST_FRAG; - info->control.retries[0].rate_idx = tx->rate_idx; - info->control.retries[0].limit = tx->local->hw.max_altrate_tries; - tx->rate_idx = rsel.probe_idx; - } else if (info->control.retries[0].limit == 0) - info->control.retries[0].rate_idx = -1; - - if (unlikely(tx->rate_idx < 0)) - return TX_DROP; - } else - info->control.retries[0].rate_idx = -1; + sband = tx->local->hw.wiphy->bands[tx->channel->band]; - if (tx->sdata->vif.bss_conf.use_cts_prot && - (tx->flags & IEEE80211_TX_FRAGMENTED) && (rsel.nonerp_idx >= 0)) { - tx->last_frag_rate_idx = tx->rate_idx; - if (rsel.probe_idx >= 0) - tx->flags &= ~IEEE80211_TX_PROBE_LAST_FRAG; - else - tx->flags |= IEEE80211_TX_PROBE_LAST_FRAG; - tx->rate_idx = rsel.nonerp_idx; - info->tx_rate_idx = rsel.nonerp_idx; - info->flags &= ~IEEE80211_TX_CTL_RATE_CTRL_PROBE; - } else { - tx->last_frag_rate_idx = tx->rate_idx; - info->tx_rate_idx = tx->rate_idx; + len = min_t(int, tx->skb->len + FCS_LEN, + tx->local->fragmentation_threshold); + + /* set up the tx rate control struct we give the RC algo */ + txrc.hw = local_to_hw(tx->local); + txrc.sband = sband; + txrc.bss_conf = &tx->sdata->vif.bss_conf; + txrc.skb = tx->skb; + txrc.reported_rate.idx = -1; + txrc.max_rate_idx = tx->sdata->max_ratectrl_rateidx; + + /* set up RTS protection if desired */ + if (tx->local->rts_threshold < IEEE80211_MAX_RTS_THRESHOLD && + len > tx->local->rts_threshold) { + txrc.rts = rts = true; } - info->tx_rate_idx = tx->rate_idx; - return TX_CONTINUE; -} + /* + * Use short preamble if the BSS can handle it, but not for + * management frames unless we know the receiver can handle + * that -- the management frame might be to a station that + * just wants a probe response. + */ + if (tx->sdata->vif.bss_conf.use_short_preamble && + (ieee80211_is_data(hdr->frame_control) || + (tx->sta && test_sta_flags(tx->sta, WLAN_STA_SHORT_PREAMBLE)))) + txrc.short_preamble = short_preamble = true; -static ieee80211_tx_result debug_noinline -ieee80211_tx_h_misc(struct ieee80211_tx_data *tx) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); - struct ieee80211_supported_band *sband; - sband = tx->local->hw.wiphy->bands[tx->channel->band]; + rate_control_get_rate(tx->sdata, tx->sta, &txrc); + + if (unlikely(info->control.rates[0].idx < 0)) + return TX_DROP; + + if (txrc.reported_rate.idx < 0) + txrc.reported_rate = info->control.rates[0]; if (tx->sta) - info->control.sta = &tx->sta->sta; + tx->sta->last_tx_rate = txrc.reported_rate; - if (!info->control.retry_limit) { - if (!is_multicast_ether_addr(hdr->addr1)) { - int len = min_t(int, tx->skb->len + FCS_LEN, - tx->local->fragmentation_threshold); - if (len > tx->local->rts_threshold - && tx->local->rts_threshold < - IEEE80211_MAX_RTS_THRESHOLD) { - info->flags |= IEEE80211_TX_CTL_USE_RTS_CTS; - info->flags |= - IEEE80211_TX_CTL_LONG_RETRY_LIMIT; - info->control.retry_limit = - tx->local->hw.conf.long_frame_max_tx_count - 1; - } else { - info->control.retry_limit = - tx->local->hw.conf.short_frame_max_tx_count - 1; - } - } else { - info->control.retry_limit = 1; - } - } + if (unlikely(!info->control.rates[0].count)) + info->control.rates[0].count = 1; - if (tx->flags & IEEE80211_TX_FRAGMENTED) { - /* Do not use multiple retry rates when sending fragmented - * frames. - * TODO: The last fragment could still use multiple retry - * rates. */ - info->control.retries[0].rate_idx = -1; + if (is_multicast_ether_addr(hdr->addr1)) { + /* + * XXX: verify the rate is in the basic rateset + */ + return TX_CONTINUE; } - /* Use CTS protection for unicast frames sent using extended rates if - * there are associated non-ERP stations and RTS/CTS is not configured - * for the frame. */ - if ((tx->sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) && - (sband->bitrates[tx->rate_idx].flags & IEEE80211_RATE_ERP_G) && - (tx->flags & IEEE80211_TX_UNICAST) && - tx->sdata->vif.bss_conf.use_cts_prot && - !(info->flags & IEEE80211_TX_CTL_USE_RTS_CTS)) - info->flags |= IEEE80211_TX_CTL_USE_CTS_PROTECT; - - /* Transmit data frames using short preambles if the driver supports - * short preambles at the selected rate and short preambles are - * available on the network at the current point in time. */ - if (ieee80211_is_data(hdr->frame_control) && - (sband->bitrates[tx->rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE) && - tx->sdata->vif.bss_conf.use_short_preamble && - (!tx->sta || test_sta_flags(tx->sta, WLAN_STA_SHORT_PREAMBLE))) { - info->flags |= IEEE80211_TX_CTL_SHORT_PREAMBLE; + /* + * set up the RTS/CTS rate as the fastest basic rate + * that is not faster than the data rate + * + * XXX: Should this check all retry rates? + */ + if (!(info->control.rates[0].flags & IEEE80211_TX_RC_MCS)) { + s8 baserate = 0; + + rate = &sband->bitrates[info->control.rates[0].idx]; + + for (i = 0; i < sband->n_bitrates; i++) { + /* must be a basic rate */ + if (!(tx->sdata->vif.bss_conf.basic_rates & BIT(i))) + continue; + /* must not be faster than the data rate */ + if (sband->bitrates[i].bitrate > rate->bitrate) + continue; + /* maximum */ + if (sband->bitrates[baserate].bitrate < + sband->bitrates[i].bitrate) + baserate = i; + } + + info->control.rts_cts_rate_idx = baserate; } - if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) || - (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)) { - struct ieee80211_rate *rate; - s8 baserate = -1; - int idx; + for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { + /* + * make sure there's no valid rate following + * an invalid one, just in case drivers don't + * take the API seriously to stop at -1. + */ + if (inval) { + info->control.rates[i].idx = -1; + continue; + } + if (info->control.rates[i].idx < 0) { + inval = true; + continue; + } - /* Do not use multiple retry rates when using RTS/CTS */ - info->control.retries[0].rate_idx = -1; + /* + * For now assume MCS is already set up correctly, this + * needs to be fixed. + */ + if (info->control.rates[i].flags & IEEE80211_TX_RC_MCS) { + WARN_ON(info->control.rates[i].idx > 76); + continue; + } - /* Use min(data rate, max base rate) as CTS/RTS rate */ - rate = &sband->bitrates[tx->rate_idx]; + /* set up RTS protection if desired */ + if (rts) + info->control.rates[i].flags |= + IEEE80211_TX_RC_USE_RTS_CTS; - for (idx = 0; idx < sband->n_bitrates; idx++) { - if (sband->bitrates[idx].bitrate > rate->bitrate) - continue; - if (tx->sdata->vif.bss_conf.basic_rates & BIT(idx) && - (baserate < 0 || - (sband->bitrates[baserate].bitrate - < sband->bitrates[idx].bitrate))) - baserate = idx; + /* RC is busted */ + if (WARN_ON(info->control.rates[i].idx >= + sband->n_bitrates)) { + info->control.rates[i].idx = -1; + continue; } - if (baserate >= 0) - info->control.rts_cts_rate_idx = baserate; - else - info->control.rts_cts_rate_idx = 0; + rate = &sband->bitrates[info->control.rates[i].idx]; + + /* set up short preamble */ + if (short_preamble && + rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) + info->control.rates[i].flags |= + IEEE80211_TX_RC_USE_SHORT_PREAMBLE; + + /* set up G protection */ + if (!rts && tx->sdata->vif.bss_conf.use_cts_prot && + rate->flags & IEEE80211_RATE_ERP_G) + info->control.rates[i].flags |= + IEEE80211_TX_RC_USE_CTS_PROTECT; } + return TX_CONTINUE; +} + +static ieee80211_tx_result debug_noinline +ieee80211_tx_h_misc(struct ieee80211_tx_data *tx) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + if (tx->sta) info->control.sta = &tx->sta->sta; @@ -678,6 +699,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) left = payload_len - per_fragm; for (i = 0; i < num_fragm - 1; i++) { struct ieee80211_hdr *fhdr; + struct ieee80211_tx_info *info; size_t copylen; if (left <= 0) @@ -692,20 +714,45 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) IEEE80211_ENCRYPT_TAILROOM); if (!frag) goto fail; + /* Make sure that all fragments use the same priority so * that they end up using the same TX queue */ frag->priority = first->priority; + skb_reserve(frag, tx->local->tx_headroom + IEEE80211_ENCRYPT_HEADROOM); + + /* copy TX information */ + info = IEEE80211_SKB_CB(frag); + memcpy(info, first->cb, sizeof(frag->cb)); + + /* copy/fill in 802.11 header */ fhdr = (struct ieee80211_hdr *) skb_put(frag, hdrlen); memcpy(fhdr, first->data, hdrlen); - if (i == num_fragm - 2) - fhdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREFRAGS); fhdr->seq_ctrl = cpu_to_le16(seq | ((i + 1) & IEEE80211_SCTL_FRAG)); + + if (i == num_fragm - 2) { + /* clear MOREFRAGS bit for the last fragment */ + fhdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREFRAGS); + } else { + /* + * No multi-rate retries for fragmented frames, that + * would completely throw off the NAV at other STAs. + */ + info->control.rates[1].idx = -1; + info->control.rates[2].idx = -1; + info->control.rates[3].idx = -1; + info->control.rates[4].idx = -1; + BUILD_BUG_ON(IEEE80211_TX_MAX_RATES != 5); + info->flags &= ~IEEE80211_TX_CTL_RATE_CTRL_PROBE; + } + + /* copy data */ copylen = left > per_fragm ? per_fragm : left; memcpy(skb_put(frag, copylen), pos, copylen); - memcpy(frag->cb, first->cb, sizeof(frag->cb)); + skb_copy_queue_mapping(frag, first); + frag->do_not_encrypt = first->do_not_encrypt; pos += copylen; @@ -765,12 +812,10 @@ ieee80211_tx_h_calculate_duration(struct ieee80211_tx_data *tx) tx->extra_frag[0]->len); for (i = 0; i < tx->num_extra_frag; i++) { - if (i + 1 < tx->num_extra_frag) { + if (i + 1 < tx->num_extra_frag) next_len = tx->extra_frag[i + 1]->len; - } else { + else next_len = 0; - tx->rate_idx = tx->last_frag_rate_idx; - } hdr = (struct ieee80211_hdr *)tx->extra_frag[i]->data; hdr->duration_id = ieee80211_duration(tx, 0, next_len); @@ -823,7 +868,6 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, (struct ieee80211_radiotap_header *) skb->data; struct ieee80211_supported_band *sband; int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len); - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); sband = tx->local->hw.wiphy->bands[tx->channel->band]; @@ -837,8 +881,6 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, */ while (!ret) { - int i, target_rate; - ret = ieee80211_radiotap_iterator_next(&iterator); if (ret) @@ -852,38 +894,6 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, * get_unaligned((type *)iterator.this_arg) to dereference * iterator.this_arg for type "type" safely on all arches. */ - case IEEE80211_RADIOTAP_RATE: - /* - * radiotap rate u8 is in 500kbps units eg, 0x02=1Mbps - * ieee80211 rate int is in 100kbps units eg, 0x0a=1Mbps - */ - target_rate = (*iterator.this_arg) * 5; - for (i = 0; i < sband->n_bitrates; i++) { - struct ieee80211_rate *r; - - r = &sband->bitrates[i]; - - if (r->bitrate == target_rate) { - tx->rate_idx = i; - break; - } - } - break; - - case IEEE80211_RADIOTAP_ANTENNA: - /* - * radiotap uses 0 for 1st ant, mac80211 is 1 for - * 1st ant - */ - info->antenna_sel_tx = (*iterator.this_arg) + 1; - break; - -#if 0 - case IEEE80211_RADIOTAP_DBM_TX_POWER: - control->power_level = *iterator.this_arg; - break; -#endif - case IEEE80211_RADIOTAP_FLAGS: if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FCS) { /* @@ -949,8 +959,6 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, tx->local = local; tx->sdata = IEEE80211_DEV_TO_SUB_IF(dev); tx->channel = local->hw.conf.channel; - tx->rate_idx = -1; - tx->last_frag_rate_idx = -1; /* * Set this flag (used below to indicate "automatic fragmentation"), * it will be cleared/left by radiotap as desired. @@ -1051,23 +1059,11 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb, if (!tx->extra_frag[i]) continue; info = IEEE80211_SKB_CB(tx->extra_frag[i]); - info->flags &= ~(IEEE80211_TX_CTL_USE_RTS_CTS | - IEEE80211_TX_CTL_USE_CTS_PROTECT | - IEEE80211_TX_CTL_CLEAR_PS_FILT | + info->flags &= ~(IEEE80211_TX_CTL_CLEAR_PS_FILT | IEEE80211_TX_CTL_FIRST_FRAGMENT); if (netif_subqueue_stopped(local->mdev, tx->extra_frag[i])) return IEEE80211_TX_FRAG_AGAIN; - if (i == tx->num_extra_frag) { - info->tx_rate_idx = tx->last_frag_rate_idx; - - if (tx->flags & IEEE80211_TX_PROBE_LAST_FRAG) - info->flags |= - IEEE80211_TX_CTL_RATE_CTRL_PROBE; - else - info->flags &= - ~IEEE80211_TX_CTL_RATE_CTRL_PROBE; - } ret = local->ops->tx(local_to_hw(local), tx->extra_frag[i]); @@ -1204,9 +1200,6 @@ retry: store->skb = skb; store->extra_frag = tx.extra_frag; store->num_extra_frag = tx.num_extra_frag; - store->last_frag_rate_idx = tx.last_frag_rate_idx; - store->last_frag_rate_ctrl_probe = - !!(tx.flags & IEEE80211_TX_PROBE_LAST_FRAG); } out: rcu_read_unlock(); @@ -1763,10 +1756,7 @@ void ieee80211_tx_pending(unsigned long data) store = &local->pending_packet[i]; tx.extra_frag = store->extra_frag; tx.num_extra_frag = store->num_extra_frag; - tx.last_frag_rate_idx = store->last_frag_rate_idx; tx.flags = 0; - if (store->last_frag_rate_ctrl_probe) - tx.flags |= IEEE80211_TX_PROBE_LAST_FRAG; ret = __ieee80211_tx(local, store->skb, &tx); if (ret) { if (ret == IEEE80211_TX_FRAG_AGAIN) @@ -1854,7 +1844,6 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, struct ieee80211_sub_if_data *sdata = NULL; struct ieee80211_if_ap *ap = NULL; struct ieee80211_if_sta *ifsta = NULL; - struct rate_selection rsel; struct beacon_data *beacon; struct ieee80211_supported_band *sband; enum ieee80211_band band = local->hw.conf.channel->band; @@ -1958,32 +1947,23 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, skb->do_not_encrypt = 1; info->band = band; - rate_control_get_rate(sdata, sband, NULL, skb, &rsel); - - if (unlikely(rsel.rate_idx < 0)) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: ieee80211_beacon_get: " - "no rate found\n", - wiphy_name(local->hw.wiphy)); - } - dev_kfree_skb_any(skb); - skb = NULL; - goto out; - } + /* + * XXX: For now, always use the lowest rate + */ + info->control.rates[0].idx = 0; + info->control.rates[0].count = 1; + info->control.rates[1].idx = -1; + info->control.rates[2].idx = -1; + info->control.rates[3].idx = -1; + info->control.rates[4].idx = -1; + BUILD_BUG_ON(IEEE80211_TX_MAX_RATES != 5); info->control.vif = vif; - info->tx_rate_idx = rsel.rate_idx; info->flags |= IEEE80211_TX_CTL_NO_ACK; info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; - if (sdata->vif.bss_conf.use_short_preamble && - sband->bitrates[rsel.rate_idx].flags & IEEE80211_RATE_SHORT_PREAMBLE) - info->flags |= IEEE80211_TX_CTL_SHORT_PREAMBLE; - - info->control.retry_limit = 1; - -out: + out: rcu_read_unlock(); return skb; } diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index f7e442f80a17..31d2e74a1bc0 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -636,8 +636,8 @@ static int ieee80211_ioctl_giwrate(struct net_device *dev, sta = sta_info_get(local, sdata->u.sta.bssid); - if (sta && sta->last_txrate_idx < sband->n_bitrates) - rate->value = sband->bitrates[sta->last_txrate_idx].bitrate; + if (sta && !(sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS)) + rate->value = sband->bitrates[sta->last_tx_rate.idx].bitrate; else rate->value = 0; -- cgit v1.2.3 From 50fb2e4572141770380f5919793c6e575fa3474b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 16 Oct 2008 11:21:49 +0200 Subject: mac80211: remove rate_control_clear "Clearing" the rate control algorithm is pointless, none of the algorithms actually uses this operation and it's not even invoked properly for all channel switching. Also, there's no need to since rate control algorithms work per station. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/ath9k/rc.c | 6 ------ drivers/net/wireless/iwlwifi/iwl-3945-rs.c | 7 ------- drivers/net/wireless/iwlwifi/iwl-agn-rs.c | 14 -------------- include/net/mac80211.h | 1 - net/mac80211/rate.h | 6 ------ net/mac80211/rc80211_minstrel.c | 6 ------ net/mac80211/rc80211_pid_algo.c | 5 ----- net/mac80211/util.c | 2 -- 8 files changed, 47 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath9k/rc.c b/drivers/net/wireless/ath9k/rc.c index 6afafeddeda2..9ce535915a1a 100644 --- a/drivers/net/wireless/ath9k/rc.c +++ b/drivers/net/wireless/ath9k/rc.c @@ -2038,11 +2038,6 @@ static void ath_rate_init(void *priv, struct ieee80211_supported_band *sband, ath_rc_node_update(sc->hw, priv_sta); } -static void ath_rate_clear(void *priv) -{ - return; -} - static void *ath_rate_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) { struct ath_softc *sc = hw->priv; @@ -2092,7 +2087,6 @@ static struct rate_control_ops ath_rate_ops = { .tx_status = ath_tx_status, .get_rate = ath_get_rate, .rate_init = ath_rate_init, - .clear = ath_rate_clear, .alloc = ath_rate_alloc, .free = ath_rate_free, .alloc_sta = ath_rate_alloc_sta, diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c index f440ed0fe543..bfeef701b1fd 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c +++ b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c @@ -355,12 +355,6 @@ static void rs_free(void *priv) return; } -static void rs_clear(void *priv) -{ - return; -} - - static void *rs_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) { struct iwl3945_rs_sta *rs_sta; @@ -784,7 +778,6 @@ static struct rate_control_ops rs_ops = { .tx_status = rs_tx_status, .get_rate = rs_get_rate, .rate_init = rs_rate_init, - .clear = rs_clear, .alloc = rs_alloc, .free = rs_free, .alloc_sta = rs_alloc_sta, diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c index f685e5d6c281..6fdb2fb755b0 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c @@ -2393,19 +2393,6 @@ static void rs_free(void *priv_rate) return; } -static void rs_clear(void *priv_rate) -{ -#ifdef CONFIG_IWLWIFI_DEBUG - struct iwl_priv *priv = (struct iwl_priv *) priv_rate; - - IWL_DEBUG_RATE("enter\n"); - - /* TODO - add rate scale state reset */ - - IWL_DEBUG_RATE("leave\n"); -#endif /* CONFIG_IWLWIFI_DEBUG */ -} - static void rs_free_sta(void *priv_r, struct ieee80211_sta *sta, void *priv_sta) { @@ -2593,7 +2580,6 @@ static struct rate_control_ops rs_ops = { .tx_status = rs_tx_status, .get_rate = rs_get_rate, .rate_init = rs_rate_init, - .clear = rs_clear, .alloc = rs_alloc, .free = rs_free, .alloc_sta = rs_alloc_sta, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3741c0a7978a..e0b1ff9a3148 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1917,7 +1917,6 @@ struct rate_control_ops { struct module *module; const char *name; void *(*alloc)(struct ieee80211_hw *hw, struct dentry *debugfsdir); - void (*clear)(void *priv); void (*free)(void *priv); void *(*alloc_sta)(void *priv, struct ieee80211_sta *sta, gfp_t gfp); diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 7c25edf9ac55..928da625e281 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -63,12 +63,6 @@ static inline void rate_control_rate_init(struct sta_info *sta) } -static inline void rate_control_clear(struct ieee80211_local *local) -{ - struct rate_control_ref *ref = local->rate_ctrl; - ref->ops->clear(ref->priv); -} - static inline void *rate_control_alloc_sta(struct rate_control_ref *ref, struct ieee80211_sta *sta, gfp_t gfp) diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index c10debc29ad6..c643e373fc50 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -507,11 +507,6 @@ minstrel_free_sta(void *priv, struct ieee80211_sta *sta, void *priv_sta) kfree(mi); } -static void -minstrel_clear(void *priv) -{ -} - static void * minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) { @@ -565,7 +560,6 @@ static struct rate_control_ops mac80211_minstrel = { .tx_status = minstrel_tx_status, .get_rate = minstrel_get_rate, .rate_init = minstrel_rate_init, - .clear = minstrel_clear, .alloc = minstrel_alloc, .free = minstrel_free, .alloc_sta = minstrel_alloc_sta, diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c index 92caecfcee78..2328ba568039 100644 --- a/net/mac80211/rc80211_pid_algo.c +++ b/net/mac80211/rc80211_pid_algo.c @@ -446,10 +446,6 @@ static void rate_control_pid_free(void *priv) kfree(pinfo); } -static void rate_control_pid_clear(void *priv) -{ -} - static void *rate_control_pid_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) { @@ -480,7 +476,6 @@ static struct rate_control_ops mac80211_rcpid = { .tx_status = rate_control_pid_tx_status, .get_rate = rate_control_pid_get_rate, .rate_init = rate_control_pid_rate_init, - .clear = rate_control_pid_clear, .alloc = rate_control_pid_alloc, .free = rate_control_pid_free, .alloc_sta = rate_control_pid_alloc_sta, diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ec8b6335f0c1..0f841317c7e9 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -647,8 +647,6 @@ int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz) else ret = ieee80211_hw_config( local, IEEE80211_CONF_CHANGE_CHANNEL); - - rate_control_clear(local); } return ret; -- cgit v1.2.3 From 93da9cc17c5ae8a751886fd4732db89ad5e9bdb9 Mon Sep 17 00:00:00 2001 From: "colin@cozybit.com" Date: Tue, 21 Oct 2008 12:03:48 -0700 Subject: Add nl80211 commands to get and set o11s mesh networking parameters The two new commands are NL80211_CMD_GET_MESH_PARAMS and NL80211_CMD_SET_MESH_PARAMS. There is a new attribute enum, NL80211_ATTR_MESH_PARAMS, which enumerates the various mesh configuration parameters. Moved struct mesh_config from mac80211/ieee80211_i.h to net/cfg80211.h. nl80211_get_mesh_params and nl80211_set_mesh_params unpack the netlink messages and ask the driver to get or set the configuration. This is done via two new function stubs, get_mesh_params and set_mesh_params, in struct cfg80211_ops. Signed-off-by: Colin McCabe Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 86 ++++++++++++++++++++ include/net/cfg80211.h | 32 +++++++- net/mac80211/cfg.c | 68 ++++++++++++++++ net/mac80211/ieee80211_i.h | 21 +---- net/wireless/nl80211.c | 191 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 377 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 41720d47d618..e4cc7869b22f 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -106,6 +106,12 @@ * to the the specified ISO/IEC 3166-1 alpha2 country code. The core will * store this as a valid request and then query userspace for it. * + * @NL80211_CMD_GET_MESH_PARAMS: Get mesh networking properties for the + * interface identified by %NL80211_ATTR_IFINDEX + * + * @NL80211_CMD_SET_MESH_PARAMS: Set mesh networking properties for the + * interface identified by %NL80211_ATTR_IFINDEX + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -148,6 +154,9 @@ enum nl80211_commands { NL80211_CMD_SET_REG, NL80211_CMD_REQ_SET_REG, + NL80211_CMD_GET_MESH_PARAMS, + NL80211_CMD_SET_MESH_PARAMS, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -296,6 +305,8 @@ enum nl80211_attrs { NL80211_ATTR_REG_ALPHA2, NL80211_ATTR_REG_RULES, + NL80211_ATTR_MESH_PARAMS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -606,4 +617,79 @@ enum nl80211_mntr_flags { NL80211_MNTR_FLAG_MAX = __NL80211_MNTR_FLAG_AFTER_LAST - 1 }; +/** + * enum nl80211_meshconf_params - mesh configuration parameters + * + * Mesh configuration parameters + * + * @__NL80211_MESHCONF_INVALID: internal use + * + * @NL80211_MESHCONF_RETRY_TIMEOUT: specifies the initial retry timeout in + * millisecond units, used by the Peer Link Open message + * + * @NL80211_MESHCONF_CONFIRM_TIMEOUT: specifies the inital confirm timeout, in + * millisecond units, used by the peer link management to close a peer link + * + * @NL80211_MESHCONF_HOLDING_TIMEOUT: specifies the holding timeout, in + * millisecond units + * + * @NL80211_MESHCONF_MAX_PEER_LINKS: maximum number of peer links allowed + * on this mesh interface + * + * @NL80211_MESHCONF_MAX_RETRIES: specifies the maximum number of peer link + * open retries that can be sent to establish a new peer link instance in a + * mesh + * + * @NL80211_MESHCONF_TTL: specifies the value of TTL field set at a source mesh + * point. + * + * @NL80211_MESHCONF_AUTO_OPEN_PLINKS: whether we should automatically + * open peer links when we detect compatible mesh peers. + * + * @NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES: the number of action frames + * containing a PREQ that an MP can send to a particular destination (path + * target) + * + * @NL80211_MESHCONF_PATH_REFRESH_TIME: how frequently to refresh mesh paths + * (in milliseconds) + * + * @NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT: minimum length of time to wait + * until giving up on a path discovery (in milliseconds) + * + * @NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT: The time (in TUs) for which mesh + * points receiving a PREQ shall consider the forwarding information from the + * root to be valid. (TU = time unit) + * + * @NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL: The minimum interval of time (in + * TUs) during which an MP can send only one action frame containing a PREQ + * reference element + * + * @NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME: The interval of time (in TUs) + * that it takes for an HWMP information element to propagate across the mesh + * + * @NL80211_MESHCONF_ATTR_MAX: highest possible mesh configuration attribute + * + * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use + */ +enum nl80211_meshconf_params { + __NL80211_MESHCONF_INVALID, + NL80211_MESHCONF_RETRY_TIMEOUT, + NL80211_MESHCONF_CONFIRM_TIMEOUT, + NL80211_MESHCONF_HOLDING_TIMEOUT, + NL80211_MESHCONF_MAX_PEER_LINKS, + NL80211_MESHCONF_MAX_RETRIES, + NL80211_MESHCONF_TTL, + NL80211_MESHCONF_AUTO_OPEN_PLINKS, + NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, + NL80211_MESHCONF_PATH_REFRESH_TIME, + NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, + NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, + NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, + NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, + + /* keep last */ + __NL80211_MESHCONF_ATTR_AFTER_LAST, + NL80211_MESHCONF_ATTR_MAX = __NL80211_MESHCONF_ATTR_AFTER_LAST - 1 +}; + #endif /* __LINUX_NL80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0e85ec39b638..03e1e88c6a09 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -347,6 +347,25 @@ struct ieee80211_regdomain { .flags = reg_flags, \ } +struct mesh_config { + /* Timeouts in ms */ + /* Mesh plink management parameters */ + u16 dot11MeshRetryTimeout; + u16 dot11MeshConfirmTimeout; + u16 dot11MeshHoldingTimeout; + u16 dot11MeshMaxPeerLinks; + u8 dot11MeshMaxRetries; + u8 dot11MeshTTL; + bool auto_open_plinks; + /* HWMP parameters */ + u8 dot11MeshHWMPmaxPREQretries; + u32 path_refresh_time; + u16 min_discovery_timeout; + u32 dot11MeshHWMPactivePathTimeout; + u16 dot11MeshHWMPpreqMinInterval; + u16 dot11MeshHWMPnetDiameterTraversalTime; +}; + /* from net/wireless.h */ struct wiphy; @@ -397,6 +416,12 @@ struct wiphy; * * @change_station: Modify a given station. * + * @get_mesh_params: Put the current mesh parameters into *params + * + * @set_mesh_params: Set mesh parameters. + * The mask is a bitfield which tells us which parameters to + * set, and which to leave alone. + * * @set_mesh_cfg: set mesh parameters (by now, just mesh id) * * @change_bss: Modify parameters for a given BSS. @@ -452,7 +477,12 @@ struct cfg80211_ops { int (*dump_mpath)(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *dst, u8 *next_hop, struct mpath_info *pinfo); - + int (*get_mesh_params)(struct wiphy *wiphy, + struct net_device *dev, + struct mesh_config *conf); + int (*set_mesh_params)(struct wiphy *wiphy, + struct net_device *dev, + const struct mesh_config *nconf, u32 mask); int (*change_bss)(struct wiphy *wiphy, struct net_device *dev, struct bss_parameters *params); }; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 55e3a26510ed..91f56a48e2b4 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -951,6 +951,72 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, rcu_read_unlock(); return 0; } + +static int ieee80211_get_mesh_params(struct wiphy *wiphy, + struct net_device *dev, + struct mesh_config *conf) +{ + struct ieee80211_sub_if_data *sdata; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) + return -ENOTSUPP; + memcpy(conf, &(sdata->u.mesh.mshcfg), sizeof(struct mesh_config)); + return 0; +} + +static inline bool _chg_mesh_attr(enum nl80211_meshconf_params parm, u32 mask) +{ + return (mask >> (parm-1)) & 0x1; +} + +static int ieee80211_set_mesh_params(struct wiphy *wiphy, + struct net_device *dev, + const struct mesh_config *nconf, u32 mask) +{ + struct mesh_config *conf; + struct ieee80211_sub_if_data *sdata; + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) + return -ENOTSUPP; + + /* Set the config options which we are interested in setting */ + conf = &(sdata->u.mesh.mshcfg); + if (_chg_mesh_attr(NL80211_MESHCONF_RETRY_TIMEOUT, mask)) + conf->dot11MeshRetryTimeout = nconf->dot11MeshRetryTimeout; + if (_chg_mesh_attr(NL80211_MESHCONF_CONFIRM_TIMEOUT, mask)) + conf->dot11MeshConfirmTimeout = nconf->dot11MeshConfirmTimeout; + if (_chg_mesh_attr(NL80211_MESHCONF_HOLDING_TIMEOUT, mask)) + conf->dot11MeshHoldingTimeout = nconf->dot11MeshHoldingTimeout; + if (_chg_mesh_attr(NL80211_MESHCONF_MAX_PEER_LINKS, mask)) + conf->dot11MeshMaxPeerLinks = nconf->dot11MeshMaxPeerLinks; + if (_chg_mesh_attr(NL80211_MESHCONF_MAX_RETRIES, mask)) + conf->dot11MeshMaxRetries = nconf->dot11MeshMaxRetries; + if (_chg_mesh_attr(NL80211_MESHCONF_TTL, mask)) + conf->dot11MeshTTL = nconf->dot11MeshTTL; + if (_chg_mesh_attr(NL80211_MESHCONF_AUTO_OPEN_PLINKS, mask)) + conf->auto_open_plinks = nconf->auto_open_plinks; + if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, mask)) + conf->dot11MeshHWMPmaxPREQretries = + nconf->dot11MeshHWMPmaxPREQretries; + if (_chg_mesh_attr(NL80211_MESHCONF_PATH_REFRESH_TIME, mask)) + conf->path_refresh_time = nconf->path_refresh_time; + if (_chg_mesh_attr(NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, mask)) + conf->min_discovery_timeout = nconf->min_discovery_timeout; + if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, mask)) + conf->dot11MeshHWMPactivePathTimeout = + nconf->dot11MeshHWMPactivePathTimeout; + if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, mask)) + conf->dot11MeshHWMPpreqMinInterval = + nconf->dot11MeshHWMPpreqMinInterval; + if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, + mask)) + conf->dot11MeshHWMPnetDiameterTraversalTime = + nconf->dot11MeshHWMPnetDiameterTraversalTime; + return 0; +} + #endif static int ieee80211_change_bss(struct wiphy *wiphy, @@ -1007,6 +1073,8 @@ struct cfg80211_ops mac80211_config_ops = { .change_mpath = ieee80211_change_mpath, .get_mpath = ieee80211_get_mpath, .dump_mpath = ieee80211_dump_mpath, + .set_mesh_params = ieee80211_set_mesh_params, + .get_mesh_params = ieee80211_get_mesh_params, #endif .change_bss = ieee80211_change_bss, }; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index fe4efdd4253d..2c91108e3901 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -247,26 +248,6 @@ struct mesh_preq_queue { u8 flags; }; -struct mesh_config { - /* Timeouts in ms */ - /* Mesh plink management parameters */ - u16 dot11MeshRetryTimeout; - u16 dot11MeshConfirmTimeout; - u16 dot11MeshHoldingTimeout; - u16 dot11MeshMaxPeerLinks; - u8 dot11MeshMaxRetries; - u8 dot11MeshTTL; - bool auto_open_plinks; - /* HWMP parameters */ - u8 dot11MeshHWMPmaxPREQretries; - u32 path_refresh_time; - u16 min_discovery_timeout; - u32 dot11MeshHWMPactivePathTimeout; - u16 dot11MeshHWMPpreqMinInterval; - u16 dot11MeshHWMPnetDiameterTraversalTime; -}; - - /* flags used in struct ieee80211_if_sta.flags */ #define IEEE80211_STA_SSID_SET BIT(0) #define IEEE80211_STA_BSSID_SET BIT(1) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2b87aec231ea..9a16e9e6c5ca 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -96,6 +96,8 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_BSS_SHORT_PREAMBLE] = { .type = NLA_U8 }, [NL80211_ATTR_BSS_SHORT_SLOT_TIME] = { .type = NLA_U8 }, + [NL80211_ATTR_MESH_PARAMS] = { .type = NLA_NESTED }, + [NL80211_ATTR_HT_CAPABILITY] = { .type = NLA_BINARY, .len = NL80211_HT_CAPABILITY_LEN }, }; @@ -1698,6 +1700,183 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) return r; } +static int nl80211_get_mesh_params(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + struct mesh_config cur_params; + int err; + struct net_device *dev; + void *hdr; + struct nlattr *pinfoattr; + struct sk_buff *msg; + + /* Look up our device */ + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); + if (err) + return err; + + /* Get the mesh params */ + rtnl_lock(); + err = drv->ops->get_mesh_params(&drv->wiphy, dev, &cur_params); + rtnl_unlock(); + if (err) + goto out; + + /* Draw up a netlink message to send back */ + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) { + err = -ENOBUFS; + goto out; + } + hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + NL80211_CMD_GET_MESH_PARAMS); + if (!hdr) + goto nla_put_failure; + pinfoattr = nla_nest_start(msg, NL80211_ATTR_MESH_PARAMS); + if (!pinfoattr) + goto nla_put_failure; + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); + NLA_PUT_U16(msg, NL80211_MESHCONF_RETRY_TIMEOUT, + cur_params.dot11MeshRetryTimeout); + NLA_PUT_U16(msg, NL80211_MESHCONF_CONFIRM_TIMEOUT, + cur_params.dot11MeshConfirmTimeout); + NLA_PUT_U16(msg, NL80211_MESHCONF_HOLDING_TIMEOUT, + cur_params.dot11MeshHoldingTimeout); + NLA_PUT_U16(msg, NL80211_MESHCONF_MAX_PEER_LINKS, + cur_params.dot11MeshMaxPeerLinks); + NLA_PUT_U8(msg, NL80211_MESHCONF_MAX_RETRIES, + cur_params.dot11MeshMaxRetries); + NLA_PUT_U8(msg, NL80211_MESHCONF_TTL, + cur_params.dot11MeshTTL); + NLA_PUT_U8(msg, NL80211_MESHCONF_AUTO_OPEN_PLINKS, + cur_params.auto_open_plinks); + NLA_PUT_U8(msg, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, + cur_params.dot11MeshHWMPmaxPREQretries); + NLA_PUT_U32(msg, NL80211_MESHCONF_PATH_REFRESH_TIME, + cur_params.path_refresh_time); + NLA_PUT_U16(msg, NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, + cur_params.min_discovery_timeout); + NLA_PUT_U32(msg, NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, + cur_params.dot11MeshHWMPactivePathTimeout); + NLA_PUT_U16(msg, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, + cur_params.dot11MeshHWMPpreqMinInterval); + NLA_PUT_U16(msg, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, + cur_params.dot11MeshHWMPnetDiameterTraversalTime); + nla_nest_end(msg, pinfoattr); + genlmsg_end(msg, hdr); + err = genlmsg_unicast(msg, info->snd_pid); + goto out; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + err = -EMSGSIZE; +out: + /* Cleanup */ + cfg80211_put_dev(drv); + dev_put(dev); + return err; +} + +#define FILL_IN_MESH_PARAM_IF_SET(table, cfg, param, mask, attr_num, nla_fn) \ +do {\ + if (table[attr_num]) {\ + cfg.param = nla_fn(table[attr_num]); \ + mask |= (1 << (attr_num - 1)); \ + } \ +} while (0);\ + +static struct nla_policy +nl80211_meshconf_params_policy[NL80211_MESHCONF_ATTR_MAX+1] __read_mostly = { + [NL80211_MESHCONF_RETRY_TIMEOUT] = { .type = NLA_U16 }, + [NL80211_MESHCONF_CONFIRM_TIMEOUT] = { .type = NLA_U16 }, + [NL80211_MESHCONF_HOLDING_TIMEOUT] = { .type = NLA_U16 }, + [NL80211_MESHCONF_MAX_PEER_LINKS] = { .type = NLA_U16 }, + [NL80211_MESHCONF_MAX_RETRIES] = { .type = NLA_U8 }, + [NL80211_MESHCONF_TTL] = { .type = NLA_U8 }, + [NL80211_MESHCONF_AUTO_OPEN_PLINKS] = { .type = NLA_U8 }, + + [NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES] = { .type = NLA_U8 }, + [NL80211_MESHCONF_PATH_REFRESH_TIME] = { .type = NLA_U32 }, + [NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT] = { .type = NLA_U16 }, + [NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT] = { .type = NLA_U32 }, + [NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL] = { .type = NLA_U16 }, + [NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME] = { .type = NLA_U16 }, +}; + +static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info) +{ + int err; + u32 mask; + struct cfg80211_registered_device *drv; + struct net_device *dev; + struct mesh_config cfg; + struct nlattr *tb[NL80211_MESHCONF_ATTR_MAX + 1]; + struct nlattr *parent_attr; + + parent_attr = info->attrs[NL80211_ATTR_MESH_PARAMS]; + if (!parent_attr) + return -EINVAL; + if (nla_parse_nested(tb, NL80211_MESHCONF_ATTR_MAX, + parent_attr, nl80211_meshconf_params_policy)) + return -EINVAL; + + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); + if (err) + return err; + + /* This makes sure that there aren't more than 32 mesh config + * parameters (otherwise our bitfield scheme would not work.) */ + BUILD_BUG_ON(NL80211_MESHCONF_ATTR_MAX > 32); + + /* Fill in the params struct */ + mask = 0; + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout, + mask, NL80211_MESHCONF_RETRY_TIMEOUT, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConfirmTimeout, + mask, NL80211_MESHCONF_CONFIRM_TIMEOUT, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHoldingTimeout, + mask, NL80211_MESHCONF_HOLDING_TIMEOUT, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxPeerLinks, + mask, NL80211_MESHCONF_MAX_PEER_LINKS, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxRetries, + mask, NL80211_MESHCONF_MAX_RETRIES, nla_get_u8); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL, + mask, NL80211_MESHCONF_TTL, nla_get_u8); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks, + mask, NL80211_MESHCONF_AUTO_OPEN_PLINKS, nla_get_u8); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries, + mask, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, + nla_get_u8); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, path_refresh_time, + mask, NL80211_MESHCONF_PATH_REFRESH_TIME, nla_get_u32); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, min_discovery_timeout, + mask, NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, + nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathTimeout, + mask, NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, + nla_get_u32); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPpreqMinInterval, + mask, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, + nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, + dot11MeshHWMPnetDiameterTraversalTime, + mask, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, + nla_get_u16); + + /* Apply changes */ + rtnl_lock(); + err = drv->ops->set_mesh_params(&drv->wiphy, dev, &cfg, mask); + rtnl_unlock(); + + /* cleanup */ + cfg80211_put_dev(drv); + dev_put(dev); + return err; +} + +#undef FILL_IN_MESH_PARAM_IF_SET + static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb[NL80211_REG_RULE_ATTR_MAX + 1]; @@ -1915,6 +2094,18 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_GET_MESH_PARAMS, + .doit = nl80211_get_mesh_params, + .policy = nl80211_policy, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = NL80211_CMD_SET_MESH_PARAMS, + .doit = nl80211_set_mesh_params, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; /* multicast groups */ -- cgit v1.2.3 From e37d4dffdffb7f834bd28d4ae8e3dcdf07fce508 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Mon, 20 Oct 2008 21:20:27 -0400 Subject: mac80211: fix a few typos in mac80211 kernel doc Correct a handful of errors found while reading the mac80211 book. Signed-off-by: Bob Copeland Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e0b1ff9a3148..16c895969e6e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -99,7 +99,7 @@ enum ieee80211_max_queues { * The information provided in this structure is required for QoS * transmit queue configuration. Cf. IEEE 802.11 7.3.2.29. * - * @aifs: arbitration interface space [0..255] + * @aifs: arbitration interframe space [0..255] * @cw_min: minimum contention window [a value of the form * 2^n-1 in the range 1..32767] * @cw_max: maximum contention window [like @cw_min] @@ -950,7 +950,7 @@ static inline void SET_IEEE80211_DEV(struct ieee80211_hw *hw, struct device *dev } /** - * SET_IEEE80211_PERM_ADDR - set the permanenet MAC address for 802.11 hardware + * SET_IEEE80211_PERM_ADDR - set the permanent MAC address for 802.11 hardware * * @hw: the &struct ieee80211_hw to set the MAC address for * @addr: the address to set @@ -1043,7 +1043,7 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * This happens everytime the iv16 wraps around (every 65536 packets). The * set_key() call will happen only once for each key (unless the AP did * rekeying), it will not include a valid phase 1 key. The valid phase 1 key is - * provided by udpate_tkip_key only. The trigger that makes mac80211 call this + * provided by update_tkip_key only. The trigger that makes mac80211 call this * handler is software decryption with wrap around of iv16. */ @@ -1177,7 +1177,7 @@ enum ieee80211_ampdu_mlme_action { * Must be implemented. * * @add_interface: Called when a netdevice attached to the hardware is - * enabled. Because it is not called for monitor mode devices, @open + * enabled. Because it is not called for monitor mode devices, @start * and @stop must be implemented. * The driver should perform any initialization it needs before * the device can be enabled. The initial configuration for the @@ -1244,7 +1244,7 @@ enum ieee80211_ampdu_mlme_action { * the stack will not do fragmentation. * * @sta_notify: Notifies low level driver about addition or removal - * of assocaited station or AP. + * of associated station or AP. * * @conf_tx: Configure TX queue parameters (EDCF (aifs, cw_min, cw_max), * bursting) for a hardware TX queue. @@ -1544,7 +1544,7 @@ void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, * the next beacon frame from the 802.11 code. The low-level is responsible * for calling this function before beacon data is needed (e.g., based on * hardware interrupt). Returned skb is used only once and low-level driver - * is responsible of freeing it. + * is responsible for freeing it. */ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif); -- cgit v1.2.3 From cf03268e6ed6cfacaa5e32db41ea832c4d10438b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 21 Oct 2008 09:42:38 +0200 Subject: wireless: don't publish __regulatory_hint This function requires an internal lock to be held, so it cannot be published to other modules in the kernel. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/wireless.h | 37 +++++++++---------------------------- net/wireless/reg.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/net/wireless.h b/include/net/wireless.h index c0aa0fb8db5e..061fe5017e5c 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -340,33 +340,6 @@ ieee80211_get_channel(struct wiphy *wiphy, int freq) return __ieee80211_get_channel(wiphy, freq); } -/** - * __regulatory_hint - hint to the wireless core a regulatory domain - * @wiphy: if a driver is providing the hint this is the driver's very - * own &struct wiphy - * @alpha2: the ISO/IEC 3166 alpha2 being claimed the regulatory domain - * should be in. If @rd is set this should be NULL - * @rd: a complete regulatory domain, if passed the caller need not worry - * about freeing it - * - * The Wireless subsystem can use this function to hint to the wireless core - * what it believes should be the current regulatory domain by - * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory - * domain should be in or by providing a completely build regulatory domain. - * - * Returns -EALREADY if *a regulatory domain* has already been set. Note that - * this could be by another driver. It is safe for drivers to continue if - * -EALREADY is returned, if drivers are not capable of world roaming they - * should not register more channels than they support. Right now we only - * support listening to the first driver hint. If the driver is capable - * of world roaming but wants to respect its own EEPROM mappings for - * specific regulatory domains it should register the @reg_notifier callback - * on the &struct wiphy. Returns 0 if the hint went through fine or through an - * intersection operation. Otherwise a standard error code is returned. - * - */ -extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, - const char *alpha2, struct ieee80211_regdomain *rd); /** * regulatory_hint - driver hint to the wireless core a regulatory domain * @wiphy: the driver's very own &struct wiphy @@ -388,7 +361,15 @@ extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, * the wireless core it is unknown. If you pass a built regulatory domain * and we return non zero you are in charge of kfree()'ing the structure. * - * See __regulatory_hint() documentation for possible return values. + * Returns -EALREADY if *a regulatory domain* has already been set. Note that + * this could be by another driver. It is safe for drivers to continue if + * -EALREADY is returned, if drivers are not capable of world roaming they + * should not register more channels than they support. Right now we only + * support listening to the first driver hint. If the driver is capable + * of world roaming but wants to respect its own EEPROM mappings for + * specific regulatory domains it should register the @reg_notifier callback + * on the &struct wiphy. Returns 0 if the hint went through fine or through an + * intersection operation. Otherwise a standard error code is returned. */ extern int regulatory_hint(struct wiphy *wiphy, const char *alpha2, struct ieee80211_regdomain *rd); diff --git a/net/wireless/reg.h b/net/wireless/reg.h index a33362872f3c..a4845b140a84 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -10,4 +10,32 @@ void regulatory_exit(void); int set_regdom(const struct ieee80211_regdomain *rd); +/** + * __regulatory_hint - hint to the wireless core a regulatory domain + * @wiphy: if a driver is providing the hint this is the driver's very + * own &struct wiphy + * @alpha2: the ISO/IEC 3166 alpha2 being claimed the regulatory domain + * should be in. If @rd is set this should be NULL + * @rd: a complete regulatory domain, if passed the caller need not worry + * about freeing it + * + * The Wireless subsystem can use this function to hint to the wireless core + * what it believes should be the current regulatory domain by + * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory + * domain should be in or by providing a completely build regulatory domain. + * + * Returns -EALREADY if *a regulatory domain* has already been set. Note that + * this could be by another driver. It is safe for drivers to continue if + * -EALREADY is returned, if drivers are not capable of world roaming they + * should not register more channels than they support. Right now we only + * support listening to the first driver hint. If the driver is capable + * of world roaming but wants to respect its own EEPROM mappings for + * specific regulatory domains it should register the @reg_notifier callback + * on the &struct wiphy. Returns 0 if the hint went through fine or through an + * intersection operation. Otherwise a standard error code is returned. + * + */ +extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, + const char *alpha2, struct ieee80211_regdomain *rd); + #endif /* __NET_WIRELESS_REG_H */ -- cgit v1.2.3 From 7e272fcff6f0a32a3d46e600ea5895f6058f4e2d Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Wed, 24 Sep 2008 18:13:14 -0400 Subject: wireless: consolidate on a single escape_essid implementation Signed-off-by: John W. Linville --- drivers/net/wireless/Kconfig | 1 + drivers/net/wireless/ipw2100.c | 10 +-- drivers/net/wireless/ipw2200.c | 118 ++++++++++++++-------------- drivers/net/wireless/ipw2200.h | 1 + drivers/net/wireless/iwlwifi/Kconfig | 2 + drivers/net/wireless/iwlwifi/iwl-scan.c | 52 ++---------- drivers/net/wireless/iwlwifi/iwl3945-base.c | 48 +---------- drivers/net/wireless/libertas/assoc.c | 14 ++-- drivers/net/wireless/libertas/cmd.c | 3 +- drivers/net/wireless/libertas/debugfs.c | 3 +- drivers/net/wireless/libertas/decl.h | 4 - drivers/net/wireless/libertas/main.c | 27 ------- drivers/net/wireless/libertas/scan.c | 11 ++- drivers/net/wireless/libertas/wext.c | 3 +- include/net/ieee80211.h | 20 ----- include/net/lib80211.h | 31 ++++++++ net/ieee80211/Kconfig | 1 + net/ieee80211/ieee80211_module.c | 26 ------ net/ieee80211/ieee80211_rx.c | 31 ++++---- net/ieee80211/ieee80211_wx.c | 6 +- net/wireless/Kconfig | 10 +++ net/wireless/Makefile | 1 + net/wireless/lib80211.c | 58 ++++++++++++++ 23 files changed, 219 insertions(+), 262 deletions(-) create mode 100644 include/net/lib80211.h create mode 100644 net/wireless/lib80211.c (limited to 'include') diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig index 45bdf0b339bb..42afaedbb219 100644 --- a/drivers/net/wireless/Kconfig +++ b/drivers/net/wireless/Kconfig @@ -271,6 +271,7 @@ config LIBERTAS tristate "Marvell 8xxx Libertas WLAN driver support" depends on WLAN_80211 select WIRELESS_EXT + select LIB80211 select FW_LOADER ---help--- A library for Marvell Libertas 8xxx devices. diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c index 079dbd07e2f6..223914e3e07e 100644 --- a/drivers/net/wireless/ipw2100.c +++ b/drivers/net/wireless/ipw2100.c @@ -1975,7 +1975,7 @@ static void isr_indicate_associated(struct ipw2100_priv *priv, u32 status) } IPW_DEBUG_INFO("%s: Associated with '%s' at %s, channel %d (BSSID=%pM)\n", - priv->net_dev->name, escape_essid(essid, essid_len), + priv->net_dev->name, escape_ssid(essid, essid_len), txratename, chan, bssid); /* now we copy read ssid into dev */ @@ -2003,7 +2003,7 @@ static int ipw2100_set_essid(struct ipw2100_priv *priv, char *essid, }; int err; - IPW_DEBUG_HC("SSID: '%s'\n", escape_essid(essid, ssid_len)); + IPW_DEBUG_HC("SSID: '%s'\n", escape_ssid(essid, ssid_len)); if (ssid_len) memcpy(cmd.host_command_parameters, essid, ssid_len); @@ -2046,7 +2046,7 @@ static void isr_indicate_association_lost(struct ipw2100_priv *priv, u32 status) { IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE | IPW_DL_ASSOC, "disassociated: '%s' %pM \n", - escape_essid(priv->essid, priv->essid_len), + escape_ssid(priv->essid, priv->essid_len), priv->bssid); priv->status &= ~(STATUS_ASSOCIATED | STATUS_ASSOCIATING); @@ -6987,7 +6987,7 @@ static int ipw2100_wx_set_essid(struct net_device *dev, goto done; } - IPW_DEBUG_WX("Setting ESSID: '%s' (%d)\n", escape_essid(essid, length), + IPW_DEBUG_WX("Setting ESSID: '%s' (%d)\n", escape_ssid(essid, length), length); priv->essid_len = length; @@ -7014,7 +7014,7 @@ static int ipw2100_wx_get_essid(struct net_device *dev, * configured ESSID then return that; otherwise return ANY */ if (priv->config & CFG_STATIC_ESSID || priv->status & STATUS_ASSOCIATED) { IPW_DEBUG_WX("Getting essid: '%s'\n", - escape_essid(priv->essid, priv->essid_len)); + escape_ssid(priv->essid, priv->essid_len)); memcpy(extra, priv->essid, priv->essid_len); wrqu->essid.length = priv->essid_len; wrqu->essid.flags = 1; /* active */ diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c index 6ec6de2960ee..22278f87d1c1 100644 --- a/drivers/net/wireless/ipw2200.c +++ b/drivers/net/wireless/ipw2200.c @@ -4409,8 +4409,8 @@ static void ipw_rx_notification(struct ipw_priv *priv, IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE | IPW_DL_ASSOC, "associated: '%s' %pM \n", - escape_essid(priv->essid, - priv->essid_len), + escape_ssid(priv->essid, + priv->essid_len), priv->bssid); switch (priv->ieee->iw_mode) { @@ -4490,10 +4490,10 @@ static void ipw_rx_notification(struct ipw_priv *priv, "deauthenticated: '%s' " "%pM" ": (0x%04X) - %s \n", - escape_essid(priv-> - essid, - priv-> - essid_len), + escape_ssid(priv-> + essid, + priv-> + essid_len), priv->bssid, le16_to_cpu(auth->status), ipw_get_status_code @@ -4512,7 +4512,7 @@ static void ipw_rx_notification(struct ipw_priv *priv, IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE | IPW_DL_ASSOC, "authenticated: '%s' %pM\n", - escape_essid(priv->essid, + escape_ssid(priv->essid, priv->essid_len), priv->bssid); break; @@ -4540,7 +4540,7 @@ static void ipw_rx_notification(struct ipw_priv *priv, IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE | IPW_DL_ASSOC, "disassociated: '%s' %pM \n", - escape_essid(priv->essid, + escape_ssid(priv->essid, priv->essid_len), priv->bssid); @@ -4578,7 +4578,7 @@ static void ipw_rx_notification(struct ipw_priv *priv, case CMAS_AUTHENTICATED: IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE, "authenticated: '%s' %pM \n", - escape_essid(priv->essid, + escape_ssid(priv->essid, priv->essid_len), priv->bssid); priv->status |= STATUS_AUTH; @@ -4597,8 +4597,8 @@ static void ipw_rx_notification(struct ipw_priv *priv, IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE | IPW_DL_ASSOC, "deauthenticated: '%s' %pM\n", - escape_essid(priv->essid, - priv->essid_len), + escape_ssid(priv->essid, + priv->essid_len), priv->bssid); priv->status &= ~(STATUS_ASSOCIATING | @@ -5430,7 +5430,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, !(network->capability & WLAN_CAPABILITY_IBSS))) { IPW_DEBUG_MERGE("Network '%s (%pM)' excluded due to " "capability mismatch.\n", - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), network->bssid); return 0; } @@ -5440,7 +5440,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, if (network->flags & NETWORK_EMPTY_ESSID) { IPW_DEBUG_MERGE("Network '%s (%pM)' excluded " "because of hidden ESSID.\n", - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), network->bssid); return 0; } @@ -5453,8 +5453,8 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, network->ssid_len)) { IPW_DEBUG_MERGE("Network '%s (%pM)' excluded " "because of non-network ESSID.\n", - escape_essid(network->ssid, - network->ssid_len), + escape_ssid(network->ssid, + network->ssid_len), network->bssid); return 0; } @@ -5468,13 +5468,13 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, char escaped[IW_ESSID_MAX_SIZE * 2 + 1]; strncpy(escaped, - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), sizeof(escaped)); IPW_DEBUG_MERGE("Network '%s (%pM)' excluded " "because of ESSID mismatch: '%s'.\n", escaped, network->bssid, - escape_essid(priv->essid, - priv->essid_len)); + escape_ssid(priv->essid, + priv->essid_len)); return 0; } } @@ -5485,14 +5485,14 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, if (network->time_stamp[0] < match->network->time_stamp[0]) { IPW_DEBUG_MERGE("Network '%s excluded because newer than " "current network.\n", - escape_essid(match->network->ssid, - match->network->ssid_len)); + escape_ssid(match->network->ssid, + match->network->ssid_len)); return 0; } else if (network->time_stamp[1] < match->network->time_stamp[1]) { IPW_DEBUG_MERGE("Network '%s excluded because newer than " "current network.\n", - escape_essid(match->network->ssid, - match->network->ssid_len)); + escape_ssid(match->network->ssid, + match->network->ssid_len)); return 0; } @@ -5501,7 +5501,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, time_after(jiffies, network->last_scanned + priv->ieee->scan_age)) { IPW_DEBUG_MERGE("Network '%s (%pM)' excluded " "because of age: %ums.\n", - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), network->bssid, jiffies_to_msecs(jiffies - network->last_scanned)); @@ -5512,7 +5512,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, (network->channel != priv->channel)) { IPW_DEBUG_MERGE("Network '%s (%pM)' excluded " "because of channel mismatch: %d != %d.\n", - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), network->bssid, network->channel, priv->channel); return 0; @@ -5523,7 +5523,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, ((network->capability & WLAN_CAPABILITY_PRIVACY) ? 1 : 0)) { IPW_DEBUG_MERGE("Network '%s (%pM)' excluded " "because of privacy mismatch: %s != %s.\n", - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), network->bssid, priv-> capability & CAP_PRIVACY_ON ? "on" : "off", @@ -5536,8 +5536,8 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, if (!memcmp(network->bssid, priv->bssid, ETH_ALEN)) { IPW_DEBUG_MERGE("Network '%s (%pM)' excluded " "because of the same BSSID match: %pM" - ".\n", escape_essid(network->ssid, - network->ssid_len), + ".\n", escape_ssid(network->ssid, + network->ssid_len), network->bssid, priv->bssid); return 0; @@ -5548,7 +5548,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, IPW_DEBUG_MERGE("Network '%s (%pM)' excluded " "because of invalid frequency/mode " "combination.\n", - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), network->bssid); return 0; } @@ -5559,7 +5559,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, IPW_DEBUG_MERGE("Network '%s (%pM)' excluded " "because configured rate mask excludes " "AP mandatory rate.\n", - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), network->bssid); return 0; } @@ -5567,7 +5567,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, if (rates.num_rates == 0) { IPW_DEBUG_MERGE("Network '%s (%pM)' excluded " "because of no compatible rates.\n", - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), network->bssid); return 0; } @@ -5580,7 +5580,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, ipw_copy_rates(&match->rates, &rates); match->network = network; IPW_DEBUG_MERGE("Network '%s (%pM)' is a viable match.\n", - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), network->bssid); return 1; @@ -5618,8 +5618,8 @@ static void ipw_merge_adhoc_network(struct work_struct *work) mutex_lock(&priv->mutex); if ((priv->ieee->iw_mode == IW_MODE_ADHOC)) { IPW_DEBUG_MERGE("remove network %s\n", - escape_essid(priv->essid, - priv->essid_len)); + escape_ssid(priv->essid, + priv->essid_len)); ipw_remove_current_network(priv); } @@ -5644,7 +5644,7 @@ static int ipw_best_network(struct ipw_priv *priv, !(network->capability & WLAN_CAPABILITY_IBSS))) { IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded due to " "capability mismatch.\n", - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), network->bssid); return 0; } @@ -5654,7 +5654,7 @@ static int ipw_best_network(struct ipw_priv *priv, if (network->flags & NETWORK_EMPTY_ESSID) { IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because of hidden ESSID.\n", - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), network->bssid); return 0; } @@ -5667,7 +5667,7 @@ static int ipw_best_network(struct ipw_priv *priv, network->ssid_len)) { IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because of non-network ESSID.\n", - escape_essid(network->ssid, + escape_ssid(network->ssid, network->ssid_len), network->bssid); return 0; @@ -5681,13 +5681,13 @@ static int ipw_best_network(struct ipw_priv *priv, min(network->ssid_len, priv->essid_len)))) { char escaped[IW_ESSID_MAX_SIZE * 2 + 1]; strncpy(escaped, - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), sizeof(escaped)); IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because of ESSID mismatch: '%s'.\n", escaped, network->bssid, - escape_essid(priv->essid, - priv->essid_len)); + escape_ssid(priv->essid, + priv->essid_len)); return 0; } } @@ -5697,13 +5697,13 @@ static int ipw_best_network(struct ipw_priv *priv, if (match->network && match->network->stats.rssi > network->stats.rssi) { char escaped[IW_ESSID_MAX_SIZE * 2 + 1]; strncpy(escaped, - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), sizeof(escaped)); IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded because " "'%s (%pM)' has a stronger signal.\n", escaped, network->bssid, - escape_essid(match->network->ssid, - match->network->ssid_len), + escape_ssid(match->network->ssid, + match->network->ssid_len), match->network->bssid); return 0; } @@ -5715,7 +5715,7 @@ static int ipw_best_network(struct ipw_priv *priv, IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because of storming (%ums since last " "assoc attempt).\n", - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), network->bssid, jiffies_to_msecs(jiffies - network->last_associate)); @@ -5727,7 +5727,7 @@ static int ipw_best_network(struct ipw_priv *priv, time_after(jiffies, network->last_scanned + priv->ieee->scan_age)) { IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because of age: %ums.\n", - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), network->bssid, jiffies_to_msecs(jiffies - network->last_scanned)); @@ -5738,7 +5738,7 @@ static int ipw_best_network(struct ipw_priv *priv, (network->channel != priv->channel)) { IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because of channel mismatch: %d != %d.\n", - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), network->bssid, network->channel, priv->channel); return 0; @@ -5749,7 +5749,7 @@ static int ipw_best_network(struct ipw_priv *priv, ((network->capability & WLAN_CAPABILITY_PRIVACY) ? 1 : 0)) { IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because of privacy mismatch: %s != %s.\n", - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), network->bssid, priv->capability & CAP_PRIVACY_ON ? "on" : "off", @@ -5762,7 +5762,7 @@ static int ipw_best_network(struct ipw_priv *priv, memcmp(network->bssid, priv->bssid, ETH_ALEN)) { IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because of BSSID mismatch: %pM.\n", - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), network->bssid, priv->bssid); return 0; } @@ -5772,7 +5772,7 @@ static int ipw_best_network(struct ipw_priv *priv, IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because of invalid frequency/mode " "combination.\n", - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), network->bssid); return 0; } @@ -5781,7 +5781,7 @@ static int ipw_best_network(struct ipw_priv *priv, if (!ieee80211_is_valid_channel(priv->ieee, network->channel)) { IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because of invalid channel in current GEO\n", - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), network->bssid); return 0; } @@ -5792,7 +5792,7 @@ static int ipw_best_network(struct ipw_priv *priv, IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because configured rate mask excludes " "AP mandatory rate.\n", - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), network->bssid); return 0; } @@ -5800,7 +5800,7 @@ static int ipw_best_network(struct ipw_priv *priv, if (rates.num_rates == 0) { IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because of no compatible rates.\n", - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), network->bssid); return 0; } @@ -5814,7 +5814,7 @@ static int ipw_best_network(struct ipw_priv *priv, match->network = network; IPW_DEBUG_ASSOC("Network '%s (%pM)' is a viable match.\n", - escape_essid(network->ssid, network->ssid_len), + escape_ssid(network->ssid, network->ssid_len), network->bssid); return 1; @@ -6065,7 +6065,7 @@ static void ipw_debug_config(struct ipw_priv *priv) IPW_DEBUG_INFO("Channel unlocked.\n"); if (priv->config & CFG_STATIC_ESSID) IPW_DEBUG_INFO("ESSID locked to '%s'\n", - escape_essid(priv->essid, priv->essid_len)); + escape_ssid(priv->essid, priv->essid_len)); else IPW_DEBUG_INFO("ESSID unlocked.\n"); if (priv->config & CFG_STATIC_BSSID) @@ -7352,7 +7352,7 @@ static int ipw_associate_network(struct ipw_priv *priv, IPW_DEBUG_ASSOC("%sssocation attempt: '%s', channel %d, " "802.11%c [%d], %s[:%s], enc=%s%s%s%c%c\n", roaming ? "Rea" : "A", - escape_essid(priv->essid, priv->essid_len), + escape_ssid(priv->essid, priv->essid_len), network->channel, ipw_modes[priv->assoc_request.ieee_mode], rates->num_rates, @@ -7452,7 +7452,7 @@ static int ipw_associate_network(struct ipw_priv *priv, } IPW_DEBUG(IPW_DL_STATE, "associating: '%s' %pM \n", - escape_essid(priv->essid, priv->essid_len), + escape_ssid(priv->essid, priv->essid_len), priv->bssid); return 0; @@ -7604,8 +7604,8 @@ static int ipw_associate(void *data) target = oldest; IPW_DEBUG_ASSOC("Expired '%s' (%pM) from " "network list.\n", - escape_essid(target->ssid, - target->ssid_len), + escape_ssid(target->ssid, + target->ssid_len), target->bssid); list_add_tail(&target->list, &priv->ieee->network_free_list); @@ -9057,7 +9057,7 @@ static int ipw_wx_set_essid(struct net_device *dev, return 0; } - IPW_DEBUG_WX("Setting ESSID: '%s' (%d)\n", escape_essid(extra, length), + IPW_DEBUG_WX("Setting ESSID: '%s' (%d)\n", escape_ssid(extra, length), length); priv->essid_len = length; @@ -9084,7 +9084,7 @@ static int ipw_wx_get_essid(struct net_device *dev, if (priv->config & CFG_STATIC_ESSID || priv->status & (STATUS_ASSOCIATED | STATUS_ASSOCIATING)) { IPW_DEBUG_WX("Getting essid: '%s'\n", - escape_essid(priv->essid, priv->essid_len)); + escape_ssid(priv->essid, priv->essid_len)); memcpy(extra, priv->essid, priv->essid_len); wrqu->essid.length = priv->essid_len; wrqu->essid.flags = 1; /* active */ diff --git a/drivers/net/wireless/ipw2200.h b/drivers/net/wireless/ipw2200.h index 0bad1ec3e7e0..0a84d52147bd 100644 --- a/drivers/net/wireless/ipw2200.h +++ b/drivers/net/wireless/ipw2200.h @@ -48,6 +48,7 @@ #include #include +#include #include #include diff --git a/drivers/net/wireless/iwlwifi/Kconfig b/drivers/net/wireless/iwlwifi/Kconfig index b0ac0ce3fb9f..47bee0ee0a7c 100644 --- a/drivers/net/wireless/iwlwifi/Kconfig +++ b/drivers/net/wireless/iwlwifi/Kconfig @@ -4,6 +4,7 @@ config IWLWIFI config IWLCORE tristate "Intel Wireless Wifi Core" depends on PCI && MAC80211 && WLAN_80211 && EXPERIMENTAL + select LIB80211 select IWLWIFI select MAC80211_LEDS if IWLWIFI_LEDS select LEDS_CLASS if IWLWIFI_LEDS @@ -105,6 +106,7 @@ config IWL3945 tristate "Intel PRO/Wireless 3945ABG/BG Network Connection" depends on PCI && MAC80211 && WLAN_80211 && EXPERIMENTAL select FW_LOADER + select LIB80211 select IWLWIFI select MAC80211_LEDS if IWL3945_LEDS select LEDS_CLASS if IWL3945_LEDS diff --git a/drivers/net/wireless/iwlwifi/iwl-scan.c b/drivers/net/wireless/iwlwifi/iwl-scan.c index 86b74571b513..1cc8aa592821 100644 --- a/drivers/net/wireless/iwlwifi/iwl-scan.c +++ b/drivers/net/wireless/iwlwifi/iwl-scan.c @@ -25,8 +25,10 @@ * Tomas Winkler * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 *****************************************************************************/ -#include +#include #include +#include +#include #include "iwl-eeprom.h" #include "iwl-dev.h" @@ -64,48 +66,6 @@ #define IWL_SCAN_PROBE_MASK(n) cpu_to_le32((BIT(n) | (BIT(n) - BIT(1)))) -static int iwl_is_empty_essid(const char *essid, int essid_len) -{ - /* Single white space is for Linksys APs */ - if (essid_len == 1 && essid[0] == ' ') - return 1; - - /* Otherwise, if the entire essid is 0, we assume it is hidden */ - while (essid_len) { - essid_len--; - if (essid[essid_len] != '\0') - return 0; - } - - return 1; -} - - - -static const char *iwl_escape_essid(const char *essid, u8 essid_len) -{ - static char escaped[IW_ESSID_MAX_SIZE * 2 + 1]; - const char *s = essid; - char *d = escaped; - - if (iwl_is_empty_essid(essid, essid_len)) { - memcpy(escaped, "", sizeof("")); - return escaped; - } - - essid_len = min(essid_len, (u8) IW_ESSID_MAX_SIZE); - while (essid_len--) { - if (*s == '\0') { - *d++ = '\\'; - *d++ = '0'; - s++; - } else - *d++ = *s++; - } - *d = '\0'; - return escaped; -} - /** * iwl_scan_cancel - Cancel any currently executing HW scan * @@ -775,8 +735,8 @@ static void iwl_bg_request_scan(struct work_struct *data) /* We should add the ability for user to lock to PASSIVE ONLY */ if (priv->one_direct_scan) { IWL_DEBUG_SCAN("Start direct scan for '%s'\n", - iwl_escape_essid(priv->direct_ssid, - priv->direct_ssid_len)); + escape_ssid(priv->direct_ssid, + priv->direct_ssid_len)); scan->direct_scan[0].id = WLAN_EID_SSID; scan->direct_scan[0].len = priv->direct_ssid_len; memcpy(scan->direct_scan[0].ssid, @@ -784,7 +744,7 @@ static void iwl_bg_request_scan(struct work_struct *data) n_probes++; } else if (!iwl_is_associated(priv) && priv->essid_len) { IWL_DEBUG_SCAN("Start direct scan for '%s' (not associated)\n", - iwl_escape_essid(priv->essid, priv->essid_len)); + escape_ssid(priv->essid, priv->essid_len)); scan->direct_scan[0].id = WLAN_EID_SSID; scan->direct_scan[0].len = priv->essid_len; memcpy(scan->direct_scan[0].ssid, priv->essid, priv->essid_len); diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 2cd33b4e9e13..370cc46b4888 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -41,6 +41,7 @@ #include #include +#include #include #include @@ -107,46 +108,6 @@ static const struct ieee80211_supported_band *iwl3945_get_band( return priv->hw->wiphy->bands[band]; } -static int iwl3945_is_empty_essid(const char *essid, int essid_len) -{ - /* Single white space is for Linksys APs */ - if (essid_len == 1 && essid[0] == ' ') - return 1; - - /* Otherwise, if the entire essid is 0, we assume it is hidden */ - while (essid_len) { - essid_len--; - if (essid[essid_len] != '\0') - return 0; - } - - return 1; -} - -static const char *iwl3945_escape_essid(const char *essid, u8 essid_len) -{ - static char escaped[IW_ESSID_MAX_SIZE * 2 + 1]; - const char *s = essid; - char *d = escaped; - - if (iwl3945_is_empty_essid(essid, essid_len)) { - memcpy(escaped, "", sizeof("")); - return escaped; - } - - essid_len = min(essid_len, (u8) IW_ESSID_MAX_SIZE); - while (essid_len--) { - if (*s == '\0') { - *d++ = '\\'; - *d++ = '0'; - s++; - } else - *d++ = *s++; - } - *d = '\0'; - return escaped; -} - /*************** DMA-QUEUE-GENERAL-FUNCTIONS ***** * DMA services * @@ -6193,8 +6154,7 @@ static void iwl3945_bg_request_scan(struct work_struct *data) if (priv->one_direct_scan) { IWL_DEBUG_SCAN ("Kicking off one direct scan for '%s'\n", - iwl3945_escape_essid(priv->direct_ssid, - priv->direct_ssid_len)); + escape_ssid(priv->direct_ssid, priv->direct_ssid_len)); scan->direct_scan[0].id = WLAN_EID_SSID; scan->direct_scan[0].len = priv->direct_ssid_len; memcpy(scan->direct_scan[0].ssid, @@ -6203,7 +6163,7 @@ static void iwl3945_bg_request_scan(struct work_struct *data) } else if (!iwl3945_is_associated(priv) && priv->essid_len) { IWL_DEBUG_SCAN ("Kicking off one direct scan for '%s' when not associated\n", - iwl3945_escape_essid(priv->essid, priv->essid_len)); + escape_ssid(priv->essid, priv->essid_len)); scan->direct_scan[0].id = WLAN_EID_SSID; scan->direct_scan[0].len = priv->essid_len; memcpy(scan->direct_scan[0].ssid, priv->essid, priv->essid_len); @@ -7018,7 +6978,7 @@ static int iwl3945_mac_hw_scan(struct ieee80211_hw *hw, u8 *ssid, size_t len) } if (len) { IWL_DEBUG_SCAN("direct scan for %s [%d]\n ", - iwl3945_escape_essid(ssid, len), (int)len); + escape_ssid(ssid, len), (int)len); priv->one_direct_scan = 1; priv->direct_ssid_len = (u8) diff --git a/drivers/net/wireless/libertas/assoc.c b/drivers/net/wireless/libertas/assoc.c index 8b88e9544418..3492e89d1dd5 100644 --- a/drivers/net/wireless/libertas/assoc.c +++ b/drivers/net/wireless/libertas/assoc.c @@ -1,6 +1,8 @@ /* Copyright (C) 2006, Red Hat, Inc. */ +#include #include +#include #include "assoc.h" #include "decl.h" @@ -157,11 +159,11 @@ static int lbs_adhoc_join(struct lbs_private *priv, lbs_deb_enter(LBS_DEB_ASSOC); lbs_deb_join("current SSID '%s', ssid length %u\n", - escape_essid(priv->curbssparams.ssid, + escape_ssid(priv->curbssparams.ssid, priv->curbssparams.ssid_len), priv->curbssparams.ssid_len); lbs_deb_join("requested ssid '%s', ssid length %u\n", - escape_essid(bss->ssid, bss->ssid_len), + escape_ssid(bss->ssid, bss->ssid_len), bss->ssid_len); /* check if the requested SSID is already joined */ @@ -325,7 +327,7 @@ static int lbs_adhoc_start(struct lbs_private *priv, memcpy(cmd.ssid, assoc_req->ssid, assoc_req->ssid_len); lbs_deb_join("ADHOC_START: SSID '%s', ssid length %u\n", - escape_essid(assoc_req->ssid, assoc_req->ssid_len), + escape_ssid(assoc_req->ssid, assoc_req->ssid_len), assoc_req->ssid_len); cmd.bsstype = CMD_BSS_TYPE_IBSS; @@ -704,7 +706,7 @@ static int assoc_helper_essid(struct lbs_private *priv, channel = assoc_req->channel; lbs_deb_assoc("SSID '%s' requested\n", - escape_essid(assoc_req->ssid, assoc_req->ssid_len)); + escape_ssid(assoc_req->ssid, assoc_req->ssid_len)); if (assoc_req->mode == IW_MODE_INFRA) { lbs_send_specific_ssid_scan(priv, assoc_req->ssid, assoc_req->ssid_len); @@ -1228,7 +1230,7 @@ void lbs_association_worker(struct work_struct *work) " secinfo: %s%s%s\n" " auth_mode: %d\n", assoc_req->flags, - escape_essid(assoc_req->ssid, assoc_req->ssid_len), + escape_ssid(assoc_req->ssid, assoc_req->ssid_len), assoc_req->channel, assoc_req->band, assoc_req->mode, assoc_req->bssid, assoc_req->secinfo.WPAenabled ? " WPA" : "", @@ -1814,7 +1816,7 @@ static int lbs_adhoc_post(struct lbs_private *priv, struct cmd_header *resp) wireless_send_event(priv->dev, SIOCGIWAP, &wrqu, NULL); lbs_deb_join("ADHOC_RESP: Joined/started '%s', BSSID %pM, channel %d\n", - escape_essid(bss->ssid, bss->ssid_len), + escape_ssid(bss->ssid, bss->ssid_len), priv->curbssparams.bssid, priv->curbssparams.channel); diff --git a/drivers/net/wireless/libertas/cmd.c b/drivers/net/wireless/libertas/cmd.c index d45b07cf6a62..52feab69ee49 100644 --- a/drivers/net/wireless/libertas/cmd.c +++ b/drivers/net/wireless/libertas/cmd.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include "host.h" @@ -1092,7 +1093,7 @@ int lbs_mesh_config(struct lbs_private *priv, uint16_t action, uint16_t chan) } lbs_deb_cmd("mesh config action %d type %x channel %d SSID %s\n", action, priv->mesh_tlv, chan, - escape_essid(priv->mesh_ssid, priv->mesh_ssid_len)); + escape_ssid(priv->mesh_ssid, priv->mesh_ssid_len)); return __lbs_mesh_config_send(priv, &cmd, action, priv->mesh_tlv); } diff --git a/drivers/net/wireless/libertas/debugfs.c b/drivers/net/wireless/libertas/debugfs.c index 5f6bee493f23..84933203be74 100644 --- a/drivers/net/wireless/libertas/debugfs.c +++ b/drivers/net/wireless/libertas/debugfs.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "dev.h" #include "decl.h" @@ -85,7 +86,7 @@ static ssize_t lbs_getscantable(struct file *file, char __user *userbuf, spectrum_mgmt ? 'S' : ' '); pos += snprintf(buf+pos, len-pos, " %04d |", SCAN_RSSI(iter_bss->rssi)); pos += snprintf(buf+pos, len-pos, " %s\n", - escape_essid(iter_bss->ssid, iter_bss->ssid_len)); + escape_ssid(iter_bss->ssid, iter_bss->ssid_len)); numscansdone++; } diff --git a/drivers/net/wireless/libertas/decl.h b/drivers/net/wireless/libertas/decl.h index 1a8888cceadc..0b84bdca0726 100644 --- a/drivers/net/wireless/libertas/decl.h +++ b/drivers/net/wireless/libertas/decl.h @@ -74,8 +74,4 @@ void lbs_host_to_card_done(struct lbs_private *priv); int lbs_update_channel(struct lbs_private *priv); -#ifndef CONFIG_IEEE80211 -const char *escape_essid(const char *essid, u8 essid_len); -#endif - #endif diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c index 34a47f692bd6..e9d23f68174f 100644 --- a/drivers/net/wireless/libertas/main.c +++ b/drivers/net/wireless/libertas/main.c @@ -1646,33 +1646,6 @@ out: return ret; } -#ifndef CONFIG_IEEE80211 -const char *escape_essid(const char *essid, u8 essid_len) -{ - static char escaped[IW_ESSID_MAX_SIZE * 2 + 1]; - const char *s = essid; - char *d = escaped; - - if (ieee80211_is_empty_essid(essid, essid_len)) { - memcpy(escaped, "", sizeof("")); - return escaped; - } - - essid_len = min(essid_len, (u8) IW_ESSID_MAX_SIZE); - while (essid_len--) { - if (*s == '\0') { - *d++ = '\\'; - *d++ = '0'; - s++; - } else { - *d++ = *s++; - } - } - *d = '\0'; - return escaped; -} -#endif - module_init(lbs_init_module); module_exit(lbs_exit_module); diff --git a/drivers/net/wireless/libertas/scan.c b/drivers/net/wireless/libertas/scan.c index 351b3f6e5664..7881890a4e98 100644 --- a/drivers/net/wireless/libertas/scan.c +++ b/drivers/net/wireless/libertas/scan.c @@ -4,9 +4,12 @@ * IOCTL handlers as well as command preperation and response routines * for sending scan commands to the firmware. */ +#include #include #include +#include + #include "host.h" #include "decl.h" #include "dev.h" @@ -452,7 +455,7 @@ int lbs_scan_networks(struct lbs_private *priv, int full_scan) list_for_each_entry(iter, &priv->network_list, list) lbs_deb_scan("%02d: BSSID %pM, RSSI %d, SSID '%s'\n", i++, iter->bssid, iter->rssi, - escape_essid(iter->ssid, iter->ssid_len)); + escape_ssid(iter->ssid, iter->ssid_len)); mutex_unlock(&priv->lock); #endif @@ -599,7 +602,7 @@ static int lbs_process_bss(struct bss_descriptor *bss, bss->ssid_len = min_t(int, 32, elem->len); memcpy(bss->ssid, elem->data, bss->ssid_len); lbs_deb_scan("got SSID IE: '%s', len %u\n", - escape_essid(bss->ssid, bss->ssid_len), + escape_ssid(bss->ssid, bss->ssid_len), bss->ssid_len); break; @@ -742,7 +745,7 @@ int lbs_send_specific_ssid_scan(struct lbs_private *priv, uint8_t *ssid, int ret = 0; lbs_deb_enter_args(LBS_DEB_SCAN, "SSID '%s'\n", - escape_essid(ssid, ssid_len)); + escape_ssid(ssid, ssid_len)); if (!ssid_len) goto out; @@ -966,7 +969,7 @@ int lbs_set_scan(struct net_device *dev, struct iw_request_info *info, priv->scan_ssid_len = req->essid_len; memcpy(priv->scan_ssid, req->essid, priv->scan_ssid_len); lbs_deb_wext("set_scan, essid '%s'\n", - escape_essid(priv->scan_ssid, priv->scan_ssid_len)); + escape_ssid(priv->scan_ssid, priv->scan_ssid_len)); } else { priv->scan_ssid_len = 0; } diff --git a/drivers/net/wireless/libertas/wext.c b/drivers/net/wireless/libertas/wext.c index 04f0bf2ef0a8..247579951858 100644 --- a/drivers/net/wireless/libertas/wext.c +++ b/drivers/net/wireless/libertas/wext.c @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -2005,7 +2006,7 @@ static int lbs_set_essid(struct net_device *dev, struct iw_request_info *info, lbs_deb_wext("requested any SSID\n"); } else { lbs_deb_wext("requested SSID '%s'\n", - escape_essid(ssid, ssid_len)); + escape_ssid(ssid, ssid_len)); } out: diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h index 93a56de3594b..dec10c41e2ec 100644 --- a/include/net/ieee80211.h +++ b/include/net/ieee80211.h @@ -127,10 +127,6 @@ static inline bool ieee80211_ratelimit_debug(u32 level) } #endif /* CONFIG_IEEE80211_DEBUG */ -/* escape_essid() is intended to be used in debug (and possibly error) - * messages. It should never be used for passing essid to user space. */ -const char *escape_essid(const char *essid, u8 essid_len); - /* * To use the debug system: * @@ -1135,22 +1131,6 @@ static inline void *ieee80211_priv(struct net_device *dev) return ((struct ieee80211_device *)netdev_priv(dev))->priv; } -static inline int ieee80211_is_empty_essid(const char *essid, int essid_len) -{ - /* Single white space is for Linksys APs */ - if (essid_len == 1 && essid[0] == ' ') - return 1; - - /* Otherwise, if the entire essid is 0, we assume it is hidden */ - while (essid_len) { - essid_len--; - if (essid[essid_len] != '\0') - return 0; - } - - return 1; -} - static inline int ieee80211_is_valid_mode(struct ieee80211_device *ieee, int mode) { diff --git a/include/net/lib80211.h b/include/net/lib80211.h new file mode 100644 index 000000000000..91a64f358cef --- /dev/null +++ b/include/net/lib80211.h @@ -0,0 +1,31 @@ +/* + * lib80211.h -- common bits for IEEE802.11 wireless drivers + * + * Copyright (c) 2008, John W. Linville + * + */ + +#ifndef LIB80211_H +#define LIB80211_H + +/* escape_ssid() is intended to be used in debug (and possibly error) + * messages. It should never be used for passing ssid to user space. */ +const char *escape_ssid(const char *ssid, u8 ssid_len); + +static inline int is_empty_ssid(const char *ssid, int ssid_len) +{ + /* Single white space is for Linksys APs */ + if (ssid_len == 1 && ssid[0] == ' ') + return 1; + + /* Otherwise, if the entire ssid is 0, we assume it is hidden */ + while (ssid_len) { + ssid_len--; + if (ssid[ssid_len] != '\0') + return 0; + } + + return 1; +} + +#endif /* LIB80211_H */ diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig index df9624c3cebf..d2282bb2e4f1 100644 --- a/net/ieee80211/Kconfig +++ b/net/ieee80211/Kconfig @@ -11,6 +11,7 @@ config IEEE80211 select IEEE80211_CRYPT_WEP select IEEE80211_CRYPT_TKIP select IEEE80211_CRYPT_CCMP + select LIB80211 ---help--- This option enables the hardware independent IEEE 802.11 networking stack. This component is deprecated in favor of the diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c index 949772a5a7dc..d34d4e79b6f7 100644 --- a/net/ieee80211/ieee80211_module.c +++ b/net/ieee80211/ieee80211_module.c @@ -308,31 +308,5 @@ MODULE_PARM_DESC(debug, "debug output mask"); module_exit(ieee80211_exit); module_init(ieee80211_init); -const char *escape_essid(const char *essid, u8 essid_len) -{ - static char escaped[IW_ESSID_MAX_SIZE * 2 + 1]; - const char *s = essid; - char *d = escaped; - - if (ieee80211_is_empty_essid(essid, essid_len)) { - memcpy(escaped, "", sizeof("")); - return escaped; - } - - essid_len = min(essid_len, (u8) IW_ESSID_MAX_SIZE); - while (essid_len--) { - if (*s == '\0') { - *d++ = '\\'; - *d++ = '0'; - s++; - } else { - *d++ = *s++; - } - } - *d = '\0'; - return escaped; -} - EXPORT_SYMBOL(alloc_ieee80211); EXPORT_SYMBOL(free_ieee80211); -EXPORT_SYMBOL(escape_essid); diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c index 281223e41c58..876a004918b0 100644 --- a/net/ieee80211/ieee80211_rx.c +++ b/net/ieee80211/ieee80211_rx.c @@ -32,6 +32,7 @@ #include #include +#include #include static void ieee80211_monitor_rx(struct ieee80211_device *ieee, @@ -1145,8 +1146,8 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element switch (info_element->id) { case MFIE_TYPE_SSID: - if (ieee80211_is_empty_essid(info_element->data, - info_element->len)) { + if (is_empty_ssid(info_element->data, + info_element->len)) { network->flags |= NETWORK_EMPTY_ESSID; break; } @@ -1390,7 +1391,7 @@ static int ieee80211_handle_assoc_resp(struct ieee80211_device *ieee, struct iee network->mode |= IEEE_B; } - if (ieee80211_is_empty_essid(network->ssid, network->ssid_len)) + if (is_empty_ssid(network->ssid, network->ssid_len)) network->flags |= NETWORK_EMPTY_ESSID; memcpy(&network->stats, stats, sizeof(network->stats)); @@ -1456,13 +1457,13 @@ static int ieee80211_network_init(struct ieee80211_device *ieee, struct ieee8021 if (network->mode == 0) { IEEE80211_DEBUG_SCAN("Filtered out '%s (%pM)' " "network.\n", - escape_essid(network->ssid, - network->ssid_len), + escape_ssid(network->ssid, + network->ssid_len), network->bssid); return 1; } - if (ieee80211_is_empty_essid(network->ssid, network->ssid_len)) + if (is_empty_ssid(network->ssid, network->ssid_len)) network->flags |= NETWORK_EMPTY_ESSID; memcpy(&network->stats, stats, sizeof(network->stats)); @@ -1576,7 +1577,7 @@ static void ieee80211_process_probe_response(struct ieee80211_device IEEE80211_DEBUG_SCAN("'%s' (%pM" "): %c%c%c%c %c%c%c%c-%c%c%c%c %c%c%c%c\n", - escape_essid(info_element->data, info_element->len), + escape_ssid(info_element->data, info_element->len), beacon->header.addr3, (beacon->capability & cpu_to_le16(1 << 0xf)) ? '1' : '0', (beacon->capability & cpu_to_le16(1 << 0xe)) ? '1' : '0', @@ -1597,8 +1598,8 @@ static void ieee80211_process_probe_response(struct ieee80211_device if (ieee80211_network_init(ieee, beacon, &network, stats)) { IEEE80211_DEBUG_SCAN("Dropped '%s' (%pM) via %s.\n", - escape_essid(info_element->data, - info_element->len), + escape_ssid(info_element->data, + info_element->len), beacon->header.addr3, is_beacon(beacon->header.frame_ctl) ? "BEACON" : "PROBE RESPONSE"); @@ -1635,8 +1636,8 @@ static void ieee80211_process_probe_response(struct ieee80211_device target = oldest; IEEE80211_DEBUG_SCAN("Expired '%s' (%pM) from " "network list.\n", - escape_essid(target->ssid, - target->ssid_len), + escape_ssid(target->ssid, + target->ssid_len), target->bssid); ieee80211_network_reset(target); } else { @@ -1648,8 +1649,8 @@ static void ieee80211_process_probe_response(struct ieee80211_device #ifdef CONFIG_IEEE80211_DEBUG IEEE80211_DEBUG_SCAN("Adding '%s' (%pM) via %s.\n", - escape_essid(network.ssid, - network.ssid_len), + escape_ssid(network.ssid, + network.ssid_len), network.bssid, is_beacon(beacon->header.frame_ctl) ? "BEACON" : "PROBE RESPONSE"); @@ -1659,8 +1660,8 @@ static void ieee80211_process_probe_response(struct ieee80211_device list_add_tail(&target->list, &ieee->network_list); } else { IEEE80211_DEBUG_SCAN("Updating '%s' (%pM) via %s.\n", - escape_essid(target->ssid, - target->ssid_len), + escape_ssid(target->ssid, + target->ssid_len), target->bssid, is_beacon(beacon->header.frame_ctl) ? "BEACON" : "PROBE RESPONSE"); diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c index 89a81062ab4b..3b031c2910ac 100644 --- a/net/ieee80211/ieee80211_wx.c +++ b/net/ieee80211/ieee80211_wx.c @@ -283,8 +283,8 @@ int ieee80211_wx_get_scan(struct ieee80211_device *ieee, else IEEE80211_DEBUG_SCAN("Not showing network '%s (" "%pM)' due to age (%dms).\n", - escape_essid(network->ssid, - network->ssid_len), + escape_ssid(network->ssid, + network->ssid_len), network->bssid, jiffies_to_msecs(jiffies - network-> @@ -408,7 +408,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, memset(sec.keys[key] + erq->length, 0, len - erq->length); IEEE80211_DEBUG_WX("Setting key %d to '%s' (%d:%d bytes)\n", - key, escape_essid(sec.keys[key], len), + key, escape_ssid(sec.keys[key], len), erq->length, len); sec.key_sizes[key] = len; if (*crypt) diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 646c7121dbc0..ae7f2262dfb5 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -72,3 +72,13 @@ config WIRELESS_EXT_SYSFS Say Y if you have programs using it, like old versions of hal. + +config LIB80211 + tristate "Common routines for IEEE802.11 drivers" + default n + help + This options enables a library of common routines used + by IEEE802.11 wireless LAN drivers. + + Drivers should select this themselves if needed. Say Y if + you want this built into your kernel. diff --git a/net/wireless/Makefile b/net/wireless/Makefile index b9f943c45f3b..d2d848d445f2 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_WIRELESS_EXT) += wext.o obj-$(CONFIG_CFG80211) += cfg80211.o +obj-$(CONFIG_LIB80211) += lib80211.o cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o cfg80211-$(CONFIG_NL80211) += nl80211.o diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c new file mode 100644 index 000000000000..b22d271fb675 --- /dev/null +++ b/net/wireless/lib80211.c @@ -0,0 +1,58 @@ +/* + * lib80211 -- common bits for IEEE802.11 drivers + * + * Copyright(c) 2008 John W. Linville + * + */ + +#include +#include + +#include + +#define DRV_NAME "lib80211" + +#define DRV_DESCRIPTION "common routines for IEEE802.11 drivers" + +MODULE_DESCRIPTION(DRV_DESCRIPTION); +MODULE_AUTHOR("John W. Linville "); +MODULE_LICENSE("GPL"); + +const char *escape_ssid(const char *ssid, u8 ssid_len) +{ + static char escaped[IEEE80211_MAX_SSID_LEN * 2 + 1]; + const char *s = ssid; + char *d = escaped; + + if (is_empty_ssid(ssid, ssid_len)) { + memcpy(escaped, "", sizeof("")); + return escaped; + } + + ssid_len = min_t(u8, ssid_len, IEEE80211_MAX_SSID_LEN); + while (ssid_len--) { + if (*s == '\0') { + *d++ = '\\'; + *d++ = '0'; + s++; + } else { + *d++ = *s++; + } + } + *d = '\0'; + return escaped; +} +EXPORT_SYMBOL(escape_ssid); + +static int __init ieee80211_init(void) +{ + printk(KERN_INFO DRV_NAME ": " DRV_DESCRIPTION "\n"); + return 0; +} + +static void __exit ieee80211_exit(void) +{ +} + +module_init(ieee80211_init); +module_exit(ieee80211_exit); -- cgit v1.2.3 From c5d3dce875ef055ed9b14f169cc967cc2c8faf1f Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Tue, 30 Sep 2008 17:17:26 -0400 Subject: wireless: remove NETWORK_EMPTY_ESSID flag It is unnecessary and of questionable value. Also remove is_empty_ssid, as it is also unnecessary. Signed-off-by: John W. Linville --- drivers/net/wireless/ipw2200.c | 23 +---------------------- include/net/ieee80211.h | 1 - include/net/lib80211.h | 16 ---------------- net/ieee80211/ieee80211_rx.c | 15 ++------------- net/ieee80211/ieee80211_wx.c | 12 +++--------- net/wireless/lib80211.c | 5 ----- 6 files changed, 6 insertions(+), 66 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c index 22278f87d1c1..6e0c55c64e1f 100644 --- a/drivers/net/wireless/ipw2200.c +++ b/drivers/net/wireless/ipw2200.c @@ -5435,16 +5435,6 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, return 0; } - /* If we do not have an ESSID for this AP, we can not associate with - * it */ - if (network->flags & NETWORK_EMPTY_ESSID) { - IPW_DEBUG_MERGE("Network '%s (%pM)' excluded " - "because of hidden ESSID.\n", - escape_ssid(network->ssid, network->ssid_len), - network->bssid); - return 0; - } - if (unlikely(roaming)) { /* If we are roaming, then ensure check if this is a valid * network to try and roam to */ @@ -5649,16 +5639,6 @@ static int ipw_best_network(struct ipw_priv *priv, return 0; } - /* If we do not have an ESSID for this AP, we can not associate with - * it */ - if (network->flags & NETWORK_EMPTY_ESSID) { - IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " - "because of hidden ESSID.\n", - escape_ssid(network->ssid, network->ssid_len), - network->bssid); - return 0; - } - if (unlikely(roaming)) { /* If we are roaming, then ensure check if this is a valid * network to try and roam to */ @@ -6881,8 +6861,7 @@ static int ipw_qos_handle_probe_response(struct ipw_priv *priv, if ((priv->status & STATUS_ASSOCIATED) && (priv->ieee->iw_mode == IW_MODE_ADHOC) && (active_network == 0)) { if (memcmp(network->bssid, priv->bssid, ETH_ALEN)) - if ((network->capability & WLAN_CAPABILITY_IBSS) && - !(network->flags & NETWORK_EMPTY_ESSID)) + if (network->capability & WLAN_CAPABILITY_IBSS) if ((network->ssid_len == priv->assoc_network->ssid_len) && !memcmp(network->ssid, diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h index dec10c41e2ec..afa34d3be721 100644 --- a/include/net/ieee80211.h +++ b/include/net/ieee80211.h @@ -729,7 +729,6 @@ struct ieee80211_txb { #define MAX_WPA_IE_LEN 64 -#define NETWORK_EMPTY_ESSID (1<<0) #define NETWORK_HAS_OFDM (1<<1) #define NETWORK_HAS_CCK (1<<2) diff --git a/include/net/lib80211.h b/include/net/lib80211.h index 91a64f358cef..ce49a30033b6 100644 --- a/include/net/lib80211.h +++ b/include/net/lib80211.h @@ -12,20 +12,4 @@ * messages. It should never be used for passing ssid to user space. */ const char *escape_ssid(const char *ssid, u8 ssid_len); -static inline int is_empty_ssid(const char *ssid, int ssid_len) -{ - /* Single white space is for Linksys APs */ - if (ssid_len == 1 && ssid[0] == ' ') - return 1; - - /* Otherwise, if the entire ssid is 0, we assume it is hidden */ - while (ssid_len) { - ssid_len--; - if (ssid[ssid_len] != '\0') - return 0; - } - - return 1; -} - #endif /* LIB80211_H */ diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c index 876a004918b0..f15f82e7bbfd 100644 --- a/net/ieee80211/ieee80211_rx.c +++ b/net/ieee80211/ieee80211_rx.c @@ -1146,12 +1146,6 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element switch (info_element->id) { case MFIE_TYPE_SSID: - if (is_empty_ssid(info_element->data, - info_element->len)) { - network->flags |= NETWORK_EMPTY_ESSID; - break; - } - network->ssid_len = min(info_element->len, (u8) IW_ESSID_MAX_SIZE); memcpy(network->ssid, info_element->data, @@ -1161,7 +1155,8 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element IW_ESSID_MAX_SIZE - network->ssid_len); IEEE80211_DEBUG_MGMT("MFIE_TYPE_SSID: '%s' len=%d.\n", - network->ssid, network->ssid_len); + escape_ssid(network->ssid), + network->ssid_len); break; case MFIE_TYPE_RATES: @@ -1391,9 +1386,6 @@ static int ieee80211_handle_assoc_resp(struct ieee80211_device *ieee, struct iee network->mode |= IEEE_B; } - if (is_empty_ssid(network->ssid, network->ssid_len)) - network->flags |= NETWORK_EMPTY_ESSID; - memcpy(&network->stats, stats, sizeof(network->stats)); if (ieee->handle_assoc_response != NULL) @@ -1463,9 +1455,6 @@ static int ieee80211_network_init(struct ieee80211_device *ieee, struct ieee8021 return 1; } - if (is_empty_ssid(network->ssid, network->ssid_len)) - network->flags |= NETWORK_EMPTY_ESSID; - memcpy(&network->stats, stats, sizeof(network->stats)); return 0; diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c index 3b031c2910ac..3025140ae721 100644 --- a/net/ieee80211/ieee80211_wx.c +++ b/net/ieee80211/ieee80211_wx.c @@ -65,15 +65,9 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, /* Add the ESSID */ iwe.cmd = SIOCGIWESSID; iwe.u.data.flags = 1; - if (network->flags & NETWORK_EMPTY_ESSID) { - iwe.u.data.length = sizeof(""); - start = iwe_stream_add_point(info, start, stop, - &iwe, ""); - } else { - iwe.u.data.length = min(network->ssid_len, (u8) 32); - start = iwe_stream_add_point(info, start, stop, - &iwe, network->ssid); - } + iwe.u.data.length = min(network->ssid_len, (u8) 32); + start = iwe_stream_add_point(info, start, stop, + &iwe, network->ssid); /* Add the protocol name */ iwe.cmd = SIOCGIWNAME; diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c index b22d271fb675..872cc8dc00a3 100644 --- a/net/wireless/lib80211.c +++ b/net/wireless/lib80211.c @@ -24,11 +24,6 @@ const char *escape_ssid(const char *ssid, u8 ssid_len) const char *s = ssid; char *d = escaped; - if (is_empty_ssid(ssid, ssid_len)) { - memcpy(escaped, "", sizeof("")); - return escaped; - } - ssid_len = min_t(u8, ssid_len, IEEE80211_MAX_SSID_LEN); while (ssid_len--) { if (*s == '\0') { -- cgit v1.2.3 From 9387b7caf3049168fc97a8a9111af8fe2143af18 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Tue, 30 Sep 2008 20:59:05 -0400 Subject: wireless: use individual buffers for printing ssid values Also change escape_ssid to print_ssid to match print_mac semantics. Signed-off-by: John W. Linville --- drivers/net/wireless/ipw2100.c | 20 ++-- drivers/net/wireless/ipw2200.c | 153 +++++++++++++++++----------- drivers/net/wireless/iwlwifi/iwl-scan.c | 7 +- drivers/net/wireless/iwlwifi/iwl3945-base.c | 9 +- drivers/net/wireless/libertas/assoc.c | 17 ++-- drivers/net/wireless/libertas/cmd.c | 3 +- drivers/net/wireless/libertas/debugfs.c | 4 +- drivers/net/wireless/libertas/scan.c | 12 ++- drivers/net/wireless/libertas/wext.c | 3 +- include/linux/ieee80211.h | 6 +- include/net/lib80211.h | 5 +- net/ieee80211/ieee80211_rx.c | 19 ++-- net/ieee80211/ieee80211_wx.c | 7 +- net/wireless/lib80211.c | 9 +- 14 files changed, 168 insertions(+), 106 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c index 223914e3e07e..062c9f280304 100644 --- a/drivers/net/wireless/ipw2100.c +++ b/drivers/net/wireless/ipw2100.c @@ -163,6 +163,8 @@ that only one external action is invoked at a time. #include #include +#include + #include "ipw2100.h" #define IPW2100_VERSION "git-1.2.2" @@ -1914,6 +1916,7 @@ static void isr_indicate_associated(struct ipw2100_priv *priv, u32 status) u32 chan; char *txratename; u8 bssid[ETH_ALEN]; + DECLARE_SSID_BUF(ssid); /* * TBD: BSSID is usually 00:00:00:00:00:00 here and not @@ -1975,7 +1978,7 @@ static void isr_indicate_associated(struct ipw2100_priv *priv, u32 status) } IPW_DEBUG_INFO("%s: Associated with '%s' at %s, channel %d (BSSID=%pM)\n", - priv->net_dev->name, escape_ssid(essid, essid_len), + priv->net_dev->name, print_ssid(ssid, essid, essid_len), txratename, chan, bssid); /* now we copy read ssid into dev */ @@ -2002,8 +2005,9 @@ static int ipw2100_set_essid(struct ipw2100_priv *priv, char *essid, .host_command_length = ssid_len }; int err; + DECLARE_SSID_BUF(ssid); - IPW_DEBUG_HC("SSID: '%s'\n", escape_ssid(essid, ssid_len)); + IPW_DEBUG_HC("SSID: '%s'\n", print_ssid(ssid, essid, ssid_len)); if (ssid_len) memcpy(cmd.host_command_parameters, essid, ssid_len); @@ -2044,9 +2048,11 @@ static int ipw2100_set_essid(struct ipw2100_priv *priv, char *essid, static void isr_indicate_association_lost(struct ipw2100_priv *priv, u32 status) { + DECLARE_SSID_BUF(ssid); + IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE | IPW_DL_ASSOC, "disassociated: '%s' %pM \n", - escape_ssid(priv->essid, priv->essid_len), + print_ssid(ssid, priv->essid, priv->essid_len), priv->bssid); priv->status &= ~(STATUS_ASSOCIATED | STATUS_ASSOCIATING); @@ -6958,6 +6964,7 @@ static int ipw2100_wx_set_essid(struct net_device *dev, char *essid = ""; /* ANY */ int length = 0; int err = 0; + DECLARE_SSID_BUF(ssid); mutex_lock(&priv->action_mutex); if (!(priv->status & STATUS_INITIALIZED)) { @@ -6987,8 +6994,8 @@ static int ipw2100_wx_set_essid(struct net_device *dev, goto done; } - IPW_DEBUG_WX("Setting ESSID: '%s' (%d)\n", escape_ssid(essid, length), - length); + IPW_DEBUG_WX("Setting ESSID: '%s' (%d)\n", + print_ssid(ssid, essid, length), length); priv->essid_len = length; memcpy(priv->essid, essid, priv->essid_len); @@ -7009,12 +7016,13 @@ static int ipw2100_wx_get_essid(struct net_device *dev, */ struct ipw2100_priv *priv = ieee80211_priv(dev); + DECLARE_SSID_BUF(ssid); /* If we are associated, trying to associate, or have a statically * configured ESSID then return that; otherwise return ANY */ if (priv->config & CFG_STATIC_ESSID || priv->status & STATUS_ASSOCIATED) { IPW_DEBUG_WX("Getting essid: '%s'\n", - escape_ssid(priv->essid, priv->essid_len)); + print_ssid(ssid, priv->essid, priv->essid_len)); memcpy(extra, priv->essid, priv->essid_len); wrqu->essid.length = priv->essid_len; wrqu->essid.flags = 1; /* active */ diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c index 6e0c55c64e1f..2b9d96a5c10e 100644 --- a/drivers/net/wireless/ipw2200.c +++ b/drivers/net/wireless/ipw2200.c @@ -4395,6 +4395,7 @@ static void handle_scan_event(struct ipw_priv *priv) static void ipw_rx_notification(struct ipw_priv *priv, struct ipw_rx_notification *notif) { + DECLARE_SSID_BUF(ssid); u16 size = le16_to_cpu(notif->size); notif->size = le16_to_cpu(notif->size); @@ -4409,8 +4410,8 @@ static void ipw_rx_notification(struct ipw_priv *priv, IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE | IPW_DL_ASSOC, "associated: '%s' %pM \n", - escape_ssid(priv->essid, - priv->essid_len), + print_ssid(ssid, priv->essid, + priv->essid_len), priv->bssid); switch (priv->ieee->iw_mode) { @@ -4490,10 +4491,11 @@ static void ipw_rx_notification(struct ipw_priv *priv, "deauthenticated: '%s' " "%pM" ": (0x%04X) - %s \n", - escape_ssid(priv-> - essid, - priv-> - essid_len), + print_ssid(ssid, + priv-> + essid, + priv-> + essid_len), priv->bssid, le16_to_cpu(auth->status), ipw_get_status_code @@ -4512,8 +4514,8 @@ static void ipw_rx_notification(struct ipw_priv *priv, IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE | IPW_DL_ASSOC, "authenticated: '%s' %pM\n", - escape_ssid(priv->essid, - priv->essid_len), + print_ssid(ssid, priv->essid, + priv->essid_len), priv->bssid); break; } @@ -4540,8 +4542,8 @@ static void ipw_rx_notification(struct ipw_priv *priv, IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE | IPW_DL_ASSOC, "disassociated: '%s' %pM \n", - escape_ssid(priv->essid, - priv->essid_len), + print_ssid(ssid, priv->essid, + priv->essid_len), priv->bssid); priv->status &= @@ -4578,8 +4580,8 @@ static void ipw_rx_notification(struct ipw_priv *priv, case CMAS_AUTHENTICATED: IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE, "authenticated: '%s' %pM \n", - escape_ssid(priv->essid, - priv->essid_len), + print_ssid(ssid, priv->essid, + priv->essid_len), priv->bssid); priv->status |= STATUS_AUTH; break; @@ -4597,8 +4599,8 @@ static void ipw_rx_notification(struct ipw_priv *priv, IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE | IPW_DL_ASSOC, "deauthenticated: '%s' %pM\n", - escape_ssid(priv->essid, - priv->essid_len), + print_ssid(ssid, priv->essid, + priv->essid_len), priv->bssid); priv->status &= ~(STATUS_ASSOCIATING | @@ -5423,6 +5425,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, int roaming) { struct ipw_supported_rates rates; + DECLARE_SSID_BUF(ssid); /* Verify that this network's capability is compatible with the * current mode (AdHoc or Infrastructure) */ @@ -5430,7 +5433,8 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, !(network->capability & WLAN_CAPABILITY_IBSS))) { IPW_DEBUG_MERGE("Network '%s (%pM)' excluded due to " "capability mismatch.\n", - escape_ssid(network->ssid, network->ssid_len), + print_ssid(ssid, network->ssid, + network->ssid_len), network->bssid); return 0; } @@ -5443,8 +5447,8 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, network->ssid_len)) { IPW_DEBUG_MERGE("Network '%s (%pM)' excluded " "because of non-network ESSID.\n", - escape_ssid(network->ssid, - network->ssid_len), + print_ssid(ssid, network->ssid, + network->ssid_len), network->bssid); return 0; } @@ -5458,13 +5462,14 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, char escaped[IW_ESSID_MAX_SIZE * 2 + 1]; strncpy(escaped, - escape_ssid(network->ssid, network->ssid_len), + print_ssid(ssid, network->ssid, + network->ssid_len), sizeof(escaped)); IPW_DEBUG_MERGE("Network '%s (%pM)' excluded " "because of ESSID mismatch: '%s'.\n", escaped, network->bssid, - escape_ssid(priv->essid, - priv->essid_len)); + print_ssid(ssid, priv->essid, + priv->essid_len)); return 0; } } @@ -5475,14 +5480,14 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, if (network->time_stamp[0] < match->network->time_stamp[0]) { IPW_DEBUG_MERGE("Network '%s excluded because newer than " "current network.\n", - escape_ssid(match->network->ssid, - match->network->ssid_len)); + print_ssid(ssid, match->network->ssid, + match->network->ssid_len)); return 0; } else if (network->time_stamp[1] < match->network->time_stamp[1]) { IPW_DEBUG_MERGE("Network '%s excluded because newer than " "current network.\n", - escape_ssid(match->network->ssid, - match->network->ssid_len)); + print_ssid(ssid, match->network->ssid, + match->network->ssid_len)); return 0; } @@ -5491,7 +5496,8 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, time_after(jiffies, network->last_scanned + priv->ieee->scan_age)) { IPW_DEBUG_MERGE("Network '%s (%pM)' excluded " "because of age: %ums.\n", - escape_ssid(network->ssid, network->ssid_len), + print_ssid(ssid, network->ssid, + network->ssid_len), network->bssid, jiffies_to_msecs(jiffies - network->last_scanned)); @@ -5502,7 +5508,8 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, (network->channel != priv->channel)) { IPW_DEBUG_MERGE("Network '%s (%pM)' excluded " "because of channel mismatch: %d != %d.\n", - escape_ssid(network->ssid, network->ssid_len), + print_ssid(ssid, network->ssid, + network->ssid_len), network->bssid, network->channel, priv->channel); return 0; @@ -5513,7 +5520,8 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, ((network->capability & WLAN_CAPABILITY_PRIVACY) ? 1 : 0)) { IPW_DEBUG_MERGE("Network '%s (%pM)' excluded " "because of privacy mismatch: %s != %s.\n", - escape_ssid(network->ssid, network->ssid_len), + print_ssid(ssid, network->ssid, + network->ssid_len), network->bssid, priv-> capability & CAP_PRIVACY_ON ? "on" : "off", @@ -5526,8 +5534,8 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, if (!memcmp(network->bssid, priv->bssid, ETH_ALEN)) { IPW_DEBUG_MERGE("Network '%s (%pM)' excluded " "because of the same BSSID match: %pM" - ".\n", escape_ssid(network->ssid, - network->ssid_len), + ".\n", print_ssid(ssid, network->ssid, + network->ssid_len), network->bssid, priv->bssid); return 0; @@ -5538,7 +5546,8 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, IPW_DEBUG_MERGE("Network '%s (%pM)' excluded " "because of invalid frequency/mode " "combination.\n", - escape_ssid(network->ssid, network->ssid_len), + print_ssid(ssid, network->ssid, + network->ssid_len), network->bssid); return 0; } @@ -5549,7 +5558,8 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, IPW_DEBUG_MERGE("Network '%s (%pM)' excluded " "because configured rate mask excludes " "AP mandatory rate.\n", - escape_ssid(network->ssid, network->ssid_len), + print_ssid(ssid, network->ssid, + network->ssid_len), network->bssid); return 0; } @@ -5557,7 +5567,8 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, if (rates.num_rates == 0) { IPW_DEBUG_MERGE("Network '%s (%pM)' excluded " "because of no compatible rates.\n", - escape_ssid(network->ssid, network->ssid_len), + print_ssid(ssid, network->ssid, + network->ssid_len), network->bssid); return 0; } @@ -5570,7 +5581,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, ipw_copy_rates(&match->rates, &rates); match->network = network; IPW_DEBUG_MERGE("Network '%s (%pM)' is a viable match.\n", - escape_ssid(network->ssid, network->ssid_len), + print_ssid(ssid, network->ssid, network->ssid_len), network->bssid); return 1; @@ -5578,6 +5589,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, static void ipw_merge_adhoc_network(struct work_struct *work) { + DECLARE_SSID_BUF(ssid); struct ipw_priv *priv = container_of(work, struct ipw_priv, merge_networks); struct ieee80211_network *network = NULL; @@ -5608,8 +5620,8 @@ static void ipw_merge_adhoc_network(struct work_struct *work) mutex_lock(&priv->mutex); if ((priv->ieee->iw_mode == IW_MODE_ADHOC)) { IPW_DEBUG_MERGE("remove network %s\n", - escape_ssid(priv->essid, - priv->essid_len)); + print_ssid(ssid, priv->essid, + priv->essid_len)); ipw_remove_current_network(priv); } @@ -5625,6 +5637,7 @@ static int ipw_best_network(struct ipw_priv *priv, struct ieee80211_network *network, int roaming) { struct ipw_supported_rates rates; + DECLARE_SSID_BUF(ssid); /* Verify that this network's capability is compatible with the * current mode (AdHoc or Infrastructure) */ @@ -5634,7 +5647,8 @@ static int ipw_best_network(struct ipw_priv *priv, !(network->capability & WLAN_CAPABILITY_IBSS))) { IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded due to " "capability mismatch.\n", - escape_ssid(network->ssid, network->ssid_len), + print_ssid(ssid, network->ssid, + network->ssid_len), network->bssid); return 0; } @@ -5647,8 +5661,8 @@ static int ipw_best_network(struct ipw_priv *priv, network->ssid_len)) { IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because of non-network ESSID.\n", - escape_ssid(network->ssid, - network->ssid_len), + print_ssid(ssid, network->ssid, + network->ssid_len), network->bssid); return 0; } @@ -5661,13 +5675,14 @@ static int ipw_best_network(struct ipw_priv *priv, min(network->ssid_len, priv->essid_len)))) { char escaped[IW_ESSID_MAX_SIZE * 2 + 1]; strncpy(escaped, - escape_ssid(network->ssid, network->ssid_len), + print_ssid(ssid, network->ssid, + network->ssid_len), sizeof(escaped)); IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because of ESSID mismatch: '%s'.\n", escaped, network->bssid, - escape_ssid(priv->essid, - priv->essid_len)); + print_ssid(ssid, priv->essid, + priv->essid_len)); return 0; } } @@ -5677,13 +5692,13 @@ static int ipw_best_network(struct ipw_priv *priv, if (match->network && match->network->stats.rssi > network->stats.rssi) { char escaped[IW_ESSID_MAX_SIZE * 2 + 1]; strncpy(escaped, - escape_ssid(network->ssid, network->ssid_len), + print_ssid(ssid, network->ssid, network->ssid_len), sizeof(escaped)); IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded because " "'%s (%pM)' has a stronger signal.\n", escaped, network->bssid, - escape_ssid(match->network->ssid, - match->network->ssid_len), + print_ssid(ssid, match->network->ssid, + match->network->ssid_len), match->network->bssid); return 0; } @@ -5695,7 +5710,8 @@ static int ipw_best_network(struct ipw_priv *priv, IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because of storming (%ums since last " "assoc attempt).\n", - escape_ssid(network->ssid, network->ssid_len), + print_ssid(ssid, network->ssid, + network->ssid_len), network->bssid, jiffies_to_msecs(jiffies - network->last_associate)); @@ -5707,7 +5723,8 @@ static int ipw_best_network(struct ipw_priv *priv, time_after(jiffies, network->last_scanned + priv->ieee->scan_age)) { IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because of age: %ums.\n", - escape_ssid(network->ssid, network->ssid_len), + print_ssid(ssid, network->ssid, + network->ssid_len), network->bssid, jiffies_to_msecs(jiffies - network->last_scanned)); @@ -5718,7 +5735,8 @@ static int ipw_best_network(struct ipw_priv *priv, (network->channel != priv->channel)) { IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because of channel mismatch: %d != %d.\n", - escape_ssid(network->ssid, network->ssid_len), + print_ssid(ssid, network->ssid, + network->ssid_len), network->bssid, network->channel, priv->channel); return 0; @@ -5729,7 +5747,8 @@ static int ipw_best_network(struct ipw_priv *priv, ((network->capability & WLAN_CAPABILITY_PRIVACY) ? 1 : 0)) { IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because of privacy mismatch: %s != %s.\n", - escape_ssid(network->ssid, network->ssid_len), + print_ssid(ssid, network->ssid, + network->ssid_len), network->bssid, priv->capability & CAP_PRIVACY_ON ? "on" : "off", @@ -5742,7 +5761,8 @@ static int ipw_best_network(struct ipw_priv *priv, memcmp(network->bssid, priv->bssid, ETH_ALEN)) { IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because of BSSID mismatch: %pM.\n", - escape_ssid(network->ssid, network->ssid_len), + print_ssid(ssid, network->ssid, + network->ssid_len), network->bssid, priv->bssid); return 0; } @@ -5752,7 +5772,8 @@ static int ipw_best_network(struct ipw_priv *priv, IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because of invalid frequency/mode " "combination.\n", - escape_ssid(network->ssid, network->ssid_len), + print_ssid(ssid, network->ssid, + network->ssid_len), network->bssid); return 0; } @@ -5761,7 +5782,8 @@ static int ipw_best_network(struct ipw_priv *priv, if (!ieee80211_is_valid_channel(priv->ieee, network->channel)) { IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because of invalid channel in current GEO\n", - escape_ssid(network->ssid, network->ssid_len), + print_ssid(ssid, network->ssid, + network->ssid_len), network->bssid); return 0; } @@ -5772,7 +5794,8 @@ static int ipw_best_network(struct ipw_priv *priv, IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because configured rate mask excludes " "AP mandatory rate.\n", - escape_ssid(network->ssid, network->ssid_len), + print_ssid(ssid, network->ssid, + network->ssid_len), network->bssid); return 0; } @@ -5780,7 +5803,8 @@ static int ipw_best_network(struct ipw_priv *priv, if (rates.num_rates == 0) { IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because of no compatible rates.\n", - escape_ssid(network->ssid, network->ssid_len), + print_ssid(ssid, network->ssid, + network->ssid_len), network->bssid); return 0; } @@ -5794,7 +5818,7 @@ static int ipw_best_network(struct ipw_priv *priv, match->network = network; IPW_DEBUG_ASSOC("Network '%s (%pM)' is a viable match.\n", - escape_ssid(network->ssid, network->ssid_len), + print_ssid(ssid, network->ssid, network->ssid_len), network->bssid); return 1; @@ -6037,6 +6061,7 @@ static void ipw_bg_adhoc_check(struct work_struct *work) static void ipw_debug_config(struct ipw_priv *priv) { + DECLARE_SSID_BUF(ssid); IPW_DEBUG_INFO("Scan completed, no valid APs matched " "[CFG 0x%08X]\n", priv->config); if (priv->config & CFG_STATIC_CHANNEL) @@ -6045,7 +6070,7 @@ static void ipw_debug_config(struct ipw_priv *priv) IPW_DEBUG_INFO("Channel unlocked.\n"); if (priv->config & CFG_STATIC_ESSID) IPW_DEBUG_INFO("ESSID locked to '%s'\n", - escape_ssid(priv->essid, priv->essid_len)); + print_ssid(ssid, priv->essid, priv->essid_len)); else IPW_DEBUG_INFO("ESSID unlocked.\n"); if (priv->config & CFG_STATIC_BSSID) @@ -7263,6 +7288,7 @@ static int ipw_associate_network(struct ipw_priv *priv, struct ipw_supported_rates *rates, int roaming) { int err; + DECLARE_SSID_BUF(ssid); if (priv->config & CFG_FIXED_RATE) ipw_set_fixed_rate(priv, network->mode); @@ -7331,7 +7357,7 @@ static int ipw_associate_network(struct ipw_priv *priv, IPW_DEBUG_ASSOC("%sssocation attempt: '%s', channel %d, " "802.11%c [%d], %s[:%s], enc=%s%s%s%c%c\n", roaming ? "Rea" : "A", - escape_ssid(priv->essid, priv->essid_len), + print_ssid(ssid, priv->essid, priv->essid_len), network->channel, ipw_modes[priv->assoc_request.ieee_mode], rates->num_rates, @@ -7431,7 +7457,7 @@ static int ipw_associate_network(struct ipw_priv *priv, } IPW_DEBUG(IPW_DL_STATE, "associating: '%s' %pM \n", - escape_ssid(priv->essid, priv->essid_len), + print_ssid(ssid, priv->essid, priv->essid_len), priv->bssid); return 0; @@ -7522,6 +7548,7 @@ static int ipw_associate(void *data) struct ipw_supported_rates *rates; struct list_head *element; unsigned long flags; + DECLARE_SSID_BUF(ssid); if (priv->ieee->iw_mode == IW_MODE_MONITOR) { IPW_DEBUG_ASSOC("Not attempting association (monitor mode)\n"); @@ -7583,8 +7610,8 @@ static int ipw_associate(void *data) target = oldest; IPW_DEBUG_ASSOC("Expired '%s' (%pM) from " "network list.\n", - escape_ssid(target->ssid, - target->ssid_len), + print_ssid(ssid, target->ssid, + target->ssid_len), target->bssid); list_add_tail(&target->list, &priv->ieee->network_free_list); @@ -9012,6 +9039,7 @@ static int ipw_wx_set_essid(struct net_device *dev, { struct ipw_priv *priv = ieee80211_priv(dev); int length; + DECLARE_SSID_BUF(ssid); mutex_lock(&priv->mutex); @@ -9036,8 +9064,8 @@ static int ipw_wx_set_essid(struct net_device *dev, return 0; } - IPW_DEBUG_WX("Setting ESSID: '%s' (%d)\n", escape_ssid(extra, length), - length); + IPW_DEBUG_WX("Setting ESSID: '%s' (%d)\n", + print_ssid(ssid, extra, length), length); priv->essid_len = length; memcpy(priv->essid, extra, priv->essid_len); @@ -9056,6 +9084,7 @@ static int ipw_wx_get_essid(struct net_device *dev, union iwreq_data *wrqu, char *extra) { struct ipw_priv *priv = ieee80211_priv(dev); + DECLARE_SSID_BUF(ssid); /* If we are associated, trying to associate, or have a statically * configured ESSID then return that; otherwise return ANY */ @@ -9063,7 +9092,7 @@ static int ipw_wx_get_essid(struct net_device *dev, if (priv->config & CFG_STATIC_ESSID || priv->status & (STATUS_ASSOCIATED | STATUS_ASSOCIATING)) { IPW_DEBUG_WX("Getting essid: '%s'\n", - escape_ssid(priv->essid, priv->essid_len)); + print_ssid(ssid, priv->essid, priv->essid_len)); memcpy(extra, priv->essid, priv->essid_len); wrqu->essid.length = priv->essid_len; wrqu->essid.flags = 1; /* active */ diff --git a/drivers/net/wireless/iwlwifi/iwl-scan.c b/drivers/net/wireless/iwlwifi/iwl-scan.c index 1cc8aa592821..3379b41fb5ee 100644 --- a/drivers/net/wireless/iwlwifi/iwl-scan.c +++ b/drivers/net/wireless/iwlwifi/iwl-scan.c @@ -643,6 +643,7 @@ static void iwl_bg_request_scan(struct work_struct *data) u8 n_probes = 2; u8 rx_chain = priv->hw_params.valid_rx_ant; u8 rate; + DECLARE_SSID_BUF(ssid); conf = ieee80211_get_hw_conf(priv->hw); @@ -735,8 +736,8 @@ static void iwl_bg_request_scan(struct work_struct *data) /* We should add the ability for user to lock to PASSIVE ONLY */ if (priv->one_direct_scan) { IWL_DEBUG_SCAN("Start direct scan for '%s'\n", - escape_ssid(priv->direct_ssid, - priv->direct_ssid_len)); + print_ssid(ssid, priv->direct_ssid, + priv->direct_ssid_len)); scan->direct_scan[0].id = WLAN_EID_SSID; scan->direct_scan[0].len = priv->direct_ssid_len; memcpy(scan->direct_scan[0].ssid, @@ -744,7 +745,7 @@ static void iwl_bg_request_scan(struct work_struct *data) n_probes++; } else if (!iwl_is_associated(priv) && priv->essid_len) { IWL_DEBUG_SCAN("Start direct scan for '%s' (not associated)\n", - escape_ssid(priv->essid, priv->essid_len)); + print_ssid(ssid, priv->essid, priv->essid_len)); scan->direct_scan[0].id = WLAN_EID_SSID; scan->direct_scan[0].len = priv->essid_len; memcpy(scan->direct_scan[0].ssid, priv->essid, priv->essid_len); diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 370cc46b4888..8009094503e4 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -6054,6 +6054,7 @@ static void iwl3945_bg_request_scan(struct work_struct *data) struct ieee80211_conf *conf = NULL; u8 n_probes = 2; enum ieee80211_band band; + DECLARE_SSID_BUF(ssid); conf = ieee80211_get_hw_conf(priv->hw); @@ -6154,7 +6155,8 @@ static void iwl3945_bg_request_scan(struct work_struct *data) if (priv->one_direct_scan) { IWL_DEBUG_SCAN ("Kicking off one direct scan for '%s'\n", - escape_ssid(priv->direct_ssid, priv->direct_ssid_len)); + print_ssid(ssid, priv->direct_ssid, + priv->direct_ssid_len)); scan->direct_scan[0].id = WLAN_EID_SSID; scan->direct_scan[0].len = priv->direct_ssid_len; memcpy(scan->direct_scan[0].ssid, @@ -6163,7 +6165,7 @@ static void iwl3945_bg_request_scan(struct work_struct *data) } else if (!iwl3945_is_associated(priv) && priv->essid_len) { IWL_DEBUG_SCAN ("Kicking off one direct scan for '%s' when not associated\n", - escape_ssid(priv->essid, priv->essid_len)); + print_ssid(ssid, priv->essid, priv->essid_len)); scan->direct_scan[0].id = WLAN_EID_SSID; scan->direct_scan[0].len = priv->essid_len; memcpy(scan->direct_scan[0].ssid, priv->essid, priv->essid_len); @@ -6945,6 +6947,7 @@ static int iwl3945_mac_hw_scan(struct ieee80211_hw *hw, u8 *ssid, size_t len) int rc = 0; unsigned long flags; struct iwl3945_priv *priv = hw->priv; + DECLARE_SSID_BUF(ssid_buf); IWL_DEBUG_MAC80211("enter\n"); @@ -6978,7 +6981,7 @@ static int iwl3945_mac_hw_scan(struct ieee80211_hw *hw, u8 *ssid, size_t len) } if (len) { IWL_DEBUG_SCAN("direct scan for %s [%d]\n ", - escape_ssid(ssid, len), (int)len); + print_ssid(ssid_buf, ssid, len), (int)len); priv->one_direct_scan = 1; priv->direct_ssid_len = (u8) diff --git a/drivers/net/wireless/libertas/assoc.c b/drivers/net/wireless/libertas/assoc.c index 3492e89d1dd5..92863780286f 100644 --- a/drivers/net/wireless/libertas/assoc.c +++ b/drivers/net/wireless/libertas/assoc.c @@ -153,17 +153,18 @@ static int lbs_adhoc_join(struct lbs_private *priv, struct cmd_ds_802_11_ad_hoc_join cmd; struct bss_descriptor *bss = &assoc_req->bss; u8 preamble = RADIO_PREAMBLE_LONG; + DECLARE_SSID_BUF(ssid); u16 ratesize = 0; int ret = 0; lbs_deb_enter(LBS_DEB_ASSOC); lbs_deb_join("current SSID '%s', ssid length %u\n", - escape_ssid(priv->curbssparams.ssid, + print_ssid(ssid, priv->curbssparams.ssid, priv->curbssparams.ssid_len), priv->curbssparams.ssid_len); lbs_deb_join("requested ssid '%s', ssid length %u\n", - escape_ssid(bss->ssid, bss->ssid_len), + print_ssid(ssid, bss->ssid, bss->ssid_len), bss->ssid_len); /* check if the requested SSID is already joined */ @@ -308,6 +309,7 @@ static int lbs_adhoc_start(struct lbs_private *priv, size_t ratesize = 0; u16 tmpcap = 0; int ret = 0; + DECLARE_SSID_BUF(ssid); lbs_deb_enter(LBS_DEB_ASSOC); @@ -327,7 +329,7 @@ static int lbs_adhoc_start(struct lbs_private *priv, memcpy(cmd.ssid, assoc_req->ssid, assoc_req->ssid_len); lbs_deb_join("ADHOC_START: SSID '%s', ssid length %u\n", - escape_ssid(assoc_req->ssid, assoc_req->ssid_len), + print_ssid(ssid, assoc_req->ssid, assoc_req->ssid_len), assoc_req->ssid_len); cmd.bsstype = CMD_BSS_TYPE_IBSS; @@ -695,6 +697,7 @@ static int assoc_helper_essid(struct lbs_private *priv, int ret = 0; struct bss_descriptor * bss; int channel = -1; + DECLARE_SSID_BUF(ssid); lbs_deb_enter(LBS_DEB_ASSOC); @@ -706,7 +709,7 @@ static int assoc_helper_essid(struct lbs_private *priv, channel = assoc_req->channel; lbs_deb_assoc("SSID '%s' requested\n", - escape_ssid(assoc_req->ssid, assoc_req->ssid_len)); + print_ssid(ssid, assoc_req->ssid, assoc_req->ssid_len)); if (assoc_req->mode == IW_MODE_INFRA) { lbs_send_specific_ssid_scan(priv, assoc_req->ssid, assoc_req->ssid_len); @@ -1207,6 +1210,7 @@ void lbs_association_worker(struct work_struct *work) struct assoc_request * assoc_req = NULL; int ret = 0; int find_any_ssid = 0; + DECLARE_SSID_BUF(ssid); lbs_deb_enter(LBS_DEB_ASSOC); @@ -1230,7 +1234,7 @@ void lbs_association_worker(struct work_struct *work) " secinfo: %s%s%s\n" " auth_mode: %d\n", assoc_req->flags, - escape_ssid(assoc_req->ssid, assoc_req->ssid_len), + print_ssid(ssid, assoc_req->ssid, assoc_req->ssid_len), assoc_req->channel, assoc_req->band, assoc_req->mode, assoc_req->bssid, assoc_req->secinfo.WPAenabled ? " WPA" : "", @@ -1767,6 +1771,7 @@ static int lbs_adhoc_post(struct lbs_private *priv, struct cmd_header *resp) struct cmd_ds_802_11_ad_hoc_result *adhoc_resp; union iwreq_data wrqu; struct bss_descriptor *bss; + DECLARE_SSID_BUF(ssid); lbs_deb_enter(LBS_DEB_JOIN); @@ -1816,7 +1821,7 @@ static int lbs_adhoc_post(struct lbs_private *priv, struct cmd_header *resp) wireless_send_event(priv->dev, SIOCGIWAP, &wrqu, NULL); lbs_deb_join("ADHOC_RESP: Joined/started '%s', BSSID %pM, channel %d\n", - escape_ssid(bss->ssid, bss->ssid_len), + print_ssid(ssid, bss->ssid, bss->ssid_len), priv->curbssparams.bssid, priv->curbssparams.channel); diff --git a/drivers/net/wireless/libertas/cmd.c b/drivers/net/wireless/libertas/cmd.c index 52feab69ee49..38843c8b919c 100644 --- a/drivers/net/wireless/libertas/cmd.c +++ b/drivers/net/wireless/libertas/cmd.c @@ -1063,6 +1063,7 @@ int lbs_mesh_config(struct lbs_private *priv, uint16_t action, uint16_t chan) { struct cmd_ds_mesh_config cmd; struct mrvl_meshie *ie; + DECLARE_SSID_BUF(ssid); memset(&cmd, 0, sizeof(cmd)); cmd.channel = cpu_to_le16(chan); @@ -1093,7 +1094,7 @@ int lbs_mesh_config(struct lbs_private *priv, uint16_t action, uint16_t chan) } lbs_deb_cmd("mesh config action %d type %x channel %d SSID %s\n", action, priv->mesh_tlv, chan, - escape_ssid(priv->mesh_ssid, priv->mesh_ssid_len)); + print_ssid(ssid, priv->mesh_ssid, priv->mesh_ssid_len)); return __lbs_mesh_config_send(priv, &cmd, action, priv->mesh_tlv); } diff --git a/drivers/net/wireless/libertas/debugfs.c b/drivers/net/wireless/libertas/debugfs.c index 84933203be74..ec4efd7ff3c8 100644 --- a/drivers/net/wireless/libertas/debugfs.c +++ b/drivers/net/wireless/libertas/debugfs.c @@ -66,6 +66,7 @@ static ssize_t lbs_getscantable(struct file *file, char __user *userbuf, int numscansdone = 0, res; unsigned long addr = get_zeroed_page(GFP_KERNEL); char *buf = (char *)addr; + DECLARE_SSID_BUF(ssid); struct bss_descriptor * iter_bss; pos += snprintf(buf+pos, len-pos, @@ -86,7 +87,8 @@ static ssize_t lbs_getscantable(struct file *file, char __user *userbuf, spectrum_mgmt ? 'S' : ' '); pos += snprintf(buf+pos, len-pos, " %04d |", SCAN_RSSI(iter_bss->rssi)); pos += snprintf(buf+pos, len-pos, " %s\n", - escape_ssid(iter_bss->ssid, iter_bss->ssid_len)); + print_ssid(ssid, iter_bss->ssid, + iter_bss->ssid_len)); numscansdone++; } diff --git a/drivers/net/wireless/libertas/scan.c b/drivers/net/wireless/libertas/scan.c index 7881890a4e98..5c34ac588189 100644 --- a/drivers/net/wireless/libertas/scan.c +++ b/drivers/net/wireless/libertas/scan.c @@ -362,6 +362,7 @@ int lbs_scan_networks(struct lbs_private *priv, int full_scan) #ifdef CONFIG_LIBERTAS_DEBUG struct bss_descriptor *iter; int i = 0; + DECLARE_SSID_BUF(ssid); #endif lbs_deb_enter_args(LBS_DEB_SCAN, "full_scan %d", full_scan); @@ -455,7 +456,7 @@ int lbs_scan_networks(struct lbs_private *priv, int full_scan) list_for_each_entry(iter, &priv->network_list, list) lbs_deb_scan("%02d: BSSID %pM, RSSI %d, SSID '%s'\n", i++, iter->bssid, iter->rssi, - escape_ssid(iter->ssid, iter->ssid_len)); + print_ssid(ssid, iter->ssid, iter->ssid_len)); mutex_unlock(&priv->lock); #endif @@ -514,6 +515,7 @@ static int lbs_process_bss(struct bss_descriptor *bss, struct ieeetypes_dsparamset *pDS; struct ieeetypes_cfparamset *pCF; struct ieeetypes_ibssparamset *pibss; + DECLARE_SSID_BUF(ssid); struct ieeetypes_countryinfoset *pcountryinfo; uint8_t *pos, *end, *p; uint8_t n_ex_rates = 0, got_basic_rates = 0, n_basic_rates = 0; @@ -602,7 +604,7 @@ static int lbs_process_bss(struct bss_descriptor *bss, bss->ssid_len = min_t(int, 32, elem->len); memcpy(bss->ssid, elem->data, bss->ssid_len); lbs_deb_scan("got SSID IE: '%s', len %u\n", - escape_ssid(bss->ssid, bss->ssid_len), + print_ssid(ssid, bss->ssid, bss->ssid_len), bss->ssid_len); break; @@ -742,10 +744,11 @@ done: int lbs_send_specific_ssid_scan(struct lbs_private *priv, uint8_t *ssid, uint8_t ssid_len) { + DECLARE_SSID_BUF(ssid_buf); int ret = 0; lbs_deb_enter_args(LBS_DEB_SCAN, "SSID '%s'\n", - escape_ssid(ssid, ssid_len)); + print_ssid(ssid_buf, ssid, ssid_len)); if (!ssid_len) goto out; @@ -940,6 +943,7 @@ out: int lbs_set_scan(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { + DECLARE_SSID_BUF(ssid); struct lbs_private *priv = dev->priv; int ret = 0; @@ -969,7 +973,7 @@ int lbs_set_scan(struct net_device *dev, struct iw_request_info *info, priv->scan_ssid_len = req->essid_len; memcpy(priv->scan_ssid, req->essid, priv->scan_ssid_len); lbs_deb_wext("set_scan, essid '%s'\n", - escape_ssid(priv->scan_ssid, priv->scan_ssid_len)); + print_ssid(ssid, priv->scan_ssid, priv->scan_ssid_len)); } else { priv->scan_ssid_len = 0; } diff --git a/drivers/net/wireless/libertas/wext.c b/drivers/net/wireless/libertas/wext.c index 247579951858..d4c6a659b562 100644 --- a/drivers/net/wireless/libertas/wext.c +++ b/drivers/net/wireless/libertas/wext.c @@ -1978,6 +1978,7 @@ static int lbs_set_essid(struct net_device *dev, struct iw_request_info *info, u8 ssid_len = 0; struct assoc_request * assoc_req; int in_ssid_len = dwrq->length; + DECLARE_SSID_BUF(ssid_buf); lbs_deb_enter(LBS_DEB_WEXT); @@ -2006,7 +2007,7 @@ static int lbs_set_essid(struct net_device *dev, struct iw_request_info *info, lbs_deb_wext("requested any SSID\n"); } else { lbs_deb_wext("requested SSID '%s'\n", - escape_ssid(ssid, ssid_len)); + print_ssid(ssid_buf, ssid, ssid_len)); } out: diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 64a4abce6d91..b0726e2079b5 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -12,8 +12,8 @@ * published by the Free Software Foundation. */ -#ifndef IEEE80211_H -#define IEEE80211_H +#ifndef LINUX_IEEE80211_H +#define LINUX_IEEE80211_H #include #include @@ -1114,4 +1114,4 @@ static inline u8 *ieee80211_get_DA(struct ieee80211_hdr *hdr) return hdr->addr1; } -#endif /* IEEE80211_H */ +#endif /* LINUX_IEEE80211_H */ diff --git a/include/net/lib80211.h b/include/net/lib80211.h index ce49a30033b6..906d96f1b264 100644 --- a/include/net/lib80211.h +++ b/include/net/lib80211.h @@ -8,8 +8,9 @@ #ifndef LIB80211_H #define LIB80211_H -/* escape_ssid() is intended to be used in debug (and possibly error) +/* print_ssid() is intended to be used in debug (and possibly error) * messages. It should never be used for passing ssid to user space. */ -const char *escape_ssid(const char *ssid, u8 ssid_len); +const char *print_ssid(char *buf, const char *ssid, u8 ssid_len); +#define DECLARE_SSID_BUF(var) char var[32 * 4 + 1] __maybe_unused #endif /* LIB80211_H */ diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c index f15f82e7bbfd..d19c8de6ef25 100644 --- a/net/ieee80211/ieee80211_rx.c +++ b/net/ieee80211/ieee80211_rx.c @@ -1124,6 +1124,7 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element *info_element, u16 length, struct ieee80211_network *network) { + DECLARE_SSID_BUF(ssid); u8 i; #ifdef CONFIG_IEEE80211_DEBUG char rates_str[64]; @@ -1155,7 +1156,8 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element IW_ESSID_MAX_SIZE - network->ssid_len); IEEE80211_DEBUG_MGMT("MFIE_TYPE_SSID: '%s' len=%d.\n", - escape_ssid(network->ssid), + print_ssid(ssid, network->ssid, + network->ssid_len), network->ssid_len); break; @@ -1401,6 +1403,8 @@ static int ieee80211_network_init(struct ieee80211_device *ieee, struct ieee8021 struct ieee80211_network *network, struct ieee80211_rx_stats *stats) { + DECLARE_SSID_BUF(ssid); + network->qos_data.active = 0; network->qos_data.supported = 0; network->qos_data.param_count = 0; @@ -1449,7 +1453,7 @@ static int ieee80211_network_init(struct ieee80211_device *ieee, struct ieee8021 if (network->mode == 0) { IEEE80211_DEBUG_SCAN("Filtered out '%s (%pM)' " "network.\n", - escape_ssid(network->ssid, + print_ssid(ssid, network->ssid, network->ssid_len), network->bssid); return 1; @@ -1563,10 +1567,11 @@ static void ieee80211_process_probe_response(struct ieee80211_device struct ieee80211_info_element *info_element = beacon->info_element; #endif unsigned long flags; + DECLARE_SSID_BUF(ssid); IEEE80211_DEBUG_SCAN("'%s' (%pM" "): %c%c%c%c %c%c%c%c-%c%c%c%c %c%c%c%c\n", - escape_ssid(info_element->data, info_element->len), + print_ssid(ssid, info_element->data, info_element->len), beacon->header.addr3, (beacon->capability & cpu_to_le16(1 << 0xf)) ? '1' : '0', (beacon->capability & cpu_to_le16(1 << 0xe)) ? '1' : '0', @@ -1587,7 +1592,7 @@ static void ieee80211_process_probe_response(struct ieee80211_device if (ieee80211_network_init(ieee, beacon, &network, stats)) { IEEE80211_DEBUG_SCAN("Dropped '%s' (%pM) via %s.\n", - escape_ssid(info_element->data, + print_ssid(ssid, info_element->data, info_element->len), beacon->header.addr3, is_beacon(beacon->header.frame_ctl) ? @@ -1625,7 +1630,7 @@ static void ieee80211_process_probe_response(struct ieee80211_device target = oldest; IEEE80211_DEBUG_SCAN("Expired '%s' (%pM) from " "network list.\n", - escape_ssid(target->ssid, + print_ssid(ssid, target->ssid, target->ssid_len), target->bssid); ieee80211_network_reset(target); @@ -1638,7 +1643,7 @@ static void ieee80211_process_probe_response(struct ieee80211_device #ifdef CONFIG_IEEE80211_DEBUG IEEE80211_DEBUG_SCAN("Adding '%s' (%pM) via %s.\n", - escape_ssid(network.ssid, + print_ssid(ssid, network.ssid, network.ssid_len), network.bssid, is_beacon(beacon->header.frame_ctl) ? @@ -1649,7 +1654,7 @@ static void ieee80211_process_probe_response(struct ieee80211_device list_add_tail(&target->list, &ieee->network_list); } else { IEEE80211_DEBUG_SCAN("Updating '%s' (%pM) via %s.\n", - escape_ssid(target->ssid, + print_ssid(ssid, target->ssid, target->ssid_len), target->bssid, is_beacon(beacon->header.frame_ctl) ? diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c index 3025140ae721..29eb41695a82 100644 --- a/net/ieee80211/ieee80211_wx.c +++ b/net/ieee80211/ieee80211_wx.c @@ -34,6 +34,7 @@ #include #include +#include #include #include @@ -258,6 +259,7 @@ int ieee80211_wx_get_scan(struct ieee80211_device *ieee, char *ev = extra; char *stop = ev + wrqu->data.length; int i = 0; + DECLARE_SSID_BUF(ssid); IEEE80211_DEBUG_WX("Getting scan\n"); @@ -277,7 +279,7 @@ int ieee80211_wx_get_scan(struct ieee80211_device *ieee, else IEEE80211_DEBUG_SCAN("Not showing network '%s (" "%pM)' due to age (%dms).\n", - escape_ssid(network->ssid, + print_ssid(ssid, network->ssid, network->ssid_len), network->bssid, jiffies_to_msecs(jiffies - @@ -307,6 +309,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, int i, key, key_provided, len; struct ieee80211_crypt_data **crypt; int host_crypto = ieee->host_encrypt || ieee->host_decrypt || ieee->host_build_iv; + DECLARE_SSID_BUF(ssid); IEEE80211_DEBUG_WX("SET_ENCODE\n"); @@ -402,7 +405,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, memset(sec.keys[key] + erq->length, 0, len - erq->length); IEEE80211_DEBUG_WX("Setting key %d to '%s' (%d:%d bytes)\n", - key, escape_ssid(sec.keys[key], len), + key, print_ssid(ssid, sec.keys[key], len), erq->length, len); sec.key_sizes[key] = len; if (*crypt) diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c index b8e34d31e757..e71f7d085621 100644 --- a/net/wireless/lib80211.c +++ b/net/wireless/lib80211.c @@ -19,11 +19,10 @@ MODULE_DESCRIPTION(DRV_DESCRIPTION); MODULE_AUTHOR("John W. Linville "); MODULE_LICENSE("GPL"); -const char *escape_ssid(const char *ssid, u8 ssid_len) +const char *print_ssid(char *buf, const char *ssid, u8 ssid_len) { - static char escaped[IEEE80211_MAX_SSID_LEN * 4 + 1]; const char *s = ssid; - char *d = escaped; + char *d = buf; ssid_len = min_t(u8, ssid_len, IEEE80211_MAX_SSID_LEN); while (ssid_len--) { @@ -48,9 +47,9 @@ const char *escape_ssid(const char *ssid, u8 ssid_len) s++; } *d = '\0'; - return escaped; + return buf; } -EXPORT_SYMBOL(escape_ssid); +EXPORT_SYMBOL(print_ssid); static int __init ieee80211_init(void) { -- cgit v1.2.3 From 72118015271e6d3852cb9f647efe0987d131adaa Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Tue, 30 Sep 2008 21:43:03 -0400 Subject: wireless: avoid some net/ieee80211.h vs. linux/ieee80211.h conflicts There is quite a lot of overlap in definitions between these headers... Signed-off-by: John W. Linville --- drivers/net/wireless/hostap/hostap_common.h | 13 ---- drivers/net/wireless/ipw2200.c | 24 +++--- include/linux/ieee80211.h | 3 +- include/net/ieee80211.h | 112 +--------------------------- include/net/lib80211.h | 4 +- net/ieee80211/ieee80211_rx.c | 2 +- 6 files changed, 19 insertions(+), 139 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/hostap/hostap_common.h b/drivers/net/wireless/hostap/hostap_common.h index b470c743c2d1..90b64b092007 100644 --- a/drivers/net/wireless/hostap/hostap_common.h +++ b/drivers/net/wireless/hostap/hostap_common.h @@ -6,19 +6,6 @@ /* IEEE 802.11 defines */ -/* Information Element IDs */ -#define WLAN_EID_SSID 0 -#define WLAN_EID_SUPP_RATES 1 -#define WLAN_EID_FH_PARAMS 2 -#define WLAN_EID_DS_PARAMS 3 -#define WLAN_EID_CF_PARAMS 4 -#define WLAN_EID_TIM 5 -#define WLAN_EID_IBSS_PARAMS 6 -#define WLAN_EID_CHALLENGE 16 -#define WLAN_EID_RSN 48 -#define WLAN_EID_GENERIC 221 - - /* HFA384X Configuration RIDs */ #define HFA384X_RID_CNFPORTTYPE 0xFC00 #define HFA384X_RID_CNFOWNMACADDR 0xFC01 diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c index 2b9d96a5c10e..051ae92d8b65 100644 --- a/drivers/net/wireless/ipw2200.c +++ b/drivers/net/wireless/ipw2200.c @@ -4446,7 +4446,7 @@ static void ipw_rx_notification(struct ipw_priv *priv, #ifdef CONFIG_IPW2200_QOS #define IPW_GET_PACKET_STYPE(x) WLAN_FC_GET_STYPE( \ - le16_to_cpu(((struct ieee80211_hdr *)(x))->frame_ctl)) + le16_to_cpu(((struct ieee80211_hdr *)(x))->frame_control)) if ((priv->status & STATUS_AUTH) && (IPW_GET_PACKET_STYPE(¬if->u.raw) == IEEE80211_STYPE_ASSOC_RESP)) { @@ -7665,12 +7665,12 @@ static void ipw_rebuild_decrypted_skb(struct ipw_priv *priv, u16 fc; hdr = (struct ieee80211_hdr *)skb->data; - fc = le16_to_cpu(hdr->frame_ctl); + fc = le16_to_cpu(hdr->frame_control); if (!(fc & IEEE80211_FCTL_PROTECTED)) return; fc &= ~IEEE80211_FCTL_PROTECTED; - hdr->frame_ctl = cpu_to_le16(fc); + hdr->frame_control = cpu_to_le16(fc); switch (priv->ieee->sec.level) { case SEC_LEVEL_3: /* Remove CCMP HDR */ @@ -7982,17 +7982,17 @@ static void ipw_handle_promiscuous_rx(struct ipw_priv *priv, } hdr = (void *)rxb->skb->data + IPW_RX_FRAME_SIZE; - if (ieee80211_is_management(le16_to_cpu(hdr->frame_ctl))) { + if (ieee80211_is_management(le16_to_cpu(hdr->frame_control))) { if (filter & IPW_PROM_NO_MGMT) return; if (filter & IPW_PROM_MGMT_HEADER_ONLY) hdr_only = 1; - } else if (ieee80211_is_control(le16_to_cpu(hdr->frame_ctl))) { + } else if (ieee80211_is_control(le16_to_cpu(hdr->frame_control))) { if (filter & IPW_PROM_NO_CTL) return; if (filter & IPW_PROM_CTL_HEADER_ONLY) hdr_only = 1; - } else if (ieee80211_is_data(le16_to_cpu(hdr->frame_ctl))) { + } else if (ieee80211_is_data(le16_to_cpu(hdr->frame_control))) { if (filter & IPW_PROM_NO_DATA) return; if (filter & IPW_PROM_DATA_HEADER_ONLY) @@ -8010,7 +8010,7 @@ static void ipw_handle_promiscuous_rx(struct ipw_priv *priv, ipw_rt = (void *)skb->data; if (hdr_only) - len = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_ctl)); + len = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control)); memcpy(ipw_rt->payload, hdr, len); @@ -8230,7 +8230,7 @@ static int is_duplicate_packet(struct ipw_priv *priv, /* Comment this line now since we observed the card receives * duplicate packets but the FCTL_RETRY bit is not set in the * IBSS mode with fragmentation enabled. - BUG_ON(!(le16_to_cpu(header->frame_ctl) & IEEE80211_FCTL_RETRY)); */ + BUG_ON(!(le16_to_cpu(header->frame_control) & IEEE80211_FCTL_RETRY)); */ return 1; } @@ -10381,17 +10381,17 @@ static void ipw_handle_promiscuous_tx(struct ipw_priv *priv, /* Filtering of fragment chains is done agains the first fragment */ hdr = (void *)txb->fragments[0]->data; - if (ieee80211_is_management(le16_to_cpu(hdr->frame_ctl))) { + if (ieee80211_is_management(le16_to_cpu(hdr->frame_control))) { if (filter & IPW_PROM_NO_MGMT) return; if (filter & IPW_PROM_MGMT_HEADER_ONLY) hdr_only = 1; - } else if (ieee80211_is_control(le16_to_cpu(hdr->frame_ctl))) { + } else if (ieee80211_is_control(le16_to_cpu(hdr->frame_control))) { if (filter & IPW_PROM_NO_CTL) return; if (filter & IPW_PROM_CTL_HEADER_ONLY) hdr_only = 1; - } else if (ieee80211_is_data(le16_to_cpu(hdr->frame_ctl))) { + } else if (ieee80211_is_data(le16_to_cpu(hdr->frame_control))) { if (filter & IPW_PROM_NO_DATA) return; if (filter & IPW_PROM_DATA_HEADER_ONLY) @@ -10406,7 +10406,7 @@ static void ipw_handle_promiscuous_tx(struct ipw_priv *priv, if (hdr_only) { hdr = (void *)src->data; - len = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_ctl)); + len = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control)); } else len = src->len; diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index b0726e2079b5..aad99195a4cc 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -826,8 +826,7 @@ struct ieee80211_ht_info { /* Authentication algorithms */ #define WLAN_AUTH_OPEN 0 #define WLAN_AUTH_SHARED_KEY 1 -#define WLAN_AUTH_FAST_BSS_TRANSITION 2 -#define WLAN_AUTH_LEAP 128 +#define WLAN_AUTH_LEAP 2 #define WLAN_AUTH_CHALLENGE_LEN 128 diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h index afa34d3be721..738734a4653b 100644 --- a/include/net/ieee80211.h +++ b/include/net/ieee80211.h @@ -28,6 +28,7 @@ #include /* ETH_ALEN */ #include /* ARRAY_SIZE */ #include +#include #define IEEE80211_VERSION "git-1.1.13" @@ -214,94 +215,6 @@ struct ieee80211_snap_hdr { #define WLAN_GET_SEQ_FRAG(seq) ((seq) & IEEE80211_SCTL_FRAG) #define WLAN_GET_SEQ_SEQ(seq) (((seq) & IEEE80211_SCTL_SEQ) >> 4) -/* Authentication algorithms */ -#define WLAN_AUTH_OPEN 0 -#define WLAN_AUTH_SHARED_KEY 1 -#define WLAN_AUTH_LEAP 2 - -#define WLAN_AUTH_CHALLENGE_LEN 128 - -#define WLAN_CAPABILITY_ESS (1<<0) -#define WLAN_CAPABILITY_IBSS (1<<1) -#define WLAN_CAPABILITY_CF_POLLABLE (1<<2) -#define WLAN_CAPABILITY_CF_POLL_REQUEST (1<<3) -#define WLAN_CAPABILITY_PRIVACY (1<<4) -#define WLAN_CAPABILITY_SHORT_PREAMBLE (1<<5) -#define WLAN_CAPABILITY_PBCC (1<<6) -#define WLAN_CAPABILITY_CHANNEL_AGILITY (1<<7) -#define WLAN_CAPABILITY_SPECTRUM_MGMT (1<<8) -#define WLAN_CAPABILITY_QOS (1<<9) -#define WLAN_CAPABILITY_SHORT_SLOT_TIME (1<<10) -#define WLAN_CAPABILITY_DSSS_OFDM (1<<13) - -/* 802.11g ERP information element */ -#define WLAN_ERP_NON_ERP_PRESENT (1<<0) -#define WLAN_ERP_USE_PROTECTION (1<<1) -#define WLAN_ERP_BARKER_PREAMBLE (1<<2) - -/* Status codes */ -enum ieee80211_statuscode { - WLAN_STATUS_SUCCESS = 0, - WLAN_STATUS_UNSPECIFIED_FAILURE = 1, - WLAN_STATUS_CAPS_UNSUPPORTED = 10, - WLAN_STATUS_REASSOC_NO_ASSOC = 11, - WLAN_STATUS_ASSOC_DENIED_UNSPEC = 12, - WLAN_STATUS_NOT_SUPPORTED_AUTH_ALG = 13, - WLAN_STATUS_UNKNOWN_AUTH_TRANSACTION = 14, - WLAN_STATUS_CHALLENGE_FAIL = 15, - WLAN_STATUS_AUTH_TIMEOUT = 16, - WLAN_STATUS_AP_UNABLE_TO_HANDLE_NEW_STA = 17, - WLAN_STATUS_ASSOC_DENIED_RATES = 18, - /* 802.11b */ - WLAN_STATUS_ASSOC_DENIED_NOSHORTPREAMBLE = 19, - WLAN_STATUS_ASSOC_DENIED_NOPBCC = 20, - WLAN_STATUS_ASSOC_DENIED_NOAGILITY = 21, - /* 802.11h */ - WLAN_STATUS_ASSOC_DENIED_NOSPECTRUM = 22, - WLAN_STATUS_ASSOC_REJECTED_BAD_POWER = 23, - WLAN_STATUS_ASSOC_REJECTED_BAD_SUPP_CHAN = 24, - /* 802.11g */ - WLAN_STATUS_ASSOC_DENIED_NOSHORTTIME = 25, - WLAN_STATUS_ASSOC_DENIED_NODSSSOFDM = 26, - /* 802.11i */ - WLAN_STATUS_INVALID_IE = 40, - WLAN_STATUS_INVALID_GROUP_CIPHER = 41, - WLAN_STATUS_INVALID_PAIRWISE_CIPHER = 42, - WLAN_STATUS_INVALID_AKMP = 43, - WLAN_STATUS_UNSUPP_RSN_VERSION = 44, - WLAN_STATUS_INVALID_RSN_IE_CAP = 45, - WLAN_STATUS_CIPHER_SUITE_REJECTED = 46, -}; - -/* Reason codes */ -enum ieee80211_reasoncode { - WLAN_REASON_UNSPECIFIED = 1, - WLAN_REASON_PREV_AUTH_NOT_VALID = 2, - WLAN_REASON_DEAUTH_LEAVING = 3, - WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY = 4, - WLAN_REASON_DISASSOC_AP_BUSY = 5, - WLAN_REASON_CLASS2_FRAME_FROM_NONAUTH_STA = 6, - WLAN_REASON_CLASS3_FRAME_FROM_NONASSOC_STA = 7, - WLAN_REASON_DISASSOC_STA_HAS_LEFT = 8, - WLAN_REASON_STA_REQ_ASSOC_WITHOUT_AUTH = 9, - /* 802.11h */ - WLAN_REASON_DISASSOC_BAD_POWER = 10, - WLAN_REASON_DISASSOC_BAD_SUPP_CHAN = 11, - /* 802.11i */ - WLAN_REASON_INVALID_IE = 13, - WLAN_REASON_MIC_FAILURE = 14, - WLAN_REASON_4WAY_HANDSHAKE_TIMEOUT = 15, - WLAN_REASON_GROUP_KEY_HANDSHAKE_TIMEOUT = 16, - WLAN_REASON_IE_DIFFERENT = 17, - WLAN_REASON_INVALID_GROUP_CIPHER = 18, - WLAN_REASON_INVALID_PAIRWISE_CIPHER = 19, - WLAN_REASON_INVALID_AKMP = 20, - WLAN_REASON_UNSUPP_RSN_VERSION = 21, - WLAN_REASON_INVALID_RSN_IE_CAP = 22, - WLAN_REASON_IEEE8021X_FAILED = 23, - WLAN_REASON_CIPHER_SUITE_REJECTED = 24, -}; - /* Action categories - 802.11h */ enum ieee80211_actioncategories { WLAN_ACTION_SPECTRUM_MGMT = 0, @@ -530,15 +443,6 @@ enum ieee80211_mfie { MFIE_TYPE_QOS_PARAMETER = 222, }; -/* Minimal header; can be used for passing 802.11 frames with sufficient - * information to determine what type of underlying data type is actually - * stored in the data. */ -struct ieee80211_hdr { - __le16 frame_ctl; - __le16 duration_id; - u8 payload[0]; -} __attribute__ ((packed)); - struct ieee80211_hdr_1addr { __le16 frame_ctl; __le16 duration_id; @@ -586,18 +490,6 @@ struct ieee80211_hdr_3addrqos { __le16 qos_ctl; } __attribute__ ((packed)); -struct ieee80211_hdr_4addrqos { - __le16 frame_ctl; - __le16 duration_id; - u8 addr1[ETH_ALEN]; - u8 addr2[ETH_ALEN]; - u8 addr3[ETH_ALEN]; - __le16 seq_ctl; - u8 addr4[ETH_ALEN]; - u8 payload[0]; - __le16 qos_ctl; -} __attribute__ ((packed)); - struct ieee80211_info_element { u8 id; u8 len; @@ -1187,7 +1079,7 @@ static inline int ieee80211_get_hdrlen(u16 fc) static inline u8 *ieee80211_get_payload(struct ieee80211_hdr *hdr) { - switch (ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_ctl))) { + switch (ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control))) { case IEEE80211_1ADDR_LEN: return ((struct ieee80211_hdr_1addr *)hdr)->payload; case IEEE80211_2ADDR_LEN: diff --git a/include/net/lib80211.h b/include/net/lib80211.h index 906d96f1b264..e1558a187ac0 100644 --- a/include/net/lib80211.h +++ b/include/net/lib80211.h @@ -8,9 +8,11 @@ #ifndef LIB80211_H #define LIB80211_H +#include + /* print_ssid() is intended to be used in debug (and possibly error) * messages. It should never be used for passing ssid to user space. */ const char *print_ssid(char *buf, const char *ssid, u8 ssid_len); -#define DECLARE_SSID_BUF(var) char var[32 * 4 + 1] __maybe_unused +#define DECLARE_SSID_BUF(var) char var[IEEE80211_MAX_SSID_LEN * 4 + 1] __maybe_unused #endif /* LIB80211_H */ diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c index d19c8de6ef25..a91ef8475467 100644 --- a/net/ieee80211/ieee80211_rx.c +++ b/net/ieee80211/ieee80211_rx.c @@ -40,7 +40,7 @@ static void ieee80211_monitor_rx(struct ieee80211_device *ieee, struct ieee80211_rx_stats *rx_stats) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - u16 fc = le16_to_cpu(hdr->frame_ctl); + u16 fc = le16_to_cpu(hdr->frame_control); skb->dev = ieee->dev; skb_reset_mac_header(skb); -- cgit v1.2.3 From 8b30b1fe368ab03049435884c11c5c50e4c4ef0b Mon Sep 17 00:00:00 2001 From: Sujith Date: Fri, 24 Oct 2008 09:55:27 +0530 Subject: mac80211: Re-enable aggregation Wireless HW without any dedicated queues for aggregation do not need the ampdu_queues mechanism present right now in mac80211. Since mac80211 is still incomplete wrt TX MQ changes, do not allow aggregation sessions for drivers that set ampdu_queues. This is only an interim hack until Intel fixes the requeue issue. Signed-off-by: Sujith Signed-off-by: Luis Rodriguez Signed-off-by: John W. Linville --- drivers/net/wireless/ath9k/main.c | 6 ++-- drivers/net/wireless/iwlwifi/iwl-core.c | 3 +- include/linux/skbuff.h | 4 +++ include/net/mac80211.h | 8 ++--- net/core/skbuff.c | 1 + net/mac80211/ht.c | 60 +++++++++++++++++++-------------- net/mac80211/main.c | 7 ++-- net/mac80211/rx.c | 7 ++-- net/mac80211/tx.c | 19 ++++++++--- net/mac80211/wme.c | 24 ++++++------- 10 files changed, 76 insertions(+), 63 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c index 795fed5cadfa..f6dc4c826044 100644 --- a/drivers/net/wireless/ath9k/main.c +++ b/drivers/net/wireless/ath9k/main.c @@ -953,10 +953,7 @@ static int ath_attach(u16 devid, &sc->sbands[IEEE80211_BAND_5GHZ]; } - /* FIXME: Have to figure out proper hw init values later */ - hw->queues = 4; - hw->ampdu_queues = 1; /* Register rate control */ hw->rate_control_algorithm = "ath9k_rate_control"; @@ -1745,7 +1742,8 @@ static int ath_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) hw->flags = IEEE80211_HW_RX_INCLUDES_FCS | IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_NOISE_DBM; + IEEE80211_HW_NOISE_DBM | + IEEE80211_HW_AMPDU_AGGREGATION; hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_AP) | diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index 20c7ff382914..ba05f5ddc6d0 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -871,7 +871,8 @@ int iwl_setup_mac(struct iwl_priv *priv) /* Tell mac80211 our characteristics */ hw->flags = IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_NOISE_DBM; + IEEE80211_HW_NOISE_DBM | + IEEE80211_HW_AMPDU_AGGREGATION; hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_AP) | BIT(NL80211_IFTYPE_STATION) | diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 487e34507b41..a01b6f84e3bc 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -250,6 +250,9 @@ typedef unsigned char *sk_buff_data_t; * @tc_verd: traffic control verdict * @ndisc_nodetype: router type (from link layer) * @do_not_encrypt: set to prevent encryption of this frame + * @requeue: set to indicate that the wireless core should attempt + * a software retry on this frame if we failed to + * receive an ACK for it * @dma_cookie: a cookie to one of several possible DMA operations * done by skb DMA functions * @secmark: security marking @@ -326,6 +329,7 @@ struct sk_buff { #endif #if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) __u8 do_not_encrypt:1; + __u8 requeue:1; #endif /* 0/13/14 bit hole */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 16c895969e6e..bba96a203885 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -242,7 +242,6 @@ struct ieee80211_bss_conf { * @IEEE80211_TX_CTL_RATE_CTRL_PROBE: internal to mac80211, can be * set by rate control algorithms to indicate probe rate, will * be cleared for fragmented frames (except on the last fragment) - * @IEEE80211_TX_CTL_REQUEUE: REMOVE THIS */ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_REQ_TX_STATUS = BIT(0), @@ -258,9 +257,6 @@ enum mac80211_tx_control_flags { IEEE80211_TX_STAT_AMPDU = BIT(10), IEEE80211_TX_STAT_AMPDU_NO_BACK = BIT(11), IEEE80211_TX_CTL_RATE_CTRL_PROBE = BIT(12), - - /* XXX: remove this */ - IEEE80211_TX_CTL_REQUEUE = BIT(13), }; enum mac80211_rate_control_flags { @@ -847,6 +843,9 @@ enum ieee80211_tkip_key_type { * @IEEE80211_HW_SPECTRUM_MGMT: * Hardware supports spectrum management defined in 802.11h * Measurement, Channel Switch, Quieting, TPC + * + * @IEEE80211_HW_AMPDU_AGGREGATION: + * Hardware supports 11n A-MPDU aggregation. */ enum ieee80211_hw_flags { IEEE80211_HW_RX_INCLUDES_FCS = 1<<1, @@ -858,6 +857,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_SIGNAL_DBM = 1<<7, IEEE80211_HW_NOISE_DBM = 1<<8, IEEE80211_HW_SPECTRUM_MGMT = 1<<9, + IEEE80211_HW_AMPDU_AGGREGATION = 1<<10, }; /** diff --git a/net/core/skbuff.c b/net/core/skbuff.c index cdfe473181af..c4c8a33f3418 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -544,6 +544,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) C(truesize); #if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) C(do_not_encrypt); + C(requeue); #endif atomic_set(&n->users, 1); diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 42c3e590df98..08009d4b7d6e 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -458,7 +458,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) u8 *state; int ret; - if (tid >= STA_TID_NUM) + if ((tid >= STA_TID_NUM) || !(hw->flags & IEEE80211_HW_AMPDU_AGGREGATION)) return -EINVAL; #ifdef CONFIG_MAC80211_HT_DEBUG @@ -515,17 +515,19 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) (unsigned long)&sta->timer_to_tid[tid]; init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); - /* create a new queue for this aggregation */ - ret = ieee80211_ht_agg_queue_add(local, sta, tid); + if (hw->ampdu_queues) { + /* create a new queue for this aggregation */ + ret = ieee80211_ht_agg_queue_add(local, sta, tid); - /* case no queue is available to aggregation - * don't switch to aggregation */ - if (ret) { + /* case no queue is available to aggregation + * don't switch to aggregation */ + if (ret) { #ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "BA request denied - queue unavailable for" - " tid %d\n", tid); + printk(KERN_DEBUG "BA request denied - " + "queue unavailable for tid %d\n", tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - goto err_unlock_queue; + goto err_unlock_queue; + } } sdata = sta->sdata; @@ -544,7 +546,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) /* No need to requeue the packets in the agg queue, since we * held the tx lock: no packet could be enqueued to the newly * allocated queue */ - ieee80211_ht_agg_queue_remove(local, sta, tid, 0); + if (hw->ampdu_queues) + ieee80211_ht_agg_queue_remove(local, sta, tid, 0); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "BA request denied - HW unavailable for" " tid %d\n", tid); @@ -554,7 +557,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) } /* Will put all the packets in the new SW queue */ - ieee80211_requeue(local, ieee802_1d_to_ac[tid]); + if (hw->ampdu_queues) + ieee80211_requeue(local, ieee802_1d_to_ac[tid]); spin_unlock_bh(&sta->lock); /* send an addBA request */ @@ -622,7 +626,8 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, ra, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - ieee80211_stop_queue(hw, sta->tid_to_tx_q[tid]); + if (hw->ampdu_queues) + ieee80211_stop_queue(hw, sta->tid_to_tx_q[tid]); *state = HT_AGG_STATE_REQ_STOP_BA_MSK | (initiator << HT_AGG_STATE_INITIATOR_SHIFT); @@ -635,7 +640,8 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, if (ret) { WARN_ON(ret != -EBUSY); *state = HT_AGG_STATE_OPERATIONAL; - ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); + if (hw->ampdu_queues) + ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); goto stop_BA_exit; } @@ -691,7 +697,8 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid); #endif - ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); + if (hw->ampdu_queues) + ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); } spin_unlock_bh(&sta->lock); rcu_read_unlock(); @@ -745,16 +752,18 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) ieee80211_send_delba(sta->sdata, ra, tid, WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); - agg_queue = sta->tid_to_tx_q[tid]; - - ieee80211_ht_agg_queue_remove(local, sta, tid, 1); - - /* We just requeued the all the frames that were in the - * removed queue, and since we might miss a softirq we do - * netif_schedule_queue. ieee80211_wake_queue is not used - * here as this queue is not necessarily stopped - */ - netif_schedule_queue(netdev_get_tx_queue(local->mdev, agg_queue)); + if (hw->ampdu_queues) { + agg_queue = sta->tid_to_tx_q[tid]; + ieee80211_ht_agg_queue_remove(local, sta, tid, 1); + + /* We just requeued the all the frames that were in the + * removed queue, and since we might miss a softirq we do + * netif_schedule_queue. ieee80211_wake_queue is not used + * here as this queue is not necessarily stopped + */ + netif_schedule_queue(netdev_get_tx_queue(local->mdev, + agg_queue)); + } spin_lock_bh(&sta->lock); *state = HT_AGG_STATE_IDLE; sta->ampdu_mlme.addba_req_num[tid] = 0; @@ -1011,7 +1020,8 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, *state |= HT_ADDBA_RECEIVED_MSK; sta->ampdu_mlme.addba_req_num[tid] = 0; - if (*state == HT_AGG_STATE_OPERATIONAL) + if (*state == HT_AGG_STATE_OPERATIONAL && + local->hw.ampdu_queues) ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); spin_unlock_bh(&sta->lock); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 88c1975a97a5..fa0cc7a1e6b4 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -386,8 +386,6 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, struct sta_info *sta, struct sk_buff *skb) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - sta->tx_filtered_count++; /* @@ -434,10 +432,9 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, return; } - if (!test_sta_flags(sta, WLAN_STA_PS) && - !(info->flags & IEEE80211_TX_CTL_REQUEUE)) { + if (!test_sta_flags(sta, WLAN_STA_PS) && !skb->requeue) { /* Software retry the packet once */ - info->flags |= IEEE80211_TX_CTL_REQUEUE; + skb->requeue = 1; ieee80211_remove_tx_extra(local, sta->key, skb); dev_queue_xmit(skb); return; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c4c95f1db605..648a1d0e6c82 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -669,7 +669,6 @@ static int ap_sta_ps_end(struct sta_info *sta) struct ieee80211_local *local = sdata->local; struct sk_buff *skb; int sent = 0; - struct ieee80211_tx_info *info; atomic_dec(&sdata->bss->num_sta_ps); @@ -685,13 +684,11 @@ static int ap_sta_ps_end(struct sta_info *sta) /* Send all buffered frames to the station */ while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL) { - info = IEEE80211_SKB_CB(skb); sent++; - info->flags |= IEEE80211_TX_CTL_REQUEUE; + skb->requeue = 1; dev_queue_xmit(skb); } while ((skb = skb_dequeue(&sta->ps_tx_buf)) != NULL) { - info = IEEE80211_SKB_CB(skb); local->total_ps_buffered--; sent++; #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG @@ -699,7 +696,7 @@ static int ap_sta_ps_end(struct sta_info *sta) "since STA not sleeping anymore\n", sdata->dev->name, sta->sta.addr, sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - info->flags |= IEEE80211_TX_CTL_REQUEUE; + skb->requeue = 1; dev_queue_xmit(skb); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 541e3e64493d..d6392af9cd20 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -661,6 +661,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) static ieee80211_tx_result debug_noinline ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; size_t hdrlen, per_fragm, num_fragm, payload_len, left; struct sk_buff **frags, *first, *frag; @@ -677,9 +678,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) * This scenario is handled in __ieee80211_tx_prepare but extra * caution taken here as fragmented ampdu may cause Tx stop. */ - if (WARN_ON(tx->flags & IEEE80211_TX_CTL_AMPDU || - skb_get_queue_mapping(tx->skb) >= - ieee80211_num_regular_queues(&tx->local->hw))) + if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU)) return TX_DROP; first = tx->skb; @@ -951,7 +950,8 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, struct ieee80211_sub_if_data *sdata; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - int hdrlen; + int hdrlen, tid; + u8 *qc, *state; memset(tx, 0, sizeof(*tx)); tx->skb = skb; @@ -982,6 +982,15 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, tx->sta = sta_info_get(local, hdr->addr1); + if (tx->sta && ieee80211_is_data_qos(hdr->frame_control)) { + qc = ieee80211_get_qos_ctl(hdr); + tid = *qc & IEEE80211_QOS_CTL_TID_MASK; + + state = &tx->sta->ampdu_mlme.tid_state_tx[tid]; + if (*state == HT_AGG_STATE_OPERATIONAL) + info->flags |= IEEE80211_TX_CTL_AMPDU; + } + if (is_multicast_ether_addr(hdr->addr1)) { tx->flags &= ~IEEE80211_TX_UNICAST; info->flags |= IEEE80211_TX_CTL_NO_ACK; @@ -1172,7 +1181,7 @@ retry: * queues, there's no reason for a driver to reject * a frame there, warn and drop it. */ - if (WARN_ON(queue >= ieee80211_num_regular_queues(&local->hw))) + if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU)) goto drop; store = &local->pending_packet[queue]; diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index d27ef7f2d4a7..ac71b38f7cb5 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -114,8 +114,8 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb) { struct ieee80211_master_priv *mpriv = netdev_priv(dev); struct ieee80211_local *local = mpriv->local; + struct ieee80211_hw *hw = &local->hw; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct sta_info *sta; u16 queue; u8 tid; @@ -124,21 +124,19 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb) if (unlikely(queue >= local->hw.queues)) queue = local->hw.queues - 1; - if (info->flags & IEEE80211_TX_CTL_REQUEUE) { + if (skb->requeue) { + if (!hw->ampdu_queues) + return queue; + rcu_read_lock(); sta = sta_info_get(local, hdr->addr1); tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; if (sta) { - struct ieee80211_hw *hw = &local->hw; int ampdu_queue = sta->tid_to_tx_q[tid]; if ((ampdu_queue < ieee80211_num_queues(hw)) && - test_bit(ampdu_queue, local->queue_pool)) { + test_bit(ampdu_queue, local->queue_pool)) queue = ampdu_queue; - info->flags |= IEEE80211_TX_CTL_AMPDU; - } else { - info->flags &= ~IEEE80211_TX_CTL_AMPDU; - } } rcu_read_unlock(); @@ -159,20 +157,18 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb) *p++ = ack_policy | tid; *p = 0; + if (!hw->ampdu_queues) + return queue; + rcu_read_lock(); sta = sta_info_get(local, hdr->addr1); if (sta) { int ampdu_queue = sta->tid_to_tx_q[tid]; - struct ieee80211_hw *hw = &local->hw; if ((ampdu_queue < ieee80211_num_queues(hw)) && - test_bit(ampdu_queue, local->queue_pool)) { + test_bit(ampdu_queue, local->queue_pool)) queue = ampdu_queue; - info->flags |= IEEE80211_TX_CTL_AMPDU; - } else { - info->flags &= ~IEEE80211_TX_CTL_AMPDU; - } } rcu_read_unlock(); -- cgit v1.2.3 From d2372b315289aec9f565a855023c40654a5bff68 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 Oct 2008 20:32:20 +0200 Subject: wireless: make regdom passing semantics simpler The regdom struct is given to the core, so it might as well free it in error conditions. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- Documentation/networking/regulatory.txt | 13 +++---------- include/net/wireless.h | 3 +-- net/wireless/nl80211.c | 5 +---- net/wireless/reg.c | 9 +++++---- 4 files changed, 10 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/Documentation/networking/regulatory.txt b/Documentation/networking/regulatory.txt index a96989a8ff35..357d4ba4f135 100644 --- a/Documentation/networking/regulatory.txt +++ b/Documentation/networking/regulatory.txt @@ -167,7 +167,6 @@ struct ieee80211_regdomain mydriver_jp_regdom = { Then in some part of your code after your wiphy has been registered: - int r; struct ieee80211_regdomain *rd; int size_of_regd; int num_rules = mydriver_jp_regdom.n_reg_rules; @@ -178,17 +177,11 @@ Then in some part of your code after your wiphy has been registered: rd = kzalloc(size_of_regd, GFP_KERNEL); if (!rd) - return -ENOMEM; + return -ENOMEM; memcpy(rd, &mydriver_jp_regdom, sizeof(struct ieee80211_regdomain)); - for (i=0; i < num_rules; i++) { + for (i=0; i < num_rules; i++) memcpy(&rd->reg_rules[i], &mydriver_jp_regdom.reg_rules[i], sizeof(struct ieee80211_reg_rule)); - } - r = regulatory_hint(hw->wiphy, NULL, rd); - if (r) { - kfree(rd); - return r; - } - + return regulatory_hint(hw->wiphy, NULL, rd); diff --git a/include/net/wireless.h b/include/net/wireless.h index 061fe5017e5c..6e3ea0159045 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -358,8 +358,7 @@ ieee80211_get_channel(struct wiphy *wiphy, int freq) * for a regulatory domain structure for the respective country. If * a regulatory domain is build and passed you should set the alpha2 * if possible, otherwise set it to the special value of "99" which tells - * the wireless core it is unknown. If you pass a built regulatory domain - * and we return non zero you are in charge of kfree()'ing the structure. + * the wireless core it is unknown. * * Returns -EALREADY if *a regulatory domain* has already been set. Note that * this could be by another driver. It is safe for drivers to continue if diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9a16e9e6c5ca..f82cc9aa6908 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1935,12 +1935,9 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) mutex_lock(&cfg80211_drv_mutex); r = set_regdom(rd); mutex_unlock(&cfg80211_drv_mutex); - if (r) - goto bad_reg; - return r; -bad_reg: + bad_reg: kfree(rd); return -EINVAL; } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 00c326b66c03..038f8f133c54 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -605,7 +605,6 @@ int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, return r; } -/* If rd is not NULL and if this call fails the caller must free it */ int regulatory_hint(struct wiphy *wiphy, const char *alpha2, struct ieee80211_regdomain *rd) { @@ -690,6 +689,7 @@ void print_regdomain_info(const struct ieee80211_regdomain *rd) print_rd_rules(rd); } +/* Takes ownership of rd only if it doesn't fail */ static int __set_regdom(const struct ieee80211_regdomain *rd) { /* Some basic sanity checks first */ @@ -750,16 +750,17 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) /* Use this call to set the current regulatory domain. Conflicts with * multiple drivers can be ironed out later. Caller must've already - * kmalloc'd the rd structure. If this calls fails you should kfree() - * the passed rd. Caller must hold cfg80211_drv_mutex */ + * kmalloc'd the rd structure. Caller must hold cfg80211_drv_mutex */ int set_regdom(const struct ieee80211_regdomain *rd) { int r; /* Note that this doesn't update the wiphys, this is done below */ r = __set_regdom(rd); - if (r) + if (r) { + kfree(rd); return r; + } /* This would make this whole thing pointless */ BUG_ON(rd != cfg80211_regdomain); -- cgit v1.2.3 From be3d48106c1e5d075784e5e67928a6b5ffc0f3b6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 Oct 2008 20:32:21 +0200 Subject: wireless: remove struct regdom hinting The code needs to be split out and cleaned up, so as a first step remove the capability, to add it back in a subsequent patch as a separate function. Also remove the publically facing return value of the function and the wiphy argument. A number of internal functions go from being generic helpers to just being used for alpha2 setting. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- Documentation/networking/regulatory.txt | 11 +++--- drivers/net/wireless/zd1211rw/zd_mac.c | 2 +- include/net/wireless.h | 23 +++--------- net/wireless/nl80211.c | 2 +- net/wireless/reg.c | 63 +++++++++------------------------ net/wireless/reg.h | 23 ++++-------- 6 files changed, 36 insertions(+), 88 deletions(-) (limited to 'include') diff --git a/Documentation/networking/regulatory.txt b/Documentation/networking/regulatory.txt index 357d4ba4f135..dcf31648414a 100644 --- a/Documentation/networking/regulatory.txt +++ b/Documentation/networking/regulatory.txt @@ -131,11 +131,13 @@ are expected to do this during initialization. r = zd_reg2alpha2(mac->regdomain, alpha2); if (!r) - regulatory_hint(hw->wiphy, alpha2, NULL); + regulatory_hint(hw->wiphy, alpha2); Example code - drivers providing a built in regulatory domain: -------------------------------------------------------------- +[NOTE: This API is not currently available, it can be added when required] + If you have regulatory information you can obtain from your driver and you *need* to use this we let you build a regulatory domain structure and pass it to the wireless core. To do this you should @@ -182,6 +184,7 @@ Then in some part of your code after your wiphy has been registered: memcpy(rd, &mydriver_jp_regdom, sizeof(struct ieee80211_regdomain)); for (i=0; i < num_rules; i++) - memcpy(&rd->reg_rules[i], &mydriver_jp_regdom.reg_rules[i], - sizeof(struct ieee80211_reg_rule)); - return regulatory_hint(hw->wiphy, NULL, rd); + memcpy(&rd->reg_rules[i], + &mydriver_jp_regdom.reg_rules[i], + sizeof(struct ieee80211_reg_rule)); + regulatory_struct_hint(rd); diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index 2f0802b29c4b..07513e48b8f2 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -171,7 +171,7 @@ int zd_mac_init_hw(struct ieee80211_hw *hw) r = zd_reg2alpha2(mac->regdomain, alpha2); if (!r) - regulatory_hint(hw->wiphy, alpha2, NULL); + regulatory_hint(hw->wiphy, alpha2); r = 0; disable_int: diff --git a/include/net/wireless.h b/include/net/wireless.h index 6e3ea0159045..41294c5f6f8f 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -342,34 +342,19 @@ ieee80211_get_channel(struct wiphy *wiphy, int freq) /** * regulatory_hint - driver hint to the wireless core a regulatory domain - * @wiphy: the driver's very own &struct wiphy + * @wiphy: the wireless device giving the hint (used only for reporting + * conflicts) * @alpha2: the ISO/IEC 3166 alpha2 the driver claims its regulatory domain * should be in. If @rd is set this should be NULL. Note that if you * set this to NULL you should still set rd->alpha2 to some accepted * alpha2. - * @rd: a complete regulatory domain provided by the driver. If passed - * the driver does not need to worry about freeing it. * * Wireless drivers can use this function to hint to the wireless core * what it believes should be the current regulatory domain by * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory * domain should be in or by providing a completely build regulatory domain. * If the driver provides an ISO/IEC 3166 alpha2 userspace will be queried - * for a regulatory domain structure for the respective country. If - * a regulatory domain is build and passed you should set the alpha2 - * if possible, otherwise set it to the special value of "99" which tells - * the wireless core it is unknown. - * - * Returns -EALREADY if *a regulatory domain* has already been set. Note that - * this could be by another driver. It is safe for drivers to continue if - * -EALREADY is returned, if drivers are not capable of world roaming they - * should not register more channels than they support. Right now we only - * support listening to the first driver hint. If the driver is capable - * of world roaming but wants to respect its own EEPROM mappings for - * specific regulatory domains it should register the @reg_notifier callback - * on the &struct wiphy. Returns 0 if the hint went through fine or through an - * intersection operation. Otherwise a standard error code is returned. + * for a regulatory domain structure for the respective country. */ -extern int regulatory_hint(struct wiphy *wiphy, - const char *alpha2, struct ieee80211_regdomain *rd); +extern void regulatory_hint(struct wiphy *wiphy, const char *alpha2); #endif /* __NET_WIRELESS_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f82cc9aa6908..5e1d658a8b5a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1695,7 +1695,7 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) return -EINVAL; #endif mutex_lock(&cfg80211_drv_mutex); - r = __regulatory_hint(NULL, REGDOM_SET_BY_USER, data, NULL); + r = __regulatory_hint(NULL, REGDOM_SET_BY_USER, data); mutex_unlock(&cfg80211_drv_mutex); return r; } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 038f8f133c54..dc10071deaaa 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -42,7 +42,10 @@ #include "core.h" #include "reg.h" -/* wiphy is set if this request's initiator is REGDOM_SET_BY_DRIVER */ +/* + * wiphy is set if this request's initiator is + * REGDOM_SET_BY_COUNTRY_IE or _DRIVER + */ struct regulatory_request { struct wiphy *wiphy; enum reg_set_by initiator; @@ -298,7 +301,7 @@ static int call_crda(const char *alpha2) /* This has the logic which determines when a new request * should be ignored. */ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, - char *alpha2, struct ieee80211_regdomain *rd) + const char *alpha2) { /* All initial requests are respected */ if (!last_request) @@ -343,22 +346,8 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, return 1; case REGDOM_SET_BY_DRIVER: BUG_ON(!wiphy); - if (last_request->initiator == REGDOM_SET_BY_DRIVER) { - /* Two separate drivers hinting different things, - * this is possible if you have two devices present - * on a system with different EEPROM regulatory - * readings. XXX: Do intersection, we support only - * the first regulatory hint for now */ - if (last_request->wiphy != wiphy) - return -EALREADY; - if (rd) - return -EALREADY; - /* Driver should not be trying to hint different - * regulatory domains! */ - BUG_ON(!alpha2_equal(alpha2, - cfg80211_regdomain->alpha2)); + if (last_request->initiator == REGDOM_SET_BY_DRIVER) return -EALREADY; - } if (last_request->initiator == REGDOM_SET_BY_CORE) return 0; /* XXX: Handle intersection, and add the @@ -557,40 +546,32 @@ void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby) /* Caller must hold &cfg80211_drv_mutex */ int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, - const char *alpha2, struct ieee80211_regdomain *rd) + const char *alpha2) { struct regulatory_request *request; - char *rd_alpha2; int r = 0; - r = ignore_request(wiphy, set_by, (char *) alpha2, rd); + r = ignore_request(wiphy, set_by, alpha2); if (r) return r; - if (rd) - rd_alpha2 = rd->alpha2; - else - rd_alpha2 = (char *) alpha2; - switch (set_by) { case REGDOM_SET_BY_CORE: case REGDOM_SET_BY_COUNTRY_IE: case REGDOM_SET_BY_DRIVER: case REGDOM_SET_BY_USER: request = kzalloc(sizeof(struct regulatory_request), - GFP_KERNEL); + GFP_KERNEL); if (!request) return -ENOMEM; - request->alpha2[0] = rd_alpha2[0]; - request->alpha2[1] = rd_alpha2[1]; + request->alpha2[0] = alpha2[0]; + request->alpha2[1] = alpha2[1]; request->initiator = set_by; request->wiphy = wiphy; kfree(last_request); last_request = request; - if (rd) - break; r = call_crda(alpha2); #ifndef CONFIG_WIRELESS_OLD_REGULATORY if (r) @@ -605,25 +586,13 @@ int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, return r; } -int regulatory_hint(struct wiphy *wiphy, const char *alpha2, - struct ieee80211_regdomain *rd) +void regulatory_hint(struct wiphy *wiphy, const char *alpha2) { - int r; - BUG_ON(!rd && !alpha2); + BUG_ON(!alpha2); mutex_lock(&cfg80211_drv_mutex); - - r = __regulatory_hint(wiphy, REGDOM_SET_BY_DRIVER, alpha2, rd); - if (r || !rd) - goto unlock_and_exit; - - /* If the driver passed a regulatory domain we skipped asking - * userspace for one so we can now go ahead and set it */ - r = set_regdom(rd); - -unlock_and_exit: + __regulatory_hint(wiphy, REGDOM_SET_BY_DRIVER, alpha2); mutex_unlock(&cfg80211_drv_mutex); - return r; } EXPORT_SYMBOL(regulatory_hint); @@ -792,11 +761,11 @@ int regulatory_init(void) * that is not a valid ISO / IEC 3166 alpha2 */ if (ieee80211_regdom[0] != 'E' || ieee80211_regdom[1] != 'U') err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, - ieee80211_regdom, NULL); + ieee80211_regdom); #else cfg80211_regdomain = cfg80211_world_regdom; - err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00", NULL); + err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00"); if (err) printk(KERN_ERR "cfg80211: calling CRDA failed - " "unable to update world regulatory domain, " diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 0c1572b92fef..c9b6b6358bbe 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -11,30 +11,21 @@ int set_regdom(const struct ieee80211_regdomain *rd); /** * __regulatory_hint - hint to the wireless core a regulatory domain - * @wiphy: if a driver is providing the hint this is the driver's very - * own &struct wiphy + * @wiphy: if the hint comes from country information from an AP, this + * is required to be set to the wiphy that received the information * @alpha2: the ISO/IEC 3166 alpha2 being claimed the regulatory domain - * should be in. If @rd is set this should be NULL - * @rd: a complete regulatory domain, if passed the caller need not worry - * about freeing it + * should be in. * * The Wireless subsystem can use this function to hint to the wireless core * what it believes should be the current regulatory domain by * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory - * domain should be in or by providing a completely build regulatory domain. + * domain should be in. * - * Returns -EALREADY if *a regulatory domain* has already been set. Note that - * this could be by another driver. It is safe for drivers to continue if - * -EALREADY is returned, if drivers are not capable of world roaming they - * should not register more channels than they support. Right now we only - * support listening to the first driver hint. If the driver is capable - * of world roaming but wants to respect its own EEPROM mappings for - * specific regulatory domains it should register the @reg_notifier callback - * on the &struct wiphy. Returns 0 if the hint went through fine or through an - * intersection operation. Otherwise a standard error code is returned. + * Returns zero if all went fine, %-EALREADY if a regulatory domain had + * already been set or other standard error codes. * */ extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, - const char *alpha2, struct ieee80211_regdomain *rd); + const char *alpha2); #endif /* __NET_WIRELESS_REG_H */ -- cgit v1.2.3 From e25cf4a6945e0f859186231be7164ba565412e0a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 Oct 2008 08:51:20 +0200 Subject: mac80211: fix two kernel-doc warnings One parameter wasn't described and one I forgot to update when renaming it; also update TBDs in sta_info. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 4 +++- net/mac80211/sta_info.h | 38 ++++++++++++++++++++------------------ 2 files changed, 23 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index bba96a203885..0b983bed3829 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -287,7 +287,7 @@ enum mac80211_rate_control_flags { * * @idx: rate index to attempt to send with * @flags: rate control flags (&enum mac80211_rate_control_flags) - * @limit: number of retries before fallback + * @count: number of tries in this rate before going to the next rate * * A value of -1 for @idx indicates an invalid rate and, if used * in an array of retry rates, that no more rates should be tried. @@ -1902,6 +1902,8 @@ struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw, * @short_preamble: whether mac80211 will request short-preamble transmission * if the selected rate supports it * @max_rate_idx: user-requested maximum rate (not MCS for now) + * @skb: the skb that will be transmitted, the control information in it needs + * to be filled in */ struct ieee80211_tx_rate_control { struct ieee80211_hw *hw; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 4ac372aa75ce..5ad9250b63ab 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -160,18 +160,20 @@ struct sta_ampdu_mlme { * @list: global linked list entry * @hnext: hash table linked list pointer * @local: pointer to the global information - * @sdata: TBD - * @key: TBD - * @rate_ctrl: TBD - * @rate_ctrl_priv: TBD + * @sdata: virtual interface this station belongs to + * @key: peer key negotiated with this station, if any + * @rate_ctrl: rate control algorithm reference + * @rate_ctrl_priv: rate control private per-STA pointer + * @last_tx_rate: rate used for last transmit, to report to userspace as + * "the" transmit rate * @lock: used for locking all fields that require locking, see comments * in the header file. * @flaglock: spinlock for flags accesses * @addr: MAC address of this STA * @aid: STA's unique AID (1..2007, 0 = not assigned yet), * only used in AP (and IBSS?) mode - * @listen_interval: TBD - * @pin_status: TBD + * @listen_interval: listen interval of this station, when we're acting as AP + * @pin_status: used internally for pinning a STA struct into memory * @flags: STA flags, see &enum ieee80211_sta_info_flags * @ps_tx_buf: buffer of frames to transmit to this station * when it leaves power saving state @@ -180,8 +182,8 @@ struct sta_ampdu_mlme { * power saving state * @rx_packets: Number of MSDUs received from this STA * @rx_bytes: Number of bytes received from this STA - * @wep_weak_iv_count: TBD - * @last_rx: TBD + * @wep_weak_iv_count: number of weak WEP IVs received from this station + * @last_rx: time (in jiffies) when last frame was received from this STA * @num_duplicates: number of duplicate frames received from this STA * @rx_fragments: number of received MPDUs * @rx_dropped: number of dropped MPDUs from this STA @@ -189,26 +191,26 @@ struct sta_ampdu_mlme { * @last_qual: qual of last received frame from this STA * @last_noise: noise of last received frame from this STA * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue) - * @tx_filtered_count: TBD - * @tx_retry_failed: TBD - * @tx_retry_count: TBD + * @tx_filtered_count: number of frames the hardware filtered for this STA + * @tx_retry_failed: number of frames that failed retry + * @tx_retry_count: total number of retries for frames to this STA * @fail_avg: moving percentage of failed MSDUs * @tx_packets: number of RX/TX MSDUs - * @tx_bytes: TBD + * @tx_bytes: number of bytes transmitted to this STA * @tx_fragments: number of transmitted MPDUs * @last_txrate: description of the last used transmit rate - * @tid_seq: TBD - * @ampdu_mlme: TBD + * @tid_seq: per-TID sequence numbers for sending to this STA + * @ampdu_mlme: A-MPDU state machine state * @timer_to_tid: identity mapping to ID timers * @tid_to_tx_q: map tid to tx queue * @llid: Local link ID * @plid: Peer link ID * @reason: Cancel reason on PLINK_HOLDING state * @plink_retries: Retries in establishment - * @ignore_plink_timer: TBD - * @plink_state plink_state: TBD - * @plink_timeout: TBD - * @plink_timer: TBD + * @ignore_plink_timer: ignore the peer-link timer (used internally) + * @plink_state: peer link state + * @plink_timeout: timeout of peer link + * @plink_timer: peer link watch timer * @debugfs: debug filesystem info * @sta: station information we share with the driver */ -- cgit v1.2.3 From 127cafbb276266b1b8da967bfe25a062ab1d42ab Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 28 Oct 2008 10:51:53 +0800 Subject: tracepoint: introduce *_noupdate APIs. Impact: add new tracepoint APIs to allow the batched registration of probes new APIs separate tracepoint_probe_register(), tracepoint_probe_unregister() into 2 steps. The first step of them is just update tracepoint_entry, not connect or disconnect. this patch introduces tracepoint_probe_update_all() for update all. these APIs are very useful for registering lots of probes but just updating once. Another very important thing is that *_noupdate APIs do not require module_mutex. Signed-off-by: Lai Jiangshan Acked-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/tracepoint.h | 4 ++ kernel/tracepoint.c | 170 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 136 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index c5bb39c7a770..63064e9403f2 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -112,6 +112,10 @@ extern int tracepoint_probe_register(const char *name, void *probe); */ extern int tracepoint_probe_unregister(const char *name, void *probe); +extern int tracepoint_probe_register_noupdate(const char *name, void *probe); +extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe); +extern void tracepoint_probe_update_all(void); + struct tracepoint_iter { struct module *module; struct tracepoint *tracepoint; diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 3e22867184e3..e96590f17de1 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -59,7 +59,10 @@ struct tracepoint_entry { }; struct tp_probes { - struct rcu_head rcu; + union { + struct rcu_head rcu; + struct list_head list; + } u; void *probes[0]; }; @@ -72,7 +75,7 @@ static inline void *allocate_probes(int count) static void rcu_free_old_probes(struct rcu_head *head) { - kfree(container_of(head, struct tp_probes, rcu)); + kfree(container_of(head, struct tp_probes, u.rcu)); } static inline void release_probes(void *old) @@ -80,7 +83,7 @@ static inline void release_probes(void *old) if (old) { struct tp_probes *tp_probes = container_of(old, struct tp_probes, probes[0]); - call_rcu(&tp_probes->rcu, rcu_free_old_probes); + call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes); } } @@ -299,6 +302,23 @@ static void tracepoint_update_probes(void) module_update_tracepoints(); } +static void *tracepoint_add_probe(const char *name, void *probe) +{ + struct tracepoint_entry *entry; + void *old; + + entry = get_tracepoint(name); + if (!entry) { + entry = add_tracepoint(name); + if (IS_ERR(entry)) + return entry; + } + old = tracepoint_entry_add_probe(entry, probe); + if (IS_ERR(old) && !entry->refcount) + remove_tracepoint(entry); + return old; +} + /** * tracepoint_probe_register - Connect a probe to a tracepoint * @name: tracepoint name @@ -309,36 +329,36 @@ static void tracepoint_update_probes(void) */ int tracepoint_probe_register(const char *name, void *probe) { - struct tracepoint_entry *entry; - int ret = 0; void *old; mutex_lock(&tracepoints_mutex); - entry = get_tracepoint(name); - if (!entry) { - entry = add_tracepoint(name); - if (IS_ERR(entry)) { - ret = PTR_ERR(entry); - goto end; - } - } - old = tracepoint_entry_add_probe(entry, probe); - if (IS_ERR(old)) { - if (!entry->refcount) - remove_tracepoint(entry); - ret = PTR_ERR(old); - goto end; - } + old = tracepoint_add_probe(name, probe); mutex_unlock(&tracepoints_mutex); + if (IS_ERR(old)) + return PTR_ERR(old); + tracepoint_update_probes(); /* may update entry */ release_probes(old); return 0; -end: - mutex_unlock(&tracepoints_mutex); - return ret; } EXPORT_SYMBOL_GPL(tracepoint_probe_register); +static void *tracepoint_remove_probe(const char *name, void *probe) +{ + struct tracepoint_entry *entry; + void *old; + + entry = get_tracepoint(name); + if (!entry) + return ERR_PTR(-ENOENT); + old = tracepoint_entry_remove_probe(entry, probe); + if (IS_ERR(old)) + return old; + if (!entry->refcount) + remove_tracepoint(entry); + return old; +} + /** * tracepoint_probe_unregister - Disconnect a probe from a tracepoint * @name: tracepoint name @@ -351,31 +371,105 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_register); */ int tracepoint_probe_unregister(const char *name, void *probe) { - struct tracepoint_entry *entry; void *old; - int ret = -ENOENT; mutex_lock(&tracepoints_mutex); - entry = get_tracepoint(name); - if (!entry) - goto end; - old = tracepoint_entry_remove_probe(entry, probe); - if (IS_ERR(old)) { - ret = PTR_ERR(old); - goto end; - } - if (!entry->refcount) - remove_tracepoint(entry); + old = tracepoint_remove_probe(name, probe); mutex_unlock(&tracepoints_mutex); + if (IS_ERR(old)) + return PTR_ERR(old); + tracepoint_update_probes(); /* may update entry */ release_probes(old); return 0; -end: - mutex_unlock(&tracepoints_mutex); - return ret; } EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); +static LIST_HEAD(old_probes); +static int need_update; + +static void tracepoint_add_old_probes(void *old) +{ + need_update = 1; + if (old) { + struct tp_probes *tp_probes = container_of(old, + struct tp_probes, probes[0]); + list_add(&tp_probes->u.list, &old_probes); + } +} + +/** + * tracepoint_probe_register_noupdate - register a probe but not connect + * @name: tracepoint name + * @probe: probe handler + * + * caller must call tracepoint_probe_update_all() + */ +int tracepoint_probe_register_noupdate(const char *name, void *probe) +{ + void *old; + + mutex_lock(&tracepoints_mutex); + old = tracepoint_add_probe(name, probe); + if (IS_ERR(old)) { + mutex_unlock(&tracepoints_mutex); + return PTR_ERR(old); + } + tracepoint_add_old_probes(old); + mutex_unlock(&tracepoints_mutex); + return 0; +} +EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate); + +/** + * tracepoint_probe_unregister_noupdate - remove a probe but not disconnect + * @name: tracepoint name + * @probe: probe function pointer + * + * caller must call tracepoint_probe_update_all() + */ +int tracepoint_probe_unregister_noupdate(const char *name, void *probe) +{ + void *old; + + mutex_lock(&tracepoints_mutex); + old = tracepoint_remove_probe(name, probe); + if (IS_ERR(old)) { + mutex_unlock(&tracepoints_mutex); + return PTR_ERR(old); + } + tracepoint_add_old_probes(old); + mutex_unlock(&tracepoints_mutex); + return 0; +} +EXPORT_SYMBOL_GPL(tracepoint_probe_unregister_noupdate); + +/** + * tracepoint_probe_update_all - update tracepoints + */ +void tracepoint_probe_update_all(void) +{ + LIST_HEAD(release_probes); + struct tp_probes *pos, *next; + + mutex_lock(&tracepoints_mutex); + if (!need_update) { + mutex_unlock(&tracepoints_mutex); + return; + } + if (!list_empty(&old_probes)) + list_replace_init(&old_probes, &release_probes); + need_update = 0; + mutex_unlock(&tracepoints_mutex); + + tracepoint_update_probes(); + list_for_each_entry_safe(pos, next, &release_probes, u.list) { + list_del(&pos->u.list); + call_rcu_sched(&pos->u.rcu, rcu_free_old_probes); + } +} +EXPORT_SYMBOL_GPL(tracepoint_probe_update_all); + /** * tracepoint_get_iter_range - Get a next tracepoint iterator given a range. * @tracepoint: current tracepoints (in), next tracepoint (out) -- cgit v1.2.3 From 7e5e26a3d8ac4bcadb380073dc9604c07a9a6198 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 31 Oct 2008 09:36:38 -0400 Subject: ftrace: fix hardirq header for non ftrace archs Impact: build fix for non-ftrace architectures Not all archs implement ftrace, and therefore do not have an asm/ftrace.h. This patch corrects the problem. The ftrace_nmi_enter/exit now must be defined for all archs that implement dynamic ftrace. Currently, only x86 does. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/arm/include/asm/ftrace.h | 5 ----- arch/powerpc/include/asm/ftrace.h | 5 ----- arch/sh/include/asm/ftrace.h | 5 ----- arch/sparc/include/asm/ftrace.h | 5 ----- arch/x86/include/asm/ftrace.h | 16 ---------------- include/linux/ftrace.h | 5 ++++- include/linux/hardirq.h | 2 +- 7 files changed, 5 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h index 3f3a1d1508ea..39c8bc1a006a 100644 --- a/arch/arm/include/asm/ftrace.h +++ b/arch/arm/include/asm/ftrace.h @@ -1,11 +1,6 @@ #ifndef _ASM_ARM_FTRACE #define _ASM_ARM_FTRACE -#ifndef __ASSEMBLY__ -static inline void ftrace_nmi_enter(void) { } -static inline void ftrace_nmi_exit(void) { } -#endif - #ifdef CONFIG_FUNCTION_TRACER #define MCOUNT_ADDR ((long)(mcount)) #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 1cd72700fbc0..b298f7a631e6 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -1,11 +1,6 @@ #ifndef _ASM_POWERPC_FTRACE #define _ASM_POWERPC_FTRACE -#ifndef __ASSEMBLY__ -static inline void ftrace_nmi_enter(void) { } -static inline void ftrace_nmi_exit(void) { } -#endif - #ifdef CONFIG_FUNCTION_TRACER #define MCOUNT_ADDR ((long)(_mcount)) #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h index 31ada0370cb6..3aed362c9463 100644 --- a/arch/sh/include/asm/ftrace.h +++ b/arch/sh/include/asm/ftrace.h @@ -1,11 +1,6 @@ #ifndef __ASM_SH_FTRACE_H #define __ASM_SH_FTRACE_H -#ifndef __ASSEMBLY__ -static inline void ftrace_nmi_enter(void) { } -static inline void ftrace_nmi_exit(void) { } -#endif - #ifndef __ASSEMBLY__ extern void mcount(void); #endif diff --git a/arch/sparc/include/asm/ftrace.h b/arch/sparc/include/asm/ftrace.h index 62055ac0496e..d27716cd38c1 100644 --- a/arch/sparc/include/asm/ftrace.h +++ b/arch/sparc/include/asm/ftrace.h @@ -1,11 +1,6 @@ #ifndef _ASM_SPARC64_FTRACE #define _ASM_SPARC64_FTRACE -#ifndef __ASSEMBLY__ -static inline void ftrace_nmi_enter(void) { } -static inline void ftrace_nmi_exit(void) { } -#endif - #ifdef CONFIG_MCOUNT #define MCOUNT_ADDR ((long)(_mcount)) #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index a23468194b8c..f8173ed1c970 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -17,23 +17,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) */ return addr - 1; } - -#ifdef CONFIG_DYNAMIC_FTRACE -extern void ftrace_nmi_enter(void); -extern void ftrace_nmi_exit(void); -#else -static inline void ftrace_nmi_enter(void) { } -static inline void ftrace_nmi_exit(void) { } -#endif #endif /* __ASSEMBLY__ */ - -#else /* CONFIG_FUNCTION_TRACER */ - -#ifndef __ASSEMBLY__ -static inline void ftrace_nmi_enter(void) { } -static inline void ftrace_nmi_exit(void) { } -#endif - #endif /* CONFIG_FUNCTION_TRACER */ #endif /* _ASM_X86_FTRACE_H */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e46a7b34037c..0ad1b48aea69 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -44,7 +44,6 @@ static inline void ftrace_kill(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE - enum { FTRACE_FL_FREE = (1 << 0), FTRACE_FL_FAILED = (1 << 1), @@ -105,6 +104,8 @@ extern void ftrace_release(void *start, unsigned long size); extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); +extern void ftrace_nmi_enter(void); +extern void ftrace_nmi_exit(void); #else # define skip_trace(ip) ({ 0; }) @@ -113,6 +114,8 @@ extern void ftrace_enable_daemon(void); # define ftrace_disable_daemon() do { } while (0) # define ftrace_enable_daemon() do { } while (0) static inline void ftrace_release(void *start, unsigned long size) { } +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } #endif /* CONFIG_DYNAMIC_FTRACE */ /* totally disable ftrace - can not re-enable after this */ diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 0087cb43becf..ffc16ab5a878 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -4,8 +4,8 @@ #include #include #include +#include #include -#include #include /* -- cgit v1.2.3 From 29cbda77a67cf263d636feea65d3bbc9c7de2e24 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 3 Nov 2008 09:16:39 -0800 Subject: rcu: increase RCU stall-check timeouts Impact: increase timeout of debug check feature Increase RCU stall period timeouts to reduce the likelyhood of false positives. Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/rcuclassic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index 5f89b62e6983..301dda829e37 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -41,7 +41,7 @@ #include #ifdef CONFIG_RCU_CPU_STALL_DETECTOR -#define RCU_SECONDS_TILL_STALL_CHECK ( 3 * HZ) /* for rcp->jiffies_stall */ +#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rcp->jiffies_stall */ #define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */ #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ -- cgit v1.2.3 From 48148938b494cd57029a43c758e9972307a31d2a Mon Sep 17 00:00:00 2001 From: Julius Volz Date: Mon, 3 Nov 2008 17:08:56 -0800 Subject: IPVS: Remove supports_ipv6 scheduler flag Remove the 'supports_ipv6' scheduler flag since all schedulers now support IPv6. Signed-off-by: Julius Volz Signed-off-by: David S. Miller --- include/net/ip_vs.h | 3 --- net/netfilter/ipvs/ip_vs_ctl.c | 24 ++++++------------------ net/netfilter/ipvs/ip_vs_dh.c | 3 --- net/netfilter/ipvs/ip_vs_lblc.c | 3 --- net/netfilter/ipvs/ip_vs_lblcr.c | 3 --- net/netfilter/ipvs/ip_vs_lc.c | 3 --- net/netfilter/ipvs/ip_vs_nq.c | 3 --- net/netfilter/ipvs/ip_vs_rr.c | 3 --- net/netfilter/ipvs/ip_vs_sed.c | 3 --- net/netfilter/ipvs/ip_vs_sh.c | 3 --- net/netfilter/ipvs/ip_vs_wlc.c | 3 --- net/netfilter/ipvs/ip_vs_wrr.c | 3 --- 12 files changed, 6 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index fc63353779f0..8f6abf4883e3 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -503,9 +503,6 @@ struct ip_vs_scheduler { char *name; /* scheduler name */ atomic_t refcnt; /* reference counter */ struct module *module; /* THIS_MODULE/NULL */ -#ifdef CONFIG_IP_VS_IPV6 - int supports_ipv6; /* scheduler has IPv6 support */ -#endif /* scheduler initializing service */ int (*init_service)(struct ip_vs_service *svc); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 98e0a65646a1..d0ccdaff4276 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1168,15 +1168,9 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, } #ifdef CONFIG_IP_VS_IPV6 - if (u->af == AF_INET6) { - if (!sched->supports_ipv6) { - ret = -EAFNOSUPPORT; - goto out_err; - } - if ((u->netmask < 1) || (u->netmask > 128)) { - ret = -EINVAL; - goto out_err; - } + if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) { + ret = -EINVAL; + goto out_err; } #endif @@ -1272,15 +1266,9 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) old_sched = sched; #ifdef CONFIG_IP_VS_IPV6 - if (u->af == AF_INET6) { - if (!sched->supports_ipv6) { - ret = -EAFNOSUPPORT; - goto out; - } - if ((u->netmask < 1) || (u->netmask > 128)) { - ret = -EINVAL; - goto out; - } + if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) { + ret = -EINVAL; + goto out; } #endif diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index d8258e0cb58c..a9dac74bb13f 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -243,9 +243,6 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list), -#ifdef CONFIG_IP_VS_IPV6 - .supports_ipv6 = 1, -#endif .init_service = ip_vs_dh_init_svc, .done_service = ip_vs_dh_done_svc, .update_service = ip_vs_dh_update_svc, diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 135a59454f1f..4256cfad8d31 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -534,9 +534,6 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list), -#ifdef CONFIG_IP_VS_IPV6 - .supports_ipv6 = 1, -#endif .init_service = ip_vs_lblc_init_svc, .done_service = ip_vs_lblc_done_svc, .schedule = ip_vs_lblc_schedule, diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 4d6534a71b8f..321b49fa41d8 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -735,9 +735,6 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list), -#ifdef CONFIG_IP_VS_IPV6 - .supports_ipv6 = 1, -#endif .init_service = ip_vs_lblcr_init_svc, .done_service = ip_vs_lblcr_done_svc, .schedule = ip_vs_lblcr_schedule, diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index b69f808ac461..51912cab777b 100644 --- a/net/netfilter/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c @@ -81,9 +81,6 @@ static struct ip_vs_scheduler ip_vs_lc_scheduler = { .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list), -#ifdef CONFIG_IP_VS_IPV6 - .supports_ipv6 = 1, -#endif .schedule = ip_vs_lc_schedule, }; diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index 9a2d8033f08f..6758ad2ceaaf 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -116,9 +116,6 @@ static struct ip_vs_scheduler ip_vs_nq_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list), -#ifdef CONFIG_IP_VS_IPV6 - .supports_ipv6 = 1, -#endif .schedule = ip_vs_nq_schedule, }; diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index a22195f68ac4..8fb51c169eb8 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -89,9 +89,6 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = { .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), -#ifdef CONFIG_IP_VS_IPV6 - .supports_ipv6 = 1, -#endif .init_service = ip_vs_rr_init_svc, .update_service = ip_vs_rr_update_svc, .schedule = ip_vs_rr_schedule, diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index 7d2f22f04b83..691a6a0086e1 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -118,9 +118,6 @@ static struct ip_vs_scheduler ip_vs_sed_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list), -#ifdef CONFIG_IP_VS_IPV6 - .supports_ipv6 = 1, -#endif .schedule = ip_vs_sed_schedule, }; diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 4074ccf49208..0e53955ef139 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -240,9 +240,6 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list), -#ifdef CONFIG_IP_VS_IPV6 - .supports_ipv6 = 1, -#endif .init_service = ip_vs_sh_init_svc, .done_service = ip_vs_sh_done_svc, .update_service = ip_vs_sh_update_svc, diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index 8c596e712599..57b452bbb4ea 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -106,9 +106,6 @@ static struct ip_vs_scheduler ip_vs_wlc_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list), -#ifdef CONFIG_IP_VS_IPV6 - .supports_ipv6 = 1, -#endif .schedule = ip_vs_wlc_schedule, }; diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 7ea92fed50bf..2f618dc29c5b 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -213,9 +213,6 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = { .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list), -#ifdef CONFIG_IP_VS_IPV6 - .supports_ipv6 = 1, -#endif .init_service = ip_vs_wrr_init_svc, .done_service = ip_vs_wrr_done_svc, .update_service = ip_vs_wrr_update_svc, -- cgit v1.2.3 From 6cf3f41e6c08bca6641a695449791c38a25f35ff Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Mon, 3 Nov 2008 18:16:50 -0800 Subject: bonding, net: Move last_rx update into bonding recv logic The only user of the net_device->last_rx field is bonding. This patch adds a conditional update of last_rx to the bonding special logic in skb_bond_should_drop, causing last_rx to only be updated when the ARP monitor is running. This frees network device drivers from the necessity of updating last_rx, which can have cache line thrash issues. Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 ++ drivers/net/bonding/bond_sysfs.c | 3 +++ include/linux/if.h | 1 + include/linux/netdevice.h | 32 ++++++++++++++++++-------------- 4 files changed, 24 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 56c823c175fe..39575d764974 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4564,6 +4564,8 @@ static int bond_init(struct net_device *bond_dev, struct bond_params *params) bond_dev->tx_queue_len = 0; bond_dev->flags |= IFF_MASTER|IFF_MULTICAST; bond_dev->priv_flags |= IFF_BONDING; + if (bond->params.arp_interval) + bond_dev->priv_flags |= IFF_MASTER_ARPMON; /* At first, we block adding VLANs. That's the only way to * prevent problems that occur when adding VLANs over an diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 296a865b75d2..e400d7dfdfc8 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -620,6 +620,8 @@ static ssize_t bonding_store_arp_interval(struct device *d, ": %s: Setting ARP monitoring interval to %d.\n", bond->dev->name, new_value); bond->params.arp_interval = new_value; + if (bond->params.arp_interval) + bond->dev->priv_flags |= IFF_MASTER_ARPMON; if (bond->params.miimon) { printk(KERN_INFO DRV_NAME ": %s: ARP monitoring cannot be used with MII monitoring. " @@ -1039,6 +1041,7 @@ static ssize_t bonding_store_miimon(struct device *d, "ARP monitoring. Disabling ARP monitoring...\n", bond->dev->name); bond->params.arp_interval = 0; + bond->dev->priv_flags &= ~IFF_MASTER_ARPMON; if (bond->params.arp_validate) { bond_unregister_arp(bond); bond->params.arp_validate = diff --git a/include/linux/if.h b/include/linux/if.h index 65246846c844..2a6e29620a96 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -65,6 +65,7 @@ #define IFF_BONDING 0x20 /* bonding master or slave */ #define IFF_SLAVE_NEEDARP 0x40 /* need ARPs for validation */ #define IFF_ISATAP 0x80 /* ISATAP interface (RFC4214) */ +#define IFF_MASTER_ARPMON 0x100 /* bonding master, ARP mon in use */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9d77b1d7dca8..f1b0dbe58464 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1742,22 +1742,26 @@ static inline int skb_bond_should_drop(struct sk_buff *skb) struct net_device *dev = skb->dev; struct net_device *master = dev->master; - if (master && - (dev->priv_flags & IFF_SLAVE_INACTIVE)) { - if ((dev->priv_flags & IFF_SLAVE_NEEDARP) && - skb->protocol == __constant_htons(ETH_P_ARP)) - return 0; - - if (master->priv_flags & IFF_MASTER_ALB) { - if (skb->pkt_type != PACKET_BROADCAST && - skb->pkt_type != PACKET_MULTICAST) + if (master) { + if (master->priv_flags & IFF_MASTER_ARPMON) + dev->last_rx = jiffies; + + if (dev->priv_flags & IFF_SLAVE_INACTIVE) { + if ((dev->priv_flags & IFF_SLAVE_NEEDARP) && + skb->protocol == __constant_htons(ETH_P_ARP)) return 0; - } - if (master->priv_flags & IFF_MASTER_8023AD && - skb->protocol == __constant_htons(ETH_P_SLOW)) - return 0; - return 1; + if (master->priv_flags & IFF_MASTER_ALB) { + if (skb->pkt_type != PACKET_BROADCAST && + skb->pkt_type != PACKET_MULTICAST) + return 0; + } + if (master->priv_flags & IFF_MASTER_8023AD && + skb->protocol == __constant_htons(ETH_P_SLOW)) + return 0; + + return 1; + } } return 0; } -- cgit v1.2.3 From 5f7340eff8f68f41b7e5c7ad47ec4cd1ea1afb40 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Tue, 4 Nov 2008 14:21:08 +0100 Subject: netfilter: xt_NFLOG: don't call nf_log_packet in NFLOG module. This patch modifies xt_NFLOG to suppress the call to nf_log_packet() function. The call of this wrapper in xt_NFLOG was causing NFLOG to use the first initialized module. Thus, if ipt_ULOG is loaded before nfnetlink_log all NFLOG rules are treated as plain LOG rules. Signed-off-by: Eric Leblond Signed-off-by: Patrick McHardy --- include/net/netfilter/nfnetlink_log.h | 14 ++++++++++++++ net/netfilter/nfnetlink_log.c | 3 ++- net/netfilter/xt_NFLOG.c | 5 +++-- 3 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 include/net/netfilter/nfnetlink_log.h (limited to 'include') diff --git a/include/net/netfilter/nfnetlink_log.h b/include/net/netfilter/nfnetlink_log.h new file mode 100644 index 000000000000..9b67f948a8d7 --- /dev/null +++ b/include/net/netfilter/nfnetlink_log.h @@ -0,0 +1,14 @@ +#ifndef _KER_NFNETLINK_LOG_H +#define _KER_NFNETLINK_LOG_H + +void +nfulnl_log_packet(unsigned int pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *li_user, + const char *prefix); + +#endif /* _KER_NFNETLINK_LOG_H */ + diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 41e0105d3828..a51892b3f01a 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -533,7 +533,7 @@ static struct nf_loginfo default_loginfo = { }; /* log handler for internal netfilter logging api */ -static void +void nfulnl_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, @@ -648,6 +648,7 @@ alloc_failure: /* FIXME: statistics */ goto unlock_and_release; } +EXPORT_SYMBOL_GPL(nfulnl_log_packet); static int nfulnl_rcv_nl_event(struct notifier_block *this, diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index 50e3a52d3b31..a57c5cf018ec 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -13,6 +13,7 @@ #include #include #include +#include MODULE_AUTHOR("Patrick McHardy "); MODULE_DESCRIPTION("Xtables: packet logging to netlink using NFLOG"); @@ -31,8 +32,8 @@ nflog_tg(struct sk_buff *skb, const struct xt_target_param *par) li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; - nf_log_packet(par->family, par->hooknum, skb, par->in, - par->out, &li, "%s", info->prefix); + nfulnl_log_packet(par->family, par->hooknum, skb, par->in, + par->out, &li, info->prefix); return XT_CONTINUE; } -- cgit v1.2.3 From 511061e2dd1b84bb21bb97c9216a19606c29ac02 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 4 Nov 2008 14:22:55 +0100 Subject: netfilter: netns ebtables: part 1 * propagate netns from userspace, register table in passed netns * remporarily register every ebt_table in init_net P. S.: one needs to add ".netns_ok = 1" to igmp_protocol to test with ebtables(8) in netns. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 2 +- net/bridge/netfilter/ebtable_broute.c | 2 +- net/bridge/netfilter/ebtable_filter.c | 2 +- net/bridge/netfilter/ebtable_nat.c | 2 +- net/bridge/netfilter/ebtables.c | 27 ++++++++++++++------------- 5 files changed, 18 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index d45e29cd1cfb..624e7883068c 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -300,7 +300,7 @@ struct ebt_table #define EBT_ALIGN(s) (((s) + (__alignof__(struct ebt_replace)-1)) & \ ~(__alignof__(struct ebt_replace)-1)) -extern int ebt_register_table(struct ebt_table *table); +extern int ebt_register_table(struct net *net, struct ebt_table *table); extern void ebt_unregister_table(struct ebt_table *table); extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 246626bb0c87..1731ce8f7479 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -66,7 +66,7 @@ static int __init ebtable_broute_init(void) { int ret; - ret = ebt_register_table(&broute_table); + ret = ebt_register_table(&init_net, &broute_table); if (ret < 0) return ret; /* see br_input.c */ diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 1a58af51a2e2..af8953c9a57c 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -95,7 +95,7 @@ static int __init ebtable_filter_init(void) { int ret; - ret = ebt_register_table(&frame_filter); + ret = ebt_register_table(&init_net, &frame_filter); if (ret < 0) return ret; ret = nf_register_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter)); diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index f60c1e78e575..bafe16029bd7 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -102,7 +102,7 @@ static int __init ebtable_nat_init(void) { int ret; - ret = ebt_register_table(&frame_nat); + ret = ebt_register_table(&init_net, &frame_nat); if (ret < 0) return ret; ret = nf_register_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat)); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 0fa208e86405..c1a82b2826eb 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -55,7 +55,6 @@ static DEFINE_MUTEX(ebt_mutex); -static LIST_HEAD(ebt_tables); static struct xt_target ebt_standard_target = { .name = "standard", @@ -315,9 +314,11 @@ find_inlist_lock(struct list_head *head, const char *name, const char *prefix, } static inline struct ebt_table * -find_table_lock(const char *name, int *error, struct mutex *mutex) +find_table_lock(struct net *net, const char *name, int *error, + struct mutex *mutex) { - return find_inlist_lock(&ebt_tables, name, "ebtable_", error, mutex); + return find_inlist_lock(&net->xt.tables[NFPROTO_BRIDGE], name, + "ebtable_", error, mutex); } static inline int @@ -944,7 +945,7 @@ static void get_counters(struct ebt_counter *oldcounters, } /* replace the table */ -static int do_replace(void __user *user, unsigned int len) +static int do_replace(struct net *net, void __user *user, unsigned int len) { int ret, i, countersize; struct ebt_table_info *newinfo; @@ -1016,7 +1017,7 @@ static int do_replace(void __user *user, unsigned int len) if (ret != 0) goto free_counterstmp; - t = find_table_lock(tmp.name, &ret, &ebt_mutex); + t = find_table_lock(net, tmp.name, &ret, &ebt_mutex); if (!t) { ret = -ENOENT; goto free_iterate; @@ -1097,7 +1098,7 @@ free_newinfo: return ret; } -int ebt_register_table(struct ebt_table *table) +int ebt_register_table(struct net *net, struct ebt_table *table) { struct ebt_table_info *newinfo; struct ebt_table *t; @@ -1157,7 +1158,7 @@ int ebt_register_table(struct ebt_table *table) if (ret != 0) goto free_chainstack; - list_for_each_entry(t, &ebt_tables, list) { + list_for_each_entry(t, &net->xt.tables[NFPROTO_BRIDGE], list) { if (strcmp(t->name, table->name) == 0) { ret = -EEXIST; BUGPRINT("Table name already exists\n"); @@ -1170,7 +1171,7 @@ int ebt_register_table(struct ebt_table *table) ret = -ENOENT; goto free_unlock; } - list_add(&table->list, &ebt_tables); + list_add(&table->list, &net->xt.tables[NFPROTO_BRIDGE]); mutex_unlock(&ebt_mutex); return 0; free_unlock: @@ -1208,7 +1209,7 @@ void ebt_unregister_table(struct ebt_table *table) } /* userspace just supplied us with counters */ -static int update_counters(void __user *user, unsigned int len) +static int update_counters(struct net *net, void __user *user, unsigned int len) { int i, ret; struct ebt_counter *tmp; @@ -1228,7 +1229,7 @@ static int update_counters(void __user *user, unsigned int len) return -ENOMEM; } - t = find_table_lock(hlp.name, &ret, &ebt_mutex); + t = find_table_lock(net, hlp.name, &ret, &ebt_mutex); if (!t) goto free_tmp; @@ -1386,10 +1387,10 @@ static int do_ebt_set_ctl(struct sock *sk, switch(cmd) { case EBT_SO_SET_ENTRIES: - ret = do_replace(user, len); + ret = do_replace(sock_net(sk), user, len); break; case EBT_SO_SET_COUNTERS: - ret = update_counters(user, len); + ret = update_counters(sock_net(sk), user, len); break; default: ret = -EINVAL; @@ -1406,7 +1407,7 @@ static int do_ebt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) if (copy_from_user(&tmp, user, sizeof(tmp))) return -EFAULT; - t = find_table_lock(tmp.name, &ret, &ebt_mutex); + t = find_table_lock(sock_net(sk), tmp.name, &ret, &ebt_mutex); if (!t) return ret; -- cgit v1.2.3 From 6beceee5aa2cb94c4ae9f0784c7d3135d343f5b5 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 4 Nov 2008 14:27:15 +0100 Subject: netfilter: netns ebtables: part 2 * return ebt_table from ebt_register_table(), module code will save it into per-netns data for unregistration * duplicate ebt_table at the very beginning of registration -- it's added into list, so one ebt_table wouldn't end up in many lists (and each netns has different one) * introduce underscored tables in individial modules, this is temporary to not break bisection. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/linux/netfilter_bridge/ebtables.h | 3 ++- net/bridge/netfilter/ebtable_broute.c | 19 +++++++++---------- net/bridge/netfilter/ebtable_filter.c | 17 +++++++++-------- net/bridge/netfilter/ebtable_nat.c | 19 ++++++++++--------- net/bridge/netfilter/ebtables.c | 23 +++++++++++++++++------ 5 files changed, 47 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 624e7883068c..e40ddb94b1af 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -300,7 +300,8 @@ struct ebt_table #define EBT_ALIGN(s) (((s) + (__alignof__(struct ebt_replace)-1)) & \ ~(__alignof__(struct ebt_replace)-1)) -extern int ebt_register_table(struct net *net, struct ebt_table *table); +extern struct ebt_table *ebt_register_table(struct net *net, + struct ebt_table *table); extern void ebt_unregister_table(struct ebt_table *table); extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 1731ce8f7479..3277d682dfcf 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -41,22 +41,23 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks) return 0; } -static struct ebt_table broute_table = +static struct ebt_table __broute_table = { .name = "broute", .table = &initial_table, .valid_hooks = 1 << NF_BR_BROUTING, - .lock = __RW_LOCK_UNLOCKED(broute_table.lock), + .lock = __RW_LOCK_UNLOCKED(__broute_table.lock), .check = check, .me = THIS_MODULE, }; +static struct ebt_table *broute_table; static int ebt_broute(struct sk_buff *skb) { int ret; ret = ebt_do_table(NF_BR_BROUTING, skb, skb->dev, NULL, - &broute_table); + broute_table); if (ret == NF_DROP) return 1; /* route it */ return 0; /* bridge it */ @@ -64,21 +65,19 @@ static int ebt_broute(struct sk_buff *skb) static int __init ebtable_broute_init(void) { - int ret; - - ret = ebt_register_table(&init_net, &broute_table); - if (ret < 0) - return ret; + broute_table = ebt_register_table(&init_net, &__broute_table); + if (IS_ERR(broute_table)) + return PTR_ERR(broute_table); /* see br_input.c */ rcu_assign_pointer(br_should_route_hook, ebt_broute); - return ret; + return 0; } static void __exit ebtable_broute_fini(void) { rcu_assign_pointer(br_should_route_hook, NULL); synchronize_net(); - ebt_unregister_table(&broute_table); + ebt_unregister_table(broute_table); } module_init(ebtable_broute_init); diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index af8953c9a57c..596564c7aa58 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -50,21 +50,22 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks) return 0; } -static struct ebt_table frame_filter = +static struct ebt_table __frame_filter = { .name = "filter", .table = &initial_table, .valid_hooks = FILTER_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(frame_filter.lock), + .lock = __RW_LOCK_UNLOCKED(__frame_filter.lock), .check = check, .me = THIS_MODULE, }; +static struct ebt_table *frame_filter; static unsigned int ebt_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, &frame_filter); + return ebt_do_table(hook, skb, in, out, frame_filter); } static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { @@ -95,19 +96,19 @@ static int __init ebtable_filter_init(void) { int ret; - ret = ebt_register_table(&init_net, &frame_filter); - if (ret < 0) - return ret; + frame_filter = ebt_register_table(&init_net, &__frame_filter); + if (IS_ERR(frame_filter)) + return PTR_ERR(frame_filter); ret = nf_register_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter)); if (ret < 0) - ebt_unregister_table(&frame_filter); + ebt_unregister_table(frame_filter); return ret; } static void __exit ebtable_filter_fini(void) { nf_unregister_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter)); - ebt_unregister_table(&frame_filter); + ebt_unregister_table(frame_filter); } module_init(ebtable_filter_init); diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index bafe16029bd7..0d8fc5bcddd1 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -50,28 +50,29 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks) return 0; } -static struct ebt_table frame_nat = +static struct ebt_table __frame_nat = { .name = "nat", .table = &initial_table, .valid_hooks = NAT_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(frame_nat.lock), + .lock = __RW_LOCK_UNLOCKED(__frame_nat.lock), .check = check, .me = THIS_MODULE, }; +static struct ebt_table *frame_nat; static unsigned int ebt_nat_dst(unsigned int hook, struct sk_buff *skb, const struct net_device *in , const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, &frame_nat); + return ebt_do_table(hook, skb, in, out, frame_nat); } static unsigned int ebt_nat_src(unsigned int hook, struct sk_buff *skb, const struct net_device *in , const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, &frame_nat); + return ebt_do_table(hook, skb, in, out, frame_nat); } static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { @@ -102,19 +103,19 @@ static int __init ebtable_nat_init(void) { int ret; - ret = ebt_register_table(&init_net, &frame_nat); - if (ret < 0) - return ret; + frame_nat = ebt_register_table(&init_net, &__frame_nat); + if (IS_ERR(frame_nat)) + return PTR_ERR(frame_nat); ret = nf_register_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat)); if (ret < 0) - ebt_unregister_table(&frame_nat); + ebt_unregister_table(frame_nat); return ret; } static void __exit ebtable_nat_fini(void) { nf_unregister_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat)); - ebt_unregister_table(&frame_nat); + ebt_unregister_table(frame_nat); } module_init(ebtable_nat_init); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index c1a82b2826eb..82e17527e21e 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1098,7 +1098,7 @@ free_newinfo: return ret; } -int ebt_register_table(struct net *net, struct ebt_table *table) +struct ebt_table *ebt_register_table(struct net *net, struct ebt_table *table) { struct ebt_table_info *newinfo; struct ebt_table *t; @@ -1110,14 +1110,21 @@ int ebt_register_table(struct net *net, struct ebt_table *table) repl->entries_size == 0 || repl->counters || table->private) { BUGPRINT("Bad table data for ebt_register_table!!!\n"); - return -EINVAL; + return ERR_PTR(-EINVAL); + } + + /* Don't add one table to multiple lists. */ + table = kmemdup(table, sizeof(struct ebt_table), GFP_KERNEL); + if (!table) { + ret = -ENOMEM; + goto out; } countersize = COUNTER_OFFSET(repl->nentries) * nr_cpu_ids; newinfo = vmalloc(sizeof(*newinfo) + countersize); ret = -ENOMEM; if (!newinfo) - return -ENOMEM; + goto free_table; p = vmalloc(repl->entries_size); if (!p) @@ -1149,7 +1156,7 @@ int ebt_register_table(struct net *net, struct ebt_table *table) if (table->check && table->check(newinfo, table->valid_hooks)) { BUGPRINT("The table doesn't like its own initial data, lol\n"); - return -EINVAL; + return ERR_PTR(-EINVAL); } table->private = newinfo; @@ -1173,7 +1180,7 @@ int ebt_register_table(struct net *net, struct ebt_table *table) } list_add(&table->list, &net->xt.tables[NFPROTO_BRIDGE]); mutex_unlock(&ebt_mutex); - return 0; + return table; free_unlock: mutex_unlock(&ebt_mutex); free_chainstack: @@ -1185,7 +1192,10 @@ free_chainstack: vfree(newinfo->entries); free_newinfo: vfree(newinfo); - return ret; +free_table: + kfree(table); +out: + return ERR_PTR(ret); } void ebt_unregister_table(struct ebt_table *table) @@ -1206,6 +1216,7 @@ void ebt_unregister_table(struct ebt_table *table) vfree(table->private->chainstack); } vfree(table->private); + kfree(table); } /* userspace just supplied us with counters */ -- cgit v1.2.3 From 8157e6d16af86e4a8d31a035db7be02a8a171c26 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 4 Nov 2008 14:29:03 +0100 Subject: netfilter: netns ebtables: ebtable_broute in netns Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netns/x_tables.h | 3 +++ net/bridge/netfilter/ebtable_broute.c | 35 +++++++++++++++++++++++++++-------- 2 files changed, 30 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h index b8093971ccb4..055e684d29b6 100644 --- a/include/net/netns/x_tables.h +++ b/include/net/netns/x_tables.h @@ -4,7 +4,10 @@ #include #include +struct ebt_table; + struct netns_xt { struct list_head tables[NFPROTO_NUMPROTO]; + struct ebt_table *broute_table; }; #endif diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 3277d682dfcf..8604dfc1fc3b 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -41,33 +41,52 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks) return 0; } -static struct ebt_table __broute_table = +static struct ebt_table broute_table = { .name = "broute", .table = &initial_table, .valid_hooks = 1 << NF_BR_BROUTING, - .lock = __RW_LOCK_UNLOCKED(__broute_table.lock), + .lock = __RW_LOCK_UNLOCKED(broute_table.lock), .check = check, .me = THIS_MODULE, }; -static struct ebt_table *broute_table; static int ebt_broute(struct sk_buff *skb) { int ret; ret = ebt_do_table(NF_BR_BROUTING, skb, skb->dev, NULL, - broute_table); + dev_net(skb->dev)->xt.broute_table); if (ret == NF_DROP) return 1; /* route it */ return 0; /* bridge it */ } +static int __net_init broute_net_init(struct net *net) +{ + net->xt.broute_table = ebt_register_table(net, &broute_table); + if (IS_ERR(net->xt.broute_table)) + return PTR_ERR(net->xt.broute_table); + return 0; +} + +static void __net_exit broute_net_exit(struct net *net) +{ + ebt_unregister_table(net->xt.broute_table); +} + +static struct pernet_operations broute_net_ops = { + .init = broute_net_init, + .exit = broute_net_exit, +}; + static int __init ebtable_broute_init(void) { - broute_table = ebt_register_table(&init_net, &__broute_table); - if (IS_ERR(broute_table)) - return PTR_ERR(broute_table); + int ret; + + ret = register_pernet_subsys(&broute_net_ops); + if (ret < 0) + return ret; /* see br_input.c */ rcu_assign_pointer(br_should_route_hook, ebt_broute); return 0; @@ -77,7 +96,7 @@ static void __exit ebtable_broute_fini(void) { rcu_assign_pointer(br_should_route_hook, NULL); synchronize_net(); - ebt_unregister_table(broute_table); + unregister_pernet_subsys(&broute_net_ops); } module_init(ebtable_broute_init); -- cgit v1.2.3 From 4aad10938d4e4e8364b664cd5420c3bfeb9b679b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 4 Nov 2008 14:29:58 +0100 Subject: netfilter: netns ebtables: ebtable_filter in netns Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netns/x_tables.h | 1 + net/bridge/netfilter/ebtable_filter.c | 50 ++++++++++++++++++++++++++--------- 2 files changed, 38 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h index 055e684d29b6..d258e16c894e 100644 --- a/include/net/netns/x_tables.h +++ b/include/net/netns/x_tables.h @@ -9,5 +9,6 @@ struct ebt_table; struct netns_xt { struct list_head tables[NFPROTO_NUMPROTO]; struct ebt_table *broute_table; + struct ebt_table *frame_filter; }; #endif diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 596564c7aa58..2b2e8040a9c6 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -50,41 +50,47 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks) return 0; } -static struct ebt_table __frame_filter = +static struct ebt_table frame_filter = { .name = "filter", .table = &initial_table, .valid_hooks = FILTER_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(__frame_filter.lock), + .lock = __RW_LOCK_UNLOCKED(frame_filter.lock), .check = check, .me = THIS_MODULE, }; -static struct ebt_table *frame_filter; static unsigned int -ebt_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, +ebt_in_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, frame_filter); + return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_filter); +} + +static unsigned int +ebt_out_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, int (*okfn)(struct sk_buff *)) +{ + return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_filter); } static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { { - .hook = ebt_hook, + .hook = ebt_in_hook, .owner = THIS_MODULE, .pf = PF_BRIDGE, .hooknum = NF_BR_LOCAL_IN, .priority = NF_BR_PRI_FILTER_BRIDGED, }, { - .hook = ebt_hook, + .hook = ebt_in_hook, .owner = THIS_MODULE, .pf = PF_BRIDGE, .hooknum = NF_BR_FORWARD, .priority = NF_BR_PRI_FILTER_BRIDGED, }, { - .hook = ebt_hook, + .hook = ebt_out_hook, .owner = THIS_MODULE, .pf = PF_BRIDGE, .hooknum = NF_BR_LOCAL_OUT, @@ -92,23 +98,41 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { }, }; +static int __net_init frame_filter_net_init(struct net *net) +{ + net->xt.frame_filter = ebt_register_table(net, &frame_filter); + if (IS_ERR(net->xt.frame_filter)) + return PTR_ERR(net->xt.frame_filter); + return 0; +} + +static void __net_exit frame_filter_net_exit(struct net *net) +{ + ebt_unregister_table(net->xt.frame_filter); +} + +static struct pernet_operations frame_filter_net_ops = { + .init = frame_filter_net_init, + .exit = frame_filter_net_exit, +}; + static int __init ebtable_filter_init(void) { int ret; - frame_filter = ebt_register_table(&init_net, &__frame_filter); - if (IS_ERR(frame_filter)) - return PTR_ERR(frame_filter); + ret = register_pernet_subsys(&frame_filter_net_ops); + if (ret < 0) + return ret; ret = nf_register_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter)); if (ret < 0) - ebt_unregister_table(frame_filter); + unregister_pernet_subsys(&frame_filter_net_ops); return ret; } static void __exit ebtable_filter_fini(void) { nf_unregister_hooks(ebt_ops_filter, ARRAY_SIZE(ebt_ops_filter)); - ebt_unregister_table(frame_filter); + unregister_pernet_subsys(&frame_filter_net_ops); } module_init(ebtable_filter_init); -- cgit v1.2.3 From b71b30a626fd0e43c825a05036e7a2c3f377a563 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 4 Nov 2008 14:30:46 +0100 Subject: netfilter: netns ebtables: ebtable_nat in netns Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy --- include/net/netns/x_tables.h | 1 + net/bridge/netfilter/ebtable_nat.c | 47 ++++++++++++++++++++++++++------------ 2 files changed, 33 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h index d258e16c894e..9554a644a8f8 100644 --- a/include/net/netns/x_tables.h +++ b/include/net/netns/x_tables.h @@ -10,5 +10,6 @@ struct netns_xt { struct list_head tables[NFPROTO_NUMPROTO]; struct ebt_table *broute_table; struct ebt_table *frame_filter; + struct ebt_table *frame_nat; }; #endif diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 0d8fc5bcddd1..3fe1ae87e35f 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -50,48 +50,47 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks) return 0; } -static struct ebt_table __frame_nat = +static struct ebt_table frame_nat = { .name = "nat", .table = &initial_table, .valid_hooks = NAT_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(__frame_nat.lock), + .lock = __RW_LOCK_UNLOCKED(frame_nat.lock), .check = check, .me = THIS_MODULE, }; -static struct ebt_table *frame_nat; static unsigned int -ebt_nat_dst(unsigned int hook, struct sk_buff *skb, const struct net_device *in +ebt_nat_in(unsigned int hook, struct sk_buff *skb, const struct net_device *in , const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, frame_nat); + return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_nat); } static unsigned int -ebt_nat_src(unsigned int hook, struct sk_buff *skb, const struct net_device *in +ebt_nat_out(unsigned int hook, struct sk_buff *skb, const struct net_device *in , const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, frame_nat); + return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_nat); } static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { { - .hook = ebt_nat_dst, + .hook = ebt_nat_out, .owner = THIS_MODULE, .pf = PF_BRIDGE, .hooknum = NF_BR_LOCAL_OUT, .priority = NF_BR_PRI_NAT_DST_OTHER, }, { - .hook = ebt_nat_src, + .hook = ebt_nat_out, .owner = THIS_MODULE, .pf = PF_BRIDGE, .hooknum = NF_BR_POST_ROUTING, .priority = NF_BR_PRI_NAT_SRC, }, { - .hook = ebt_nat_dst, + .hook = ebt_nat_in, .owner = THIS_MODULE, .pf = PF_BRIDGE, .hooknum = NF_BR_PRE_ROUTING, @@ -99,23 +98,41 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { }, }; +static int __net_init frame_nat_net_init(struct net *net) +{ + net->xt.frame_nat = ebt_register_table(net, &frame_nat); + if (IS_ERR(net->xt.frame_nat)) + return PTR_ERR(net->xt.frame_nat); + return 0; +} + +static void __net_exit frame_nat_net_exit(struct net *net) +{ + ebt_unregister_table(net->xt.frame_nat); +} + +static struct pernet_operations frame_nat_net_ops = { + .init = frame_nat_net_init, + .exit = frame_nat_net_exit, +}; + static int __init ebtable_nat_init(void) { int ret; - frame_nat = ebt_register_table(&init_net, &__frame_nat); - if (IS_ERR(frame_nat)) - return PTR_ERR(frame_nat); + ret = register_pernet_subsys(&frame_nat_net_ops); + if (ret < 0) + return ret; ret = nf_register_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat)); if (ret < 0) - ebt_unregister_table(frame_nat); + unregister_pernet_subsys(&frame_nat_net_ops); return ret; } static void __exit ebtable_nat_fini(void) { nf_unregister_hooks(ebt_ops_nat, ARRAY_SIZE(ebt_ops_nat)); - ebt_unregister_table(frame_nat); + unregister_pernet_subsys(&frame_nat_net_ops); } module_init(ebtable_nat_init); -- cgit v1.2.3 From 71566a0d161edec70361b7f90f6e54af6a6d5d05 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 31 Oct 2008 12:57:20 +0100 Subject: tracing/fastboot: Enable boot tracing only during initcalls Impact: modify boot tracer We used to disable the initcall tracing at a specified time (IE: end of builtin initcalls). But we don't need it anymore. It will be stopped when initcalls are finished. However we want two things: _Start this tracing only after pre-smp initcalls are finished. _Since we are planning to trace sched_switches at the same time, we want to enable them only during the initcall execution. For this purpose, this patch introduce two functions to enable/disable the sched_switch tracing during boot. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 24 ++++++++++++++++++++++-- init/main.c | 4 +++- kernel/trace/trace_boot.c | 28 ++++++++++++++++------------ 3 files changed, 41 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e46a7b34037c..4642959e5bda 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -234,6 +234,11 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { } #endif +/* + * Structure which defines the trace of an initcall. + * You don't have to fill the func field since it is + * only used internally by the tracer. + */ struct boot_trace { pid_t caller; char func[KSYM_NAME_LEN]; @@ -244,13 +249,28 @@ struct boot_trace { }; #ifdef CONFIG_BOOT_TRACER +/* Append the trace on the ring-buffer */ extern void trace_boot(struct boot_trace *it, initcall_t fn); + +/* Tells the tracer that smp_pre_initcall is finished. + * So we can start the tracing + */ extern void start_boot_trace(void); -extern void stop_boot_trace(void); + +/* Resume the tracing of other necessary events + * such as sched switches + */ +extern void enable_boot_trace(void); + +/* Suspend this tracing. Actually, only sched_switches tracing have + * to be suspended. Initcalls doesn't need it.) + */ +extern void disable_boot_trace(void); #else static inline void trace_boot(struct boot_trace *it, initcall_t fn) { } static inline void start_boot_trace(void) { } -static inline void stop_boot_trace(void) { } +static inline void enable_boot_trace(void) { } +static inline void disable_boot_trace(void) { } #endif diff --git a/init/main.c b/init/main.c index 7e117a231af1..4b03cd5656ca 100644 --- a/init/main.c +++ b/init/main.c @@ -711,6 +711,7 @@ int do_one_initcall(initcall_t fn) it.caller = task_pid_nr(current); printk("calling %pF @ %i\n", fn, it.caller); it.calltime = ktime_get(); + enable_boot_trace(); } it.result = fn(); @@ -722,6 +723,7 @@ int do_one_initcall(initcall_t fn) printk("initcall %pF returned %d after %Ld usecs\n", fn, it.result, it.duration); trace_boot(&it, fn); + disable_boot_trace(); } msgbuf[0] = 0; @@ -882,7 +884,7 @@ static int __init kernel_init(void * unused) * we're essentially up and running. Get rid of the * initmem segments and start the user-mode stuff.. */ - stop_boot_trace(); + init_post(); return 0; } diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index d0a5e50eeff2..d104d5b46413 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -13,23 +13,29 @@ #include "trace.h" static struct trace_array *boot_trace; -static int trace_boot_enabled; +static bool pre_initcalls_finished; - -/* Should be started after do_pre_smp_initcalls() in init/main.c */ +/* Tells the boot tracer that the pre_smp_initcalls are finished. + * So we are ready . + * It doesn't enable sched events tracing however. + * You have to call enable_boot_trace to do so. + */ void start_boot_trace(void) { - trace_boot_enabled = 1; + pre_initcalls_finished = true; +} + +void enable_boot_trace(void) +{ } -void stop_boot_trace(void) +void disable_boot_trace(void) { - trace_boot_enabled = 0; } void reset_boot_trace(struct trace_array *tr) { - stop_boot_trace(); + disable_boot_trace(); } static void boot_trace_init(struct trace_array *tr) @@ -37,8 +43,6 @@ static void boot_trace_init(struct trace_array *tr) int cpu; boot_trace = tr; - trace_boot_enabled = 0; - for_each_cpu_mask(cpu, cpu_possible_map) tracing_reset(tr, cpu); } @@ -46,9 +50,9 @@ static void boot_trace_init(struct trace_array *tr) static void boot_trace_ctrl_update(struct trace_array *tr) { if (tr->ctrl) - start_boot_trace(); + enable_boot_trace(); else - stop_boot_trace(); + disable_boot_trace(); } static enum print_line_t initcall_print_line(struct trace_iterator *iter) @@ -99,7 +103,7 @@ void trace_boot(struct boot_trace *it, initcall_t fn) unsigned long irq_flags; struct trace_array *tr = boot_trace; - if (!trace_boot_enabled) + if (!pre_initcalls_finished) return; /* Get its name now since this function could -- cgit v1.2.3 From fd8cd7e1919fc1c27fe2fdccd2a1cd32f791ef0f Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Mon, 3 Nov 2008 15:50:38 -0800 Subject: x86: vmware: look for DMI string in the product serial key Impact: Should permit VMware detection on older platforms where the vendor is changed. Could theoretically cause a regression if some weird serial number scheme contains the string "VMware" by pure chance. Seems unlikely, especially with the mixed case. In some user configured cases, VMware may choose not to put a VMware specific DMI string, but the product serial key is always there and is VMware specific. Add a interface to check the serial key, when checking for VMware in the DMI information. Signed-off-by: Alok N Kataria Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/vmware.c | 7 ++++++- drivers/firmware/dmi_scan.c | 11 +++++++++++ include/linux/dmi.h | 2 ++ 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index a0905ecfe7d2..c034bda842d9 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -61,6 +61,11 @@ static unsigned long __vmware_get_tsc_khz(void) return tsc_hz; } +/* + * While checking the dmi string infomation, just checking the product + * serial key should be enough, as this will always have a VMware + * specific string when running under VMware hypervisor. + */ int vmware_platform(void) { if (cpu_has_hypervisor) { @@ -74,7 +79,7 @@ int vmware_platform(void) hyper_vendor_id[12] = '\0'; if (!strcmp(hyper_vendor_id, "VMwareVMware")) return 1; - } else if (dmi_available && dmi_name_in_vendors("VMware") && + } else if (dmi_available && dmi_name_in_serial("VMware") && __vmware_platform()) return 1; diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 3e526b6d00cb..d66d41282907 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -467,6 +467,17 @@ const char *dmi_get_system_info(int field) } EXPORT_SYMBOL(dmi_get_system_info); +/** + * dmi_name_in_serial - Check if string is in the DMI product serial + * information. + */ +int dmi_name_in_serial(const char *str) +{ + int f = DMI_PRODUCT_SERIAL; + if (dmi_ident[f] && strstr(dmi_ident[f], str)) + return 1; + return 0; +} /** * dmi_name_in_vendors - Check if string is anywhere in the DMI vendor information. diff --git a/include/linux/dmi.h b/include/linux/dmi.h index e5084eb5943a..2bfda178f274 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -44,6 +44,7 @@ extern const struct dmi_device * dmi_find_device(int type, const char *name, extern void dmi_scan_machine(void); extern int dmi_get_year(int field); extern int dmi_name_in_vendors(const char *str); +extern int dmi_name_in_serial(const char *str); extern int dmi_available; extern int dmi_walk(void (*decode)(const struct dmi_header *)); @@ -56,6 +57,7 @@ static inline const struct dmi_device * dmi_find_device(int type, const char *na static inline void dmi_scan_machine(void) { return; } static inline int dmi_get_year(int year) { return 0; } static inline int dmi_name_in_vendors(const char *s) { return 0; } +static inline int dmi_name_in_serial(const char *s) { return 0; } #define dmi_available 0 static inline int dmi_walk(void (*decode)(const struct dmi_header *)) { return -1; } -- cgit v1.2.3 From d5f642384e9da75393160350f75bbb9a527f7c58 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 4 Nov 2008 14:45:58 -0800 Subject: net: #ifdef ->sk_security Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/sock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 941ad7c830a3..08291c1be41e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -271,7 +271,9 @@ struct sock { struct sk_buff *sk_send_head; __u32 sk_sndmsg_off; int sk_write_pending; +#ifdef CONFIG_SECURITY void *sk_security; +#endif __u32 sk_mark; /* XXX 4 bytes hole on 64 bit */ void (*sk_state_change)(struct sock *sk); -- cgit v1.2.3 From 7d43d1a0f2cf535167ec7247f110a1f85cecac43 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 4 Nov 2008 23:43:47 -0800 Subject: dccp: Implement lookup table for feature-negotiation information A lookup table for feature-negotiation information, extracted from RFC 4340/42, is provided by this patch. All currently known features can be found in this table, along with their feature location, their default value, and type. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/dccp.h | 9 +++++---- net/dccp/feat.c | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 6080449fbec9..3978aff197d9 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -176,19 +176,20 @@ enum { }; /* DCCP features (RFC 4340 section 6.4) */ -enum { +enum dccp_feature_numbers { DCCPF_RESERVED = 0, DCCPF_CCID = 1, - DCCPF_SHORT_SEQNOS = 2, /* XXX: not yet implemented */ + DCCPF_SHORT_SEQNOS = 2, DCCPF_SEQUENCE_WINDOW = 3, - DCCPF_ECN_INCAPABLE = 4, /* XXX: not yet implemented */ + DCCPF_ECN_INCAPABLE = 4, DCCPF_ACK_RATIO = 5, DCCPF_SEND_ACK_VECTOR = 6, DCCPF_SEND_NDP_COUNT = 7, DCCPF_MIN_CSUM_COVER = 8, - DCCPF_DATA_CHECKSUM = 9, /* XXX: not yet implemented */ + DCCPF_DATA_CHECKSUM = 9, /* 10-127 reserved */ DCCPF_MIN_CCID_SPECIFIC = 128, + DCCPF_SEND_LEV_RATE = 192, /* RFC 4342, sec. 8.4 */ DCCPF_MAX_CCID_SPECIFIC = 255, }; diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 933a0ecf8d46..45e36fc7943b 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -23,6 +23,43 @@ #define DCCP_FEAT_SP_NOAGREE (-123) +static const struct { + u8 feat_num; /* DCCPF_xxx */ + enum dccp_feat_type rxtx; /* RX or TX */ + enum dccp_feat_type reconciliation; /* SP or NN */ + u8 default_value; /* as in 6.4 */ +/* + * Lookup table for location and type of features (from RFC 4340/4342) + * +--------------------------+----+-----+----+----+---------+-----------+ + * | Feature | Location | Reconc. | Initial | Section | + * | | RX | TX | SP | NN | Value | Reference | + * +--------------------------+----+-----+----+----+---------+-----------+ + * | DCCPF_CCID | | X | X | | 2 | 10 | + * | DCCPF_SHORT_SEQNOS | | X | X | | 0 | 7.6.1 | + * | DCCPF_SEQUENCE_WINDOW | | X | | X | 100 | 7.5.2 | + * | DCCPF_ECN_INCAPABLE | X | | X | | 0 | 12.1 | + * | DCCPF_ACK_RATIO | | X | | X | 2 | 11.3 | + * | DCCPF_SEND_ACK_VECTOR | X | | X | | 0 | 11.5 | + * | DCCPF_SEND_NDP_COUNT | | X | X | | 0 | 7.7.2 | + * | DCCPF_MIN_CSUM_COVER | X | | X | | 0 | 9.2.1 | + * | DCCPF_DATA_CHECKSUM | X | | X | | 0 | 9.3.1 | + * | DCCPF_SEND_LEV_RATE | X | | X | | 0 | 4342/8.4 | + * +--------------------------+----+-----+----+----+---------+-----------+ + */ +} dccp_feat_table[] = { + { DCCPF_CCID, FEAT_AT_TX, FEAT_SP, 2 }, + { DCCPF_SHORT_SEQNOS, FEAT_AT_TX, FEAT_SP, 0 }, + { DCCPF_SEQUENCE_WINDOW, FEAT_AT_TX, FEAT_NN, 100 }, + { DCCPF_ECN_INCAPABLE, FEAT_AT_RX, FEAT_SP, 0 }, + { DCCPF_ACK_RATIO, FEAT_AT_TX, FEAT_NN, 2 }, + { DCCPF_SEND_ACK_VECTOR, FEAT_AT_RX, FEAT_SP, 0 }, + { DCCPF_SEND_NDP_COUNT, FEAT_AT_TX, FEAT_SP, 0 }, + { DCCPF_MIN_CSUM_COVER, FEAT_AT_RX, FEAT_SP, 0 }, + { DCCPF_DATA_CHECKSUM, FEAT_AT_RX, FEAT_SP, 0 }, + { DCCPF_SEND_LEV_RATE, FEAT_AT_RX, FEAT_SP, 0 }, +}; +#define DCCP_FEAT_SUPPORTED_MAX ARRAY_SIZE(dccp_feat_table) + int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature, u8 *val, u8 len, gfp_t gfp) { @@ -639,6 +676,8 @@ const char *dccp_feat_name(const u8 feat) if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC) return feature_names[DCCPF_RESERVED]; + if (feat == DCCPF_SEND_LEV_RATE) + return "Send Loss Event Rate"; if (feat >= DCCPF_MIN_CCID_SPECIFIC) return "CCID-specific"; -- cgit v1.2.3 From ac75773c2742d82cbcb078708df406e9017224b7 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 4 Nov 2008 23:55:49 -0800 Subject: dccp: Per-socket initialisation of feature negotiation This provides feature-negotiation initialisation for both DCCP sockets and DCCP request_sockets, to support feature negotiation during connection setup. It also resolves a FIXME regarding the congestion control initialisation. Thanks to Wei Yongjun for help with the IPv6 side of this patch. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- include/linux/dccp.h | 4 ++++ net/dccp/dccp.h | 3 ++- net/dccp/feat.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++ net/dccp/feat.h | 1 + net/dccp/input.c | 2 -- net/dccp/ipv4.c | 3 ++- net/dccp/ipv6.c | 3 ++- net/dccp/minisocks.c | 7 ++++++- net/dccp/proto.c | 1 + 9 files changed, 73 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 3978aff197d9..484b8a1fb023 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -412,6 +412,7 @@ extern void dccp_minisock_init(struct dccp_minisock *dmsk); * @dreq_iss: initial sequence number sent on the Response (RFC 4340, 7.1) * @dreq_isr: initial sequence number received on the Request * @dreq_service: service code present on the Request (there is just one) + * @dreq_featneg: feature negotiation options for this connection * The following two fields are analogous to the ones in dccp_sock: * @dreq_timestamp_echo: last received timestamp to echo (13.1) * @dreq_timestamp_echo: the time of receiving the last @dreq_timestamp_echo @@ -421,6 +422,7 @@ struct dccp_request_sock { __u64 dreq_iss; __u64 dreq_isr; __be32 dreq_service; + struct list_head dreq_featneg; __u32 dreq_timestamp_echo; __u32 dreq_timestamp_time; }; @@ -498,6 +500,7 @@ struct dccp_ackvec; * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) * @dccps_minisock - associated minisock (accessed via dccp_msk) + * @dccps_featneg - tracks feature-negotiation state (mostly during handshake) * @dccps_hc_rx_ackvec - rx half connection ack vector * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection) @@ -535,6 +538,7 @@ struct dccp_sock { __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; struct dccp_minisock dccps_minisock; + struct list_head dccps_featneg; struct dccp_ackvec *dccps_hc_rx_ackvec; struct ccid *dccps_hc_rx_ccid; struct ccid *dccps_hc_tx_ccid; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index d6fed595a3ff..dee4a90886d6 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -252,7 +252,8 @@ extern const char *dccp_state_name(const int state); extern void dccp_set_state(struct sock *sk, const int state); extern void dccp_done(struct sock *sk); -extern void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb); +extern int dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp, + struct sk_buff const *skb); extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb); diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 9a37b6ce3aca..069d8ffe4c6f 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -90,6 +90,20 @@ static u8 dccp_feat_type(u8 feat_num) return dccp_feat_table[idx].reconciliation; } +/* copy constructor, fval must not already contain allocated memory */ +static int dccp_feat_clone_sp_val(dccp_feat_val *fval, u8 const *val, u8 len) +{ + fval->sp.len = len; + if (fval->sp.len > 0) { + fval->sp.vec = kmemdup(val, len, gfp_any()); + if (fval->sp.vec == NULL) { + fval->sp.len = 0; + return -ENOBUFS; + } + } + return 0; +} + static void dccp_feat_val_destructor(u8 feat_num, dccp_feat_val *val) { if (unlikely(val == NULL)) @@ -99,6 +113,28 @@ static void dccp_feat_val_destructor(u8 feat_num, dccp_feat_val *val) memset(val, 0, sizeof(*val)); } +static struct dccp_feat_entry * + dccp_feat_clone_entry(struct dccp_feat_entry const *original) +{ + struct dccp_feat_entry *new; + u8 type = dccp_feat_type(original->feat_num); + + if (type == FEAT_UNKNOWN) + return NULL; + + new = kmemdup(original, sizeof(struct dccp_feat_entry), gfp_any()); + if (new == NULL) + return NULL; + + if (type == FEAT_SP && dccp_feat_clone_sp_val(&new->val, + original->val.sp.vec, + original->val.sp.len)) { + kfree(new); + return NULL; + } + return new; +} + static void dccp_feat_entry_destructor(struct dccp_feat_entry *entry) { if (entry != NULL) { @@ -133,6 +169,25 @@ void dccp_feat_list_purge(struct list_head *fn_list) } EXPORT_SYMBOL_GPL(dccp_feat_list_purge); +/* generate @to as full clone of @from - @to must not contain any nodes */ +int dccp_feat_clone_list(struct list_head const *from, struct list_head *to) +{ + struct dccp_feat_entry *entry, *new; + + INIT_LIST_HEAD(to); + list_for_each_entry(entry, from, node) { + new = dccp_feat_clone_entry(entry); + if (new == NULL) + goto cloning_failed; + list_add_tail(&new->node, to); + } + return 0; + +cloning_failed: + dccp_feat_list_purge(to); + return -ENOMEM; +} + int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature, u8 *val, u8 len, gfp_t gfp) { diff --git a/net/dccp/feat.h b/net/dccp/feat.h index 56df82ceef09..f5e99b39712a 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h @@ -96,6 +96,7 @@ extern int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len); extern void dccp_feat_clean(struct dccp_minisock *dmsk); extern int dccp_feat_clone(struct sock *oldsk, struct sock *newsk); +extern int dccp_feat_clone_list(struct list_head const *, struct list_head *); extern int dccp_feat_init(struct dccp_minisock *dmsk); #endif /* _DCCP_FEAT_H */ diff --git a/net/dccp/input.c b/net/dccp/input.c index 779d0ed9ae94..3070015edc75 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -590,8 +590,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) < 0) return 1; - - /* FIXME: do congestion control initialization */ goto discard; } if (dh->dccph_type == DCCP_PKT_RESET) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 01e3e0206254..cbf522dfecc4 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -595,7 +595,8 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (req == NULL) goto drop; - dccp_reqsk_init(req, skb); + if (dccp_reqsk_init(req, dccp_sk(sk), skb)) + goto drop_and_free; dreq = dccp_rsk(req); if (dccp_parse_options(sk, dreq, skb)) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index d4ce1224e008..4e172ccfd76c 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -426,7 +426,8 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (req == NULL) goto drop; - dccp_reqsk_init(req, skb); + if (dccp_reqsk_init(req, dccp_sk(sk), skb)) + goto drop_and_free; dreq = dccp_rsk(req); if (dccp_parse_options(sk, dreq, skb)) diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index e6bf99e3e41a..afdacbb94d75 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -125,6 +125,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk, newdp->dccps_timestamp_time = dreq->dreq_timestamp_time; newicsk->icsk_rto = DCCP_TIMEOUT_INIT; + INIT_LIST_HEAD(&newdp->dccps_featneg); if (dccp_feat_clone(sk, newsk)) goto out_free; @@ -304,7 +305,8 @@ void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack); -void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb) +int dccp_reqsk_init(struct request_sock *req, + struct dccp_sock const *dp, struct sk_buff const *skb) { struct dccp_request_sock *dreq = dccp_rsk(req); @@ -313,6 +315,9 @@ void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb) inet_rsk(req)->acked = 0; req->rcv_wnd = sysctl_dccp_feat_sequence_window; dreq->dreq_timestamp_echo = 0; + + /* inherit feature negotiation options from listening socket */ + return dccp_feat_clone_list(&dp->dccps_featneg, &dreq->dreq_featneg); } EXPORT_SYMBOL_GPL(dccp_reqsk_init); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index d0bd34819761..1cdf4ae99605 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -193,6 +193,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) dccp_init_xmit_timers(sk); + INIT_LIST_HEAD(&dp->dccps_featneg); /* * FIXME: We're hardcoding the CCID, and doing this at this point makes * the listening (master) sock get CCID control blocks, which is not -- cgit v1.2.3 From ea913940c39a61214c799cc7093d7b20fe11a94c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 5 Nov 2008 11:13:21 +0000 Subject: ASoC: Remove core version number Rather than try to remember to keep the core version number updated (which hasn't been happening) just remove it. It was much more useful when ASoC was out of tree. Signed-off-by: Mark brown --- include/sound/soc.h | 2 -- sound/soc/soc-core.c | 1 - 2 files changed, 3 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index da0040b69c2d..fa1b99b45893 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -21,8 +21,6 @@ #include #include -#define SND_SOC_VERSION "0.13.2" - /* * Convenience kcontrol builders */ diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 1a173682965a..9feaa7b6dc34 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1965,7 +1965,6 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_digital_mute); static int __devinit snd_soc_init(void) { - printk(KERN_INFO "ASoC version %s\n", SND_SOC_VERSION); return platform_driver_register(&soc_driver); } -- cgit v1.2.3 From 1f29fae29709b4668979e244c09b2fa78ff1ad59 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Wed, 5 Nov 2008 16:08:52 -0600 Subject: file capabilities: add no_file_caps switch (v4) Add a no_file_caps boot option when file capabilities are compiled into the kernel (CONFIG_SECURITY_FILE_CAPABILITIES=y). This allows distributions to ship a kernel with file capabilities compiled in, without forcing users to use (and understand and trust) them. When no_file_caps is specified at boot, then when a process executes a file, any file capabilities stored with that file will not be used in the calculation of the process' new capability sets. This means that booting with the no_file_caps boot option will not be the same as booting a kernel with file capabilities compiled out - in particular a task with CAP_SETPCAP will not have any chance of passing capabilities to another task (which isn't "really" possible anyway, and which may soon by killed altogether by David Howells in any case), and it will instead be able to put new capabilities in its pI. However since fI will always be empty and pI is masked with fI, it gains the task nothing. We also support the extra prctl options, setting securebits and dropping capabilities from the per-process bounding set. The other remaining difference is that killpriv, task_setscheduler, setioprio, and setnice will continue to be hooked. That will be noticable in the case where a root task changed its uid while keeping some caps, and another task owned by the new uid tries to change settings for the more privileged task. Changelog: Nov 05 2008: (v4) trivial port on top of always-start-\ with-clear-caps patch Sep 23 2008: nixed file_caps_enabled when file caps are not compiled in as it isn't used. Document no_file_caps in kernel-parameters.txt. Signed-off-by: Serge Hallyn Acked-by: Andrew G. Morgan Signed-off-by: James Morris --- Documentation/kernel-parameters.txt | 4 ++++ include/linux/capability.h | 3 +++ kernel/capability.c | 11 +++++++++++ security/commoncap.c | 3 +++ 4 files changed, 21 insertions(+) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1bbcaa8982b6..784443acca9c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1459,6 +1459,10 @@ and is between 256 and 4096 characters. It is defined in the file instruction doesn't work correctly and not to use it. + no_file_caps Tells the kernel not to honor file capabilities. The + only way then for a file to be executed with privilege + is to be setuid root or executed by root. + nohalt [IA-64] Tells the kernel not to use the power saving function PAL_HALT_LIGHT when idle. This increases power-consumption. On the positive side, it reduces diff --git a/include/linux/capability.h b/include/linux/capability.h index 9d1fe30b6f6c..5bc145bd759a 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -68,6 +68,9 @@ typedef struct __user_cap_data_struct { #define VFS_CAP_U32 VFS_CAP_U32_2 #define VFS_CAP_REVISION VFS_CAP_REVISION_2 +#ifdef CONFIG_SECURITY_FILE_CAPABILITIES +extern int file_caps_enabled; +#endif struct vfs_cap_data { __le32 magic_etc; /* Little endian */ diff --git a/kernel/capability.c b/kernel/capability.c index 33e51e78c2d8..e13a68535ad5 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -33,6 +33,17 @@ EXPORT_SYMBOL(__cap_empty_set); EXPORT_SYMBOL(__cap_full_set); EXPORT_SYMBOL(__cap_init_eff_set); +#ifdef CONFIG_SECURITY_FILE_CAPABILITIES +int file_caps_enabled = 1; + +static int __init file_caps_disable(char *str) +{ + file_caps_enabled = 0; + return 1; +} +__setup("no_file_caps", file_caps_disable); +#endif + /* * More recent versions of libcap are available from: * diff --git a/security/commoncap.c b/security/commoncap.c index 3976613db829..f88119cb2bc2 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -281,6 +281,9 @@ static int get_file_caps(struct linux_binprm *bprm) bprm_clear_caps(bprm); + if (!file_caps_enabled) + return 0; + if (bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID) return 0; -- cgit v1.2.3 From ae33bc40c0d96d02f51a996482ea7e41c5152695 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 5 Nov 2008 16:00:02 -0800 Subject: net: Guaranetee the proper ordering of the loopback device. I was recently hunting a bug that occurred in network namespace cleanup. In looking at the code it became apparrent that we have and will continue to have cases where if we have anything going on in a network namespace there will be assumptions that the loopback device is present. Things like sending igmp unsubscribe messages when we bring down network devices invokes the routing code which assumes that at least the loopback driver is present. Therefore to avoid magic initcall ordering hackery that is hard to follow and hard to get right insert a call to register the loopback device directly from net_dev_init(). This guarantes that the loopback device is the first device registered and the last network device to go away. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- drivers/net/loopback.c | 13 ++----------- include/linux/netdevice.h | 1 + net/core/dev.c | 12 ++++++++++++ 3 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 91d08585a6d8..c4516b580ba5 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -204,17 +204,8 @@ static __net_exit void loopback_net_exit(struct net *net) unregister_netdev(dev); } -static struct pernet_operations __net_initdata loopback_net_ops = { +/* Registered in net/core/dev.c */ +struct pernet_operations __net_initdata loopback_net_ops = { .init = loopback_net_init, .exit = loopback_net_exit, }; - -static int __init loopback_init(void) -{ - return register_pernet_device(&loopback_net_ops); -} - -/* Loopback is special. It should be initialized before any other network - * device and network subsystem. - */ -fs_initcall(loopback_init); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f1b0dbe58464..12d7f4469dc9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1766,6 +1766,7 @@ static inline int skb_bond_should_drop(struct sk_buff *skb) return 0; } +extern struct pernet_operations __net_initdata loopback_net_ops; #endif /* __KERNEL__ */ #endif /* _LINUX_DEV_H */ diff --git a/net/core/dev.c b/net/core/dev.c index 9475f3e624a8..811507c39805 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4904,6 +4904,18 @@ static int __init net_dev_init(void) if (register_pernet_subsys(&netdev_net_ops)) goto out; + /* The loopback device is special if any other network devices + * is present in a network namespace the loopback device must + * be present. Since we now dynamically allocate and free the + * loopback device ensure this invariant is maintained by + * keeping the loopback device as the first device on the + * list of network devices. Ensuring the loopback devices + * is the first device that appears and the last network device + * that disappears. + */ + if (register_pernet_device(&loopback_net_ops)) + goto out; + if (register_pernet_device(&default_device_ops)) goto out; -- cgit v1.2.3 From 61c9eaf90081cbe6dc4f389e0056bff76eca19ec Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Wed, 5 Nov 2008 16:02:34 -0800 Subject: pkt_sched: Fix qdisc len in qdisc_peek_dequeued() A packet dequeued and stored as gso_skb in qdisc_peek_dequeued() should be seen as part of the queue for sch->q.qlen queries until it's really dequeued with qdisc_dequeue_peeked(), so qlen needs additional updating in these functions. (Updating qstats.backlog shouldn't matter here.) Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/sch_generic.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 9dcb5bfe094a..64ae1ba9f554 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -442,8 +442,12 @@ static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch) static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch) { /* we can reuse ->gso_skb because peek isn't called for root qdiscs */ - if (!sch->gso_skb) + if (!sch->gso_skb) { sch->gso_skb = sch->dequeue(sch); + if (sch->gso_skb) + /* it's still part of the queue */ + sch->q.qlen++; + } return sch->gso_skb; } @@ -453,10 +457,12 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) { struct sk_buff *skb = sch->gso_skb; - if (skb) + if (skb) { sch->gso_skb = NULL; - else + sch->q.qlen--; + } else { skb = sch->dequeue(sch); + } return skb; } -- cgit v1.2.3 From 305d552accae6afb859c493ebc7d98ca3371dae2 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Tue, 4 Nov 2008 17:51:14 -0800 Subject: bonding: send IPv6 neighbor advertisement on failover This patch adds better IPv6 failover support for bonding devices, especially when in active-backup mode and there are only IPv6 addresses configured, as reported by Alex Sidorenko. - Creates a new file, net/drivers/bonding/bond_ipv6.c, for the IPv6-specific routines. Both regular bonds and VLANs over bonds are supported. - Adds a new tunable, num_unsol_na, to limit the number of unsolicited IPv6 Neighbor Advertisements that are sent on a failover event. Default is 1. - Creates two new IPv6 neighbor discovery functions: ndisc_build_skb() ndisc_send_skb() These were required to support VLANs since we have to be able to add the VLAN id to the skb since ndisc_send_na() and friends shouldn't be asked to do this. These two routines are basically __ndisc_send() split into two pieces, in a slightly different order. - Updates Documentation/networking/bonding.txt and bumps the rev of bond support to 3.4.0. On failover, this new code will generate one packet: - An unsolicited IPv6 Neighbor Advertisement, which helps the switch learn that the address has moved to the new slave. Testing has shown that sending just the NA results in pretty good behavior when in active-back mode, I saw no lost ping packets for example. Signed-off-by: Brian Haley Signed-off-by: Jay Vosburgh Signed-off-by: Jeff Garzik --- Documentation/networking/bonding.txt | 10 ++ drivers/net/Kconfig | 1 + drivers/net/bonding/Makefile | 3 + drivers/net/bonding/bond_ipv6.c | 218 +++++++++++++++++++++++++++++++++++ drivers/net/bonding/bond_main.c | 33 +++++- drivers/net/bonding/bond_sysfs.c | 42 +++++++ drivers/net/bonding/bonding.h | 34 +++++- include/net/ndisc.h | 14 +++ net/ipv6/ndisc.c | 92 ++++++++++----- 9 files changed, 416 insertions(+), 31 deletions(-) create mode 100644 drivers/net/bonding/bond_ipv6.c (limited to 'include') diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index d733a428eff6..3f4d0fae7081 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -551,6 +551,16 @@ num_grat_arp affects only the active-backup mode. This option was added for bonding version 3.3.0. +num_unsol_na + + Specifies the number of unsolicited IPv6 Neighbor Advertisements + to be issued after a failover event. One unsolicited NA is issued + immediately after the failover. + + The valid range is 0 - 255; the default value is 1. This option + affects only the active-backup mode. This option was added for + bonding version 3.4.0. + primary A string (eth0, eth2, etc) specifying which slave is the diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 0f3e6b2d2808..f1d0a1371695 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -61,6 +61,7 @@ config DUMMY config BONDING tristate "Bonding driver support" depends on INET + depends on IPV6 || IPV6=n ---help--- Say 'Y' or 'M' if you wish to be able to 'bond' multiple Ethernet Channels together. This is called 'Etherchannel' by Cisco, diff --git a/drivers/net/bonding/Makefile b/drivers/net/bonding/Makefile index 5cdae2bc055a..6f9c6faef24c 100644 --- a/drivers/net/bonding/Makefile +++ b/drivers/net/bonding/Makefile @@ -6,3 +6,6 @@ obj-$(CONFIG_BONDING) += bonding.o bonding-objs := bond_main.o bond_3ad.o bond_alb.o bond_sysfs.o +ipv6-$(subst m,y,$(CONFIG_IPV6)) += bond_ipv6.o +bonding-objs += $(ipv6-y) + diff --git a/drivers/net/bonding/bond_ipv6.c b/drivers/net/bonding/bond_ipv6.c new file mode 100644 index 000000000000..7c78b7bf671c --- /dev/null +++ b/drivers/net/bonding/bond_ipv6.c @@ -0,0 +1,218 @@ +/* + * Copyright(c) 2008 Hewlett-Packard Development Company, L.P. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called LICENSE. + * + */ + +//#define BONDING_DEBUG 1 + +#include +#include +#include +#include +#include +#include "bonding.h" + +/* + * Assign bond->master_ipv6 to the next IPv6 address in the list, or + * zero it out if there are none. + */ +static void bond_glean_dev_ipv6(struct net_device *dev, struct in6_addr *addr) +{ + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + + if (!dev) + return; + + idev = in6_dev_get(dev); + if (!idev) + return; + + read_lock_bh(&idev->lock); + ifa = idev->addr_list; + if (ifa) + ipv6_addr_copy(addr, &ifa->addr); + else + ipv6_addr_set(addr, 0, 0, 0, 0); + + read_unlock_bh(&idev->lock); + + in6_dev_put(idev); +} + +static void bond_na_send(struct net_device *slave_dev, + struct in6_addr *daddr, + int router, + unsigned short vlan_id) +{ + struct in6_addr mcaddr; + struct icmp6hdr icmp6h = { + .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT, + }; + struct sk_buff *skb; + + icmp6h.icmp6_router = router; + icmp6h.icmp6_solicited = 0; + icmp6h.icmp6_override = 1; + + addrconf_addr_solict_mult(daddr, &mcaddr); + + dprintk("ipv6 na on slave %s: dest %pI6, src %pI6\n", + slave->name, &mcaddr, daddr); + + skb = ndisc_build_skb(slave_dev, &mcaddr, daddr, &icmp6h, daddr, + ND_OPT_TARGET_LL_ADDR); + + if (!skb) { + printk(KERN_ERR DRV_NAME ": NA packet allocation failed\n"); + return; + } + + if (vlan_id) { + skb = vlan_put_tag(skb, vlan_id); + if (!skb) { + printk(KERN_ERR DRV_NAME ": failed to insert VLAN tag\n"); + return; + } + } + + ndisc_send_skb(skb, slave_dev, NULL, &mcaddr, daddr, &icmp6h); +} + +/* + * Kick out an unsolicited Neighbor Advertisement for an IPv6 address on + * the bonding master. This will help the switch learn our address + * if in active-backup mode. + * + * Caller must hold curr_slave_lock for read or better + */ +void bond_send_unsolicited_na(struct bonding *bond) +{ + struct slave *slave = bond->curr_active_slave; + struct vlan_entry *vlan; + struct inet6_dev *idev; + int is_router; + + dprintk("bond_send_unsol_na: bond %s slave %s\n", bond->dev->name, + slave ? slave->dev->name : "NULL"); + + if (!slave || !bond->send_unsol_na || + test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state)) + return; + + bond->send_unsol_na--; + + idev = in6_dev_get(bond->dev); + if (!idev) + return; + + is_router = !!idev->cnf.forwarding; + + in6_dev_put(idev); + + if (!ipv6_addr_any(&bond->master_ipv6)) + bond_na_send(slave->dev, &bond->master_ipv6, is_router, 0); + + list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { + if (!ipv6_addr_any(&vlan->vlan_ipv6)) { + bond_na_send(slave->dev, &vlan->vlan_ipv6, is_router, + vlan->vlan_id); + } + } +} + +/* + * bond_inet6addr_event: handle inet6addr notifier chain events. + * + * We keep track of device IPv6 addresses primarily to use as source + * addresses in NS probes. + * + * We track one IPv6 for the main device (if it has one). + */ +static int bond_inet6addr_event(struct notifier_block *this, + unsigned long event, + void *ptr) +{ + struct inet6_ifaddr *ifa = ptr; + struct net_device *vlan_dev, *event_dev = ifa->idev->dev; + struct bonding *bond; + struct vlan_entry *vlan; + + if (dev_net(event_dev) != &init_net) + return NOTIFY_DONE; + + list_for_each_entry(bond, &bond_dev_list, bond_list) { + if (bond->dev == event_dev) { + switch (event) { + case NETDEV_UP: + if (ipv6_addr_any(&bond->master_ipv6)) + ipv6_addr_copy(&bond->master_ipv6, + &ifa->addr); + return NOTIFY_OK; + case NETDEV_DOWN: + if (ipv6_addr_equal(&bond->master_ipv6, + &ifa->addr)) + bond_glean_dev_ipv6(bond->dev, + &bond->master_ipv6); + return NOTIFY_OK; + default: + return NOTIFY_DONE; + } + } + + list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { + vlan_dev = vlan_group_get_device(bond->vlgrp, + vlan->vlan_id); + if (vlan_dev == event_dev) { + switch (event) { + case NETDEV_UP: + if (ipv6_addr_any(&vlan->vlan_ipv6)) + ipv6_addr_copy(&vlan->vlan_ipv6, + &ifa->addr); + return NOTIFY_OK; + case NETDEV_DOWN: + if (ipv6_addr_equal(&vlan->vlan_ipv6, + &ifa->addr)) + bond_glean_dev_ipv6(vlan_dev, + &vlan->vlan_ipv6); + return NOTIFY_OK; + default: + return NOTIFY_DONE; + } + } + } + } + return NOTIFY_DONE; +} + +static struct notifier_block bond_inet6addr_notifier = { + .notifier_call = bond_inet6addr_event, +}; + +void bond_register_ipv6_notifier(void) +{ + register_inet6addr_notifier(&bond_inet6addr_notifier); +} + +void bond_unregister_ipv6_notifier(void) +{ + unregister_inet6addr_notifier(&bond_inet6addr_notifier); +} + diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 39575d764974..798d98ce2d97 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -89,6 +89,7 @@ static int max_bonds = BOND_DEFAULT_MAX_BONDS; static int num_grat_arp = 1; +static int num_unsol_na = 1; static int miimon = BOND_LINK_MON_INTERV; static int updelay = 0; static int downdelay = 0; @@ -107,6 +108,8 @@ module_param(max_bonds, int, 0); MODULE_PARM_DESC(max_bonds, "Max number of bonded devices"); module_param(num_grat_arp, int, 0644); MODULE_PARM_DESC(num_grat_arp, "Number of gratuitous ARP packets to send on failover event"); +module_param(num_unsol_na, int, 0644); +MODULE_PARM_DESC(num_unsol_na, "Number of unsolicited IPv6 Neighbor Advertisements packets to send on failover event"); module_param(miimon, int, 0); MODULE_PARM_DESC(miimon, "Link check interval in milliseconds"); module_param(updelay, int, 0); @@ -242,14 +245,13 @@ static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id) dprintk("bond: %s, vlan id %d\n", (bond ? bond->dev->name: "None"), vlan_id); - vlan = kmalloc(sizeof(struct vlan_entry), GFP_KERNEL); + vlan = kzalloc(sizeof(struct vlan_entry), GFP_KERNEL); if (!vlan) { return -ENOMEM; } INIT_LIST_HEAD(&vlan->vlan_list); vlan->vlan_id = vlan_id; - vlan->vlan_ip = 0; write_lock_bh(&bond->lock); @@ -1208,6 +1210,9 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) bond->send_grat_arp = bond->params.num_grat_arp; bond_send_gratuitous_arp(bond); + bond->send_unsol_na = bond->params.num_unsol_na; + bond_send_unsolicited_na(bond); + write_unlock_bh(&bond->curr_slave_lock); read_unlock(&bond->lock); @@ -2463,6 +2468,12 @@ void bond_mii_monitor(struct work_struct *work) read_unlock(&bond->curr_slave_lock); } + if (bond->send_unsol_na) { + read_lock(&bond->curr_slave_lock); + bond_send_unsolicited_na(bond); + read_unlock(&bond->curr_slave_lock); + } + if (bond_miimon_inspect(bond)) { read_unlock(&bond->lock); rtnl_lock(); @@ -3158,6 +3169,12 @@ void bond_activebackup_arp_mon(struct work_struct *work) read_unlock(&bond->curr_slave_lock); } + if (bond->send_unsol_na) { + read_lock(&bond->curr_slave_lock); + bond_send_unsolicited_na(bond); + read_unlock(&bond->curr_slave_lock); + } + if (bond_ab_arp_inspect(bond, delta_in_ticks)) { read_unlock(&bond->lock); rtnl_lock(); @@ -3827,6 +3844,7 @@ static int bond_close(struct net_device *bond_dev) write_lock_bh(&bond->lock); bond->send_grat_arp = 0; + bond->send_unsol_na = 0; /* signal timers not to re-arm */ bond->kill_timers = 1; @@ -4542,6 +4560,7 @@ static int bond_init(struct net_device *bond_dev, struct bond_params *params) bond->primary_slave = NULL; bond->dev = bond_dev; bond->send_grat_arp = 0; + bond->send_unsol_na = 0; bond->setup_by_slave = 0; INIT_LIST_HEAD(&bond->vlan_list); @@ -4791,6 +4810,13 @@ static int bond_check_params(struct bond_params *params) num_grat_arp = 1; } + if (num_unsol_na < 0 || num_unsol_na > 255) { + printk(KERN_WARNING DRV_NAME + ": Warning: num_unsol_na (%d) not in range 0-255 so it " + "was reset to 1 \n", num_unsol_na); + num_unsol_na = 1; + } + /* reset values for 802.3ad */ if (bond_mode == BOND_MODE_8023AD) { if (!miimon) { @@ -4992,6 +5018,7 @@ static int bond_check_params(struct bond_params *params) params->xmit_policy = xmit_hashtype; params->miimon = miimon; params->num_grat_arp = num_grat_arp; + params->num_unsol_na = num_unsol_na; params->arp_interval = arp_interval; params->arp_validate = arp_validate_value; params->updelay = updelay; @@ -5144,6 +5171,7 @@ static int __init bonding_init(void) register_netdevice_notifier(&bond_netdev_notifier); register_inetaddr_notifier(&bond_inetaddr_notifier); + bond_register_ipv6_notifier(); goto out; err: @@ -5166,6 +5194,7 @@ static void __exit bonding_exit(void) { unregister_netdevice_notifier(&bond_netdev_notifier); unregister_inetaddr_notifier(&bond_inetaddr_notifier); + bond_unregister_ipv6_notifier(); bond_destroy_sysfs(); diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index e400d7dfdfc8..8788e3e33852 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -983,6 +983,47 @@ out: return ret; } static DEVICE_ATTR(num_grat_arp, S_IRUGO | S_IWUSR, bonding_show_n_grat_arp, bonding_store_n_grat_arp); + +/* + * Show and set the number of unsolicted NA's to send after a failover event. + */ +static ssize_t bonding_show_n_unsol_na(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct bonding *bond = to_bond(d); + + return sprintf(buf, "%d\n", bond->params.num_unsol_na); +} + +static ssize_t bonding_store_n_unsol_na(struct device *d, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int new_value, ret = count; + struct bonding *bond = to_bond(d); + + if (sscanf(buf, "%d", &new_value) != 1) { + printk(KERN_ERR DRV_NAME + ": %s: no num_unsol_na value specified.\n", + bond->dev->name); + ret = -EINVAL; + goto out; + } + if (new_value < 0 || new_value > 255) { + printk(KERN_ERR DRV_NAME + ": %s: Invalid num_unsol_na value %d not in range 0-255; rejected.\n", + bond->dev->name, new_value); + ret = -EINVAL; + goto out; + } else { + bond->params.num_unsol_na = new_value; + } +out: + return ret; +} +static DEVICE_ATTR(num_unsol_na, S_IRUGO | S_IWUSR, bonding_show_n_unsol_na, bonding_store_n_unsol_na); + /* * Show and set the MII monitor interval. There are two tricky bits * here. First, if MII monitoring is activated, then we must disable @@ -1420,6 +1461,7 @@ static struct attribute *per_bond_attrs[] = { &dev_attr_lacp_rate.attr, &dev_attr_xmit_hash_policy.attr, &dev_attr_num_grat_arp.attr, + &dev_attr_num_unsol_na.attr, &dev_attr_miimon.attr, &dev_attr_primary.attr, &dev_attr_use_carrier.attr, diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index ffb668dd6d3b..0491c7c2645b 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -19,16 +19,19 @@ #include #include #include +#include #include "bond_3ad.h" #include "bond_alb.h" -#define DRV_VERSION "3.3.0" -#define DRV_RELDATE "June 10, 2008" +#define DRV_VERSION "3.4.0" +#define DRV_RELDATE "October 7, 2008" #define DRV_NAME "bonding" #define DRV_DESCRIPTION "Ethernet Channel Bonding Driver" #define BOND_MAX_ARP_TARGETS 16 +extern struct list_head bond_dev_list; + #ifdef BONDING_DEBUG #define dprintk(fmt, args...) \ printk(KERN_DEBUG \ @@ -126,6 +129,7 @@ struct bond_params { int xmit_policy; int miimon; int num_grat_arp; + int num_unsol_na; int arp_interval; int arp_validate; int use_carrier; @@ -148,6 +152,9 @@ struct vlan_entry { struct list_head vlan_list; __be32 vlan_ip; unsigned short vlan_id; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct in6_addr vlan_ipv6; +#endif }; struct slave { @@ -195,6 +202,7 @@ struct bonding { rwlock_t curr_slave_lock; s8 kill_timers; s8 send_grat_arp; + s8 send_unsol_na; s8 setup_by_slave; struct net_device_stats stats; #ifdef CONFIG_PROC_FS @@ -218,6 +226,9 @@ struct bonding { struct delayed_work arp_work; struct delayed_work alb_work; struct delayed_work ad_work; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct in6_addr master_ipv6; +#endif }; /** @@ -341,5 +352,24 @@ extern struct bond_parm_tbl xmit_hashtype_tbl[]; extern struct bond_parm_tbl arp_validate_tbl[]; extern struct bond_parm_tbl fail_over_mac_tbl[]; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +void bond_send_unsolicited_na(struct bonding *bond); +void bond_register_ipv6_notifier(void); +void bond_unregister_ipv6_notifier(void); +#else +static inline void bond_send_unsolicited_na(struct bonding *bond) +{ + return; +} +static inline void bond_register_ipv6_notifier(void) +{ + return; +} +static inline void bond_unregister_ipv6_notifier(void) +{ + return; +} +#endif + #endif /* _LINUX_BONDING_H */ diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 11dd0137c6a5..ce532f2222ce 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -108,6 +108,20 @@ extern void ndisc_send_redirect(struct sk_buff *skb, extern int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir); +extern struct sk_buff *ndisc_build_skb(struct net_device *dev, + const struct in6_addr *daddr, + const struct in6_addr *saddr, + struct icmp6hdr *icmp6h, + const struct in6_addr *target, + int llinfo); + +extern void ndisc_send_skb(struct sk_buff *skb, + struct net_device *dev, + struct neighbour *neigh, + const struct in6_addr *daddr, + const struct in6_addr *saddr, + struct icmp6hdr *icmp6h); + /* diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 2a6752dae09d..fbf451c0d77a 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -437,38 +437,20 @@ static void pndisc_destructor(struct pneigh_entry *n) ipv6_dev_mc_dec(dev, &maddr); } -/* - * Send a Neighbour Advertisement - */ -static void __ndisc_send(struct net_device *dev, - struct neighbour *neigh, - const struct in6_addr *daddr, - const struct in6_addr *saddr, - struct icmp6hdr *icmp6h, const struct in6_addr *target, - int llinfo) +struct sk_buff *ndisc_build_skb(struct net_device *dev, + const struct in6_addr *daddr, + const struct in6_addr *saddr, + struct icmp6hdr *icmp6h, + const struct in6_addr *target, + int llinfo) { - struct flowi fl; - struct dst_entry *dst; struct net *net = dev_net(dev); struct sock *sk = net->ipv6.ndisc_sk; struct sk_buff *skb; struct icmp6hdr *hdr; - struct inet6_dev *idev; int len; int err; - u8 *opt, type; - - type = icmp6h->icmp6_type; - - icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex); - - dst = icmp6_dst_alloc(dev, neigh, daddr); - if (!dst) - return; - - err = xfrm_lookup(&dst, &fl, NULL, 0); - if (err < 0) - return; + u8 *opt; if (!dev->addr_len) llinfo = 0; @@ -485,8 +467,7 @@ static void __ndisc_send(struct net_device *dev, ND_PRINTK0(KERN_ERR "ICMPv6 ND: %s() failed to allocate an skb.\n", __func__); - dst_release(dst); - return; + return NULL; } skb_reserve(skb, LL_RESERVED_SPACE(dev)); @@ -513,6 +494,42 @@ static void __ndisc_send(struct net_device *dev, csum_partial((__u8 *) hdr, len, 0)); + return skb; +} + +EXPORT_SYMBOL(ndisc_build_skb); + +void ndisc_send_skb(struct sk_buff *skb, + struct net_device *dev, + struct neighbour *neigh, + const struct in6_addr *daddr, + const struct in6_addr *saddr, + struct icmp6hdr *icmp6h) +{ + struct flowi fl; + struct dst_entry *dst; + struct net *net = dev_net(dev); + struct sock *sk = net->ipv6.ndisc_sk; + struct inet6_dev *idev; + int err; + u8 type; + + type = icmp6h->icmp6_type; + + icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex); + + dst = icmp6_dst_alloc(dev, neigh, daddr); + if (!dst) { + kfree_skb(skb); + return; + } + + err = xfrm_lookup(&dst, &fl, NULL, 0); + if (err < 0) { + kfree_skb(skb); + return; + } + skb->dst = dst; idev = in6_dev_get(dst->dev); @@ -529,6 +546,27 @@ static void __ndisc_send(struct net_device *dev, in6_dev_put(idev); } +EXPORT_SYMBOL(ndisc_send_skb); + +/* + * Send a Neighbour Discover packet + */ +static void __ndisc_send(struct net_device *dev, + struct neighbour *neigh, + const struct in6_addr *daddr, + const struct in6_addr *saddr, + struct icmp6hdr *icmp6h, const struct in6_addr *target, + int llinfo) +{ + struct sk_buff *skb; + + skb = ndisc_build_skb(dev, daddr, saddr, icmp6h, target, llinfo); + if (!skb) + return; + + ndisc_send_skb(skb, dev, neigh, daddr, saddr, icmp6h); +} + static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, -- cgit v1.2.3 From fd9abb3d97c2ab883e4732ec1214fe64190236e7 Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Wed, 5 Nov 2008 00:35:37 +0000 Subject: SMSC LAN911x and LAN921x vendor driver Attached is a driver for SMSC's LAN911x and LAN921x families of embedded ethernet controllers. There is an existing smc911x driver in the tree; this is intended to replace it. Dustin McIntire (the author of the smc911x driver) has expressed his support for switching to this driver. This driver contains workarounds for all known hardware issues, and has been tested on all flavours of the chip on multiple architectures. This driver now uses phylib, so this patch also adds support for the device's internal phy Signed-off-by: Steve Glendinning Signed-off-by: Bahadir Balban Signed-off-by: Dustin Mcintire Signed-off-by: Bill Gatliff Signed-off-by: Jeff Garzik --- MAINTAINERS | 6 + drivers/net/Kconfig | 14 + drivers/net/Makefile | 1 + drivers/net/phy/smsc.c | 28 + drivers/net/smsc911x.c | 2091 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/net/smsc911x.h | 394 +++++++++ include/linux/smsc911x.h | 42 + 7 files changed, 2576 insertions(+) create mode 100644 drivers/net/smsc911x.c create mode 100644 drivers/net/smsc911x.h create mode 100644 include/linux/smsc911x.h (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index 129117fe6490..5d951f3db018 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3853,6 +3853,12 @@ M: mhoffman@lightlink.com L: lm-sensors@lm-sensors.org S: Maintained +SMSC911x ETHERNET DRIVER +P: Steve Glendinning +M: steve.glendinning@smsc.com +L: netdev@vger.kernel.org +S: Supported + SMX UIO Interface P: Ben Nizette M: bn@niasdigital.com diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index f1d0a1371695..74a18a7f8f40 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -979,6 +979,20 @@ config SMC911X called smc911x. If you want to compile it as a module, say M here and read +config SMSC911X + tristate "SMSC LAN911x/LAN921x families embedded ethernet support" + depends on ARM || SUPERH + select CRC32 + select MII + select PHYLIB + ---help--- + Say Y here if you want support for SMSC LAN911x and LAN921x families + of ethernet controllers. + + To compile this driver as a module, choose M here and read + . The module + will be called smsc911x. + config NET_VENDOR_RACAL bool "Racal-Interlan (Micom) NI cards" depends on ISA diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 657c47b1a6b6..e06829aa75b0 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -219,6 +219,7 @@ obj-$(CONFIG_S2IO) += s2io.o obj-$(CONFIG_MYRI10GE) += myri10ge/ obj-$(CONFIG_SMC91X) += smc91x.o obj-$(CONFIG_SMC911X) += smc911x.o +obj-$(CONFIG_SMSC911X) += smsc911x.o obj-$(CONFIG_BFIN_MAC) += bfin_mac.o obj-$(CONFIG_DM9000) += dm9000.o obj-$(CONFIG_PASEMI_MAC) += pasemi_mac_driver.o diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index 73baa7a3bb0e..c05d38d46350 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -126,6 +126,27 @@ static struct phy_driver lan8700_driver = { .driver = { .owner = THIS_MODULE, } }; +static struct phy_driver lan911x_int_driver = { + .phy_id = 0x0007c0d0, /* OUI=0x00800f, Model#=0x0d */ + .phy_id_mask = 0xfffffff0, + .name = "SMSC LAN911x Internal PHY", + + .features = (PHY_BASIC_FEATURES | SUPPORTED_Pause + | SUPPORTED_Asym_Pause), + .flags = PHY_HAS_INTERRUPT | PHY_HAS_MAGICANEG, + + /* basic functions */ + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .config_init = smsc_phy_config_init, + + /* IRQ related */ + .ack_interrupt = smsc_phy_ack_interrupt, + .config_intr = smsc_phy_config_intr, + + .driver = { .owner = THIS_MODULE, } +}; + static int __init smsc_init(void) { int ret; @@ -142,8 +163,14 @@ static int __init smsc_init(void) if (ret) goto err3; + ret = phy_driver_register (&lan911x_int_driver); + if (ret) + goto err4; + return 0; +err4: + phy_driver_unregister (&lan8700_driver); err3: phy_driver_unregister (&lan8187_driver); err2: @@ -154,6 +181,7 @@ err1: static void __exit smsc_exit(void) { + phy_driver_unregister (&lan911x_int_driver); phy_driver_unregister (&lan8700_driver); phy_driver_unregister (&lan8187_driver); phy_driver_unregister (&lan83c185_driver); diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c new file mode 100644 index 000000000000..fe517880fc97 --- /dev/null +++ b/drivers/net/smsc911x.c @@ -0,0 +1,2091 @@ +/*************************************************************************** + * + * Copyright (C) 2004-2008 SMSC + * Copyright (C) 2005-2008 ARM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + *************************************************************************** + * Rewritten, heavily based on smsc911x simple driver by SMSC. + * Partly uses io macros from smc91x.c by Nicolas Pitre + * + * Supported devices: + * LAN9115, LAN9116, LAN9117, LAN9118 + * LAN9215, LAN9216, LAN9217, LAN9218 + * LAN9210, LAN9211 + * LAN9220, LAN9221 + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "smsc911x.h" + +#define SMSC_CHIPNAME "smsc911x" +#define SMSC_MDIONAME "smsc911x-mdio" +#define SMSC_DRV_VERSION "2008-10-21" + +MODULE_LICENSE("GPL"); +MODULE_VERSION(SMSC_DRV_VERSION); + +#if USE_DEBUG > 0 +static int debug = 16; +#else +static int debug = 3; +#endif + +module_param(debug, int, 0); +MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); + +struct smsc911x_data { + void __iomem *ioaddr; + + unsigned int idrev; + + /* used to decide which workarounds apply */ + unsigned int generation; + + /* device configuration (copied from platform_data during probe) */ + unsigned int irq_polarity; + unsigned int irq_type; + phy_interface_t phy_interface; + + /* This needs to be acquired before calling any of below: + * smsc911x_mac_read(), smsc911x_mac_write() + */ + spinlock_t mac_lock; + +#if (!SMSC_CAN_USE_32BIT) + /* spinlock to ensure 16-bit accesses are serialised */ + spinlock_t dev_lock; +#endif + + struct phy_device *phy_dev; + struct mii_bus *mii_bus; + int phy_irq[PHY_MAX_ADDR]; + unsigned int using_extphy; + int last_duplex; + int last_carrier; + + u32 msg_enable; + unsigned int gpio_setting; + unsigned int gpio_orig_setting; + struct net_device *dev; + struct napi_struct napi; + + unsigned int software_irq_signal; + +#ifdef USE_PHY_WORK_AROUND +#define MIN_PACKET_SIZE (64) + char loopback_tx_pkt[MIN_PACKET_SIZE]; + char loopback_rx_pkt[MIN_PACKET_SIZE]; + unsigned int resetcount; +#endif + + /* Members for Multicast filter workaround */ + unsigned int multicast_update_pending; + unsigned int set_bits_mask; + unsigned int clear_bits_mask; + unsigned int hashhi; + unsigned int hashlo; +}; + +#if SMSC_CAN_USE_32BIT + +static inline u32 smsc911x_reg_read(struct smsc911x_data *pdata, u32 reg) +{ + return readl(pdata->ioaddr + reg); +} + +static inline void smsc911x_reg_write(struct smsc911x_data *pdata, u32 reg, + u32 val) +{ + writel(val, pdata->ioaddr + reg); +} + +/* Writes a packet to the TX_DATA_FIFO */ +static inline void +smsc911x_tx_writefifo(struct smsc911x_data *pdata, unsigned int *buf, + unsigned int wordcount) +{ + writesl(pdata->ioaddr + TX_DATA_FIFO, buf, wordcount); +} + +/* Reads a packet out of the RX_DATA_FIFO */ +static inline void +smsc911x_rx_readfifo(struct smsc911x_data *pdata, unsigned int *buf, + unsigned int wordcount) +{ + readsl(pdata->ioaddr + RX_DATA_FIFO, buf, wordcount); +} + +#else /* SMSC_CAN_USE_32BIT */ + +/* These 16-bit access functions are significantly slower, due to the locking + * necessary. If your bus hardware can be configured to do this for you + * (in response to a single 32-bit operation from software), you should use + * the 32-bit access functions instead. */ + +static inline u32 smsc911x_reg_read(struct smsc911x_data *pdata, u32 reg) +{ + unsigned long flags; + u32 data; + + /* these two 16-bit reads must be performed consecutively, so must + * not be interrupted by our own ISR (which would start another + * read operation) */ + spin_lock_irqsave(&pdata->dev_lock, flags); + data = ((readw(pdata->ioaddr + reg) & 0xFFFF) | + ((readw(pdata->ioaddr + reg + 2) & 0xFFFF) << 16)); + spin_unlock_irqrestore(&pdata->dev_lock, flags); + + return data; +} + +static inline void smsc911x_reg_write(struct smsc911x_data *pdata, u32 reg, + u32 val) +{ + unsigned long flags; + + /* these two 16-bit writes must be performed consecutively, so must + * not be interrupted by our own ISR (which would start another + * read operation) */ + spin_lock_irqsave(&pdata->dev_lock, flags); + writew(val & 0xFFFF, pdata->ioaddr + reg); + writew((val >> 16) & 0xFFFF, pdata->ioaddr + reg + 2); + spin_unlock_irqrestore(&pdata->dev_lock, flags); +} + +/* Writes a packet to the TX_DATA_FIFO */ +static inline void +smsc911x_tx_writefifo(struct smsc911x_data *pdata, unsigned int *buf, + unsigned int wordcount) +{ + while (wordcount--) + smsc911x_reg_write(pdata, TX_DATA_FIFO, *buf++); +} + +/* Reads a packet out of the RX_DATA_FIFO */ +static inline void +smsc911x_rx_readfifo(struct smsc911x_data *pdata, unsigned int *buf, + unsigned int wordcount) +{ + while (wordcount--) + *buf++ = smsc911x_reg_read(pdata, RX_DATA_FIFO); +} + +#endif /* SMSC_CAN_USE_32BIT */ + +/* waits for MAC not busy, with timeout. Only called by smsc911x_mac_read + * and smsc911x_mac_write, so assumes mac_lock is held */ +static int smsc911x_mac_complete(struct smsc911x_data *pdata) +{ + int i; + u32 val; + + SMSC_ASSERT_MAC_LOCK(pdata); + + for (i = 0; i < 40; i++) { + val = smsc911x_reg_read(pdata, MAC_CSR_CMD); + if (!(val & MAC_CSR_CMD_CSR_BUSY_)) + return 0; + } + SMSC_WARNING(HW, "Timed out waiting for MAC not BUSY. " + "MAC_CSR_CMD: 0x%08X", val); + return -EIO; +} + +/* Fetches a MAC register value. Assumes mac_lock is acquired */ +static u32 smsc911x_mac_read(struct smsc911x_data *pdata, unsigned int offset) +{ + unsigned int temp; + + SMSC_ASSERT_MAC_LOCK(pdata); + + temp = smsc911x_reg_read(pdata, MAC_CSR_CMD); + if (unlikely(temp & MAC_CSR_CMD_CSR_BUSY_)) { + SMSC_WARNING(HW, "MAC busy at entry"); + return 0xFFFFFFFF; + } + + /* Send the MAC cmd */ + smsc911x_reg_write(pdata, MAC_CSR_CMD, ((offset & 0xFF) | + MAC_CSR_CMD_CSR_BUSY_ | MAC_CSR_CMD_R_NOT_W_)); + + /* Workaround for hardware read-after-write restriction */ + temp = smsc911x_reg_read(pdata, BYTE_TEST); + + /* Wait for the read to complete */ + if (likely(smsc911x_mac_complete(pdata) == 0)) + return smsc911x_reg_read(pdata, MAC_CSR_DATA); + + SMSC_WARNING(HW, "MAC busy after read"); + return 0xFFFFFFFF; +} + +/* Set a mac register, mac_lock must be acquired before calling */ +static void smsc911x_mac_write(struct smsc911x_data *pdata, + unsigned int offset, u32 val) +{ + unsigned int temp; + + SMSC_ASSERT_MAC_LOCK(pdata); + + temp = smsc911x_reg_read(pdata, MAC_CSR_CMD); + if (unlikely(temp & MAC_CSR_CMD_CSR_BUSY_)) { + SMSC_WARNING(HW, + "smsc911x_mac_write failed, MAC busy at entry"); + return; + } + + /* Send data to write */ + smsc911x_reg_write(pdata, MAC_CSR_DATA, val); + + /* Write the actual data */ + smsc911x_reg_write(pdata, MAC_CSR_CMD, ((offset & 0xFF) | + MAC_CSR_CMD_CSR_BUSY_)); + + /* Workaround for hardware read-after-write restriction */ + temp = smsc911x_reg_read(pdata, BYTE_TEST); + + /* Wait for the write to complete */ + if (likely(smsc911x_mac_complete(pdata) == 0)) + return; + + SMSC_WARNING(HW, + "smsc911x_mac_write failed, MAC busy after write"); +} + +/* Get a phy register */ +static int smsc911x_mii_read(struct mii_bus *bus, int phyaddr, int regidx) +{ + struct smsc911x_data *pdata = (struct smsc911x_data *)bus->priv; + unsigned long flags; + unsigned int addr; + int i, reg; + + spin_lock_irqsave(&pdata->mac_lock, flags); + + /* Confirm MII not busy */ + if (unlikely(smsc911x_mac_read(pdata, MII_ACC) & MII_ACC_MII_BUSY_)) { + SMSC_WARNING(HW, + "MII is busy in smsc911x_mii_read???"); + reg = -EIO; + goto out; + } + + /* Set the address, index & direction (read from PHY) */ + addr = ((phyaddr & 0x1F) << 11) | ((regidx & 0x1F) << 6); + smsc911x_mac_write(pdata, MII_ACC, addr); + + /* Wait for read to complete w/ timeout */ + for (i = 0; i < 100; i++) + if (!(smsc911x_mac_read(pdata, MII_ACC) & MII_ACC_MII_BUSY_)) { + reg = smsc911x_mac_read(pdata, MII_DATA); + goto out; + } + + SMSC_WARNING(HW, "Timed out waiting for MII write to finish"); + reg = -EIO; + +out: + spin_unlock_irqrestore(&pdata->mac_lock, flags); + return reg; +} + +/* Set a phy register */ +static int smsc911x_mii_write(struct mii_bus *bus, int phyaddr, int regidx, + u16 val) +{ + struct smsc911x_data *pdata = (struct smsc911x_data *)bus->priv; + unsigned long flags; + unsigned int addr; + int i, reg; + + spin_lock_irqsave(&pdata->mac_lock, flags); + + /* Confirm MII not busy */ + if (unlikely(smsc911x_mac_read(pdata, MII_ACC) & MII_ACC_MII_BUSY_)) { + SMSC_WARNING(HW, + "MII is busy in smsc911x_mii_write???"); + reg = -EIO; + goto out; + } + + /* Put the data to write in the MAC */ + smsc911x_mac_write(pdata, MII_DATA, val); + + /* Set the address, index & direction (write to PHY) */ + addr = ((phyaddr & 0x1F) << 11) | ((regidx & 0x1F) << 6) | + MII_ACC_MII_WRITE_; + smsc911x_mac_write(pdata, MII_ACC, addr); + + /* Wait for write to complete w/ timeout */ + for (i = 0; i < 100; i++) + if (!(smsc911x_mac_read(pdata, MII_ACC) & MII_ACC_MII_BUSY_)) { + reg = 0; + goto out; + } + + SMSC_WARNING(HW, "Timed out waiting for MII write to finish"); + reg = -EIO; + +out: + spin_unlock_irqrestore(&pdata->mac_lock, flags); + return reg; +} + +/* Autodetects and initialises external phy for SMSC9115 and SMSC9117 flavors. + * If something goes wrong, returns -ENODEV to revert back to internal phy. + * Performed at initialisation only, so interrupts are enabled */ +static int smsc911x_phy_initialise_external(struct smsc911x_data *pdata) +{ + unsigned int hwcfg = smsc911x_reg_read(pdata, HW_CFG); + + /* External phy is requested, supported, and detected */ + if (hwcfg & HW_CFG_EXT_PHY_DET_) { + + /* Switch to external phy. Assuming tx and rx are stopped + * because smsc911x_phy_initialise is called before + * smsc911x_rx_initialise and tx_initialise. */ + + /* Disable phy clocks to the MAC */ + hwcfg &= (~HW_CFG_PHY_CLK_SEL_); + hwcfg |= HW_CFG_PHY_CLK_SEL_CLK_DIS_; + smsc911x_reg_write(pdata, HW_CFG, hwcfg); + udelay(10); /* Enough time for clocks to stop */ + + /* Switch to external phy */ + hwcfg |= HW_CFG_EXT_PHY_EN_; + smsc911x_reg_write(pdata, HW_CFG, hwcfg); + + /* Enable phy clocks to the MAC */ + hwcfg &= (~HW_CFG_PHY_CLK_SEL_); + hwcfg |= HW_CFG_PHY_CLK_SEL_EXT_PHY_; + smsc911x_reg_write(pdata, HW_CFG, hwcfg); + udelay(10); /* Enough time for clocks to restart */ + + hwcfg |= HW_CFG_SMI_SEL_; + smsc911x_reg_write(pdata, HW_CFG, hwcfg); + + SMSC_TRACE(HW, "Successfully switched to external PHY"); + pdata->using_extphy = 1; + } else { + SMSC_WARNING(HW, "No external PHY detected, " + "Using internal PHY instead."); + /* Use internal phy */ + return -ENODEV; + } + return 0; +} + +/* Fetches a tx status out of the status fifo */ +static unsigned int smsc911x_tx_get_txstatus(struct smsc911x_data *pdata) +{ + unsigned int result = + smsc911x_reg_read(pdata, TX_FIFO_INF) & TX_FIFO_INF_TSUSED_; + + if (result != 0) + result = smsc911x_reg_read(pdata, TX_STATUS_FIFO); + + return result; +} + +/* Fetches the next rx status */ +static unsigned int smsc911x_rx_get_rxstatus(struct smsc911x_data *pdata) +{ + unsigned int result = + smsc911x_reg_read(pdata, RX_FIFO_INF) & RX_FIFO_INF_RXSUSED_; + + if (result != 0) + result = smsc911x_reg_read(pdata, RX_STATUS_FIFO); + + return result; +} + +#ifdef USE_PHY_WORK_AROUND +static int smsc911x_phy_check_loopbackpkt(struct smsc911x_data *pdata) +{ + unsigned int tries; + u32 wrsz; + u32 rdsz; + ulong bufp; + + for (tries = 0; tries < 10; tries++) { + unsigned int txcmd_a; + unsigned int txcmd_b; + unsigned int status; + unsigned int pktlength; + unsigned int i; + + /* Zero-out rx packet memory */ + memset(pdata->loopback_rx_pkt, 0, MIN_PACKET_SIZE); + + /* Write tx packet to 118 */ + txcmd_a = (u32)((ulong)pdata->loopback_tx_pkt & 0x03) << 16; + txcmd_a |= TX_CMD_A_FIRST_SEG_ | TX_CMD_A_LAST_SEG_; + txcmd_a |= MIN_PACKET_SIZE; + + txcmd_b = MIN_PACKET_SIZE << 16 | MIN_PACKET_SIZE; + + smsc911x_reg_write(pdata, TX_DATA_FIFO, txcmd_a); + smsc911x_reg_write(pdata, TX_DATA_FIFO, txcmd_b); + + bufp = (ulong)pdata->loopback_tx_pkt & (~0x3); + wrsz = MIN_PACKET_SIZE + 3; + wrsz += (u32)((ulong)pdata->loopback_tx_pkt & 0x3); + wrsz >>= 2; + + smsc911x_tx_writefifo(pdata, (unsigned int *)bufp, wrsz); + + /* Wait till transmit is done */ + i = 60; + do { + udelay(5); + status = smsc911x_tx_get_txstatus(pdata); + } while ((i--) && (!status)); + + if (!status) { + SMSC_WARNING(HW, "Failed to transmit " + "during loopback test"); + continue; + } + if (status & TX_STS_ES_) { + SMSC_WARNING(HW, "Transmit encountered " + "errors during loopback test"); + continue; + } + + /* Wait till receive is done */ + i = 60; + do { + udelay(5); + status = smsc911x_rx_get_rxstatus(pdata); + } while ((i--) && (!status)); + + if (!status) { + SMSC_WARNING(HW, + "Failed to receive during loopback test"); + continue; + } + if (status & RX_STS_ES_) { + SMSC_WARNING(HW, "Receive encountered " + "errors during loopback test"); + continue; + } + + pktlength = ((status & 0x3FFF0000UL) >> 16); + bufp = (ulong)pdata->loopback_rx_pkt; + rdsz = pktlength + 3; + rdsz += (u32)((ulong)pdata->loopback_rx_pkt & 0x3); + rdsz >>= 2; + + smsc911x_rx_readfifo(pdata, (unsigned int *)bufp, rdsz); + + if (pktlength != (MIN_PACKET_SIZE + 4)) { + SMSC_WARNING(HW, "Unexpected packet size " + "during loop back test, size=%d, will retry", + pktlength); + } else { + unsigned int j; + int mismatch = 0; + for (j = 0; j < MIN_PACKET_SIZE; j++) { + if (pdata->loopback_tx_pkt[j] + != pdata->loopback_rx_pkt[j]) { + mismatch = 1; + break; + } + } + if (!mismatch) { + SMSC_TRACE(HW, "Successfully verified " + "loopback packet"); + return 0; + } else { + SMSC_WARNING(HW, "Data mismatch " + "during loop back test, will retry"); + } + } + } + + return -EIO; +} + +static int smsc911x_phy_reset(struct smsc911x_data *pdata) +{ + struct phy_device *phy_dev = pdata->phy_dev; + unsigned int temp; + unsigned int i = 100000; + + BUG_ON(!phy_dev); + BUG_ON(!phy_dev->bus); + + SMSC_TRACE(HW, "Performing PHY BCR Reset"); + smsc911x_mii_write(phy_dev->bus, phy_dev->addr, MII_BMCR, BMCR_RESET); + do { + msleep(1); + temp = smsc911x_mii_read(phy_dev->bus, phy_dev->addr, + MII_BMCR); + } while ((i--) && (temp & BMCR_RESET)); + + if (temp & BMCR_RESET) { + SMSC_WARNING(HW, "PHY reset failed to complete."); + return -EIO; + } + /* Extra delay required because the phy may not be completed with + * its reset when BMCR_RESET is cleared. Specs say 256 uS is + * enough delay but using 1ms here to be safe */ + msleep(1); + + return 0; +} + +static int smsc911x_phy_loopbacktest(struct net_device *dev) +{ + struct smsc911x_data *pdata = netdev_priv(dev); + struct phy_device *phy_dev = pdata->phy_dev; + int result = -EIO; + unsigned int i, val; + unsigned long flags; + + /* Initialise tx packet using broadcast destination address */ + memset(pdata->loopback_tx_pkt, 0xff, ETH_ALEN); + + /* Use incrementing source address */ + for (i = 6; i < 12; i++) + pdata->loopback_tx_pkt[i] = (char)i; + + /* Set length type field */ + pdata->loopback_tx_pkt[12] = 0x00; + pdata->loopback_tx_pkt[13] = 0x00; + + for (i = 14; i < MIN_PACKET_SIZE; i++) + pdata->loopback_tx_pkt[i] = (char)i; + + val = smsc911x_reg_read(pdata, HW_CFG); + val &= HW_CFG_TX_FIF_SZ_; + val |= HW_CFG_SF_; + smsc911x_reg_write(pdata, HW_CFG, val); + + smsc911x_reg_write(pdata, TX_CFG, TX_CFG_TX_ON_); + smsc911x_reg_write(pdata, RX_CFG, + (u32)((ulong)pdata->loopback_rx_pkt & 0x03) << 8); + + for (i = 0; i < 10; i++) { + /* Set PHY to 10/FD, no ANEG, and loopback mode */ + smsc911x_mii_write(phy_dev->bus, phy_dev->addr, MII_BMCR, + BMCR_LOOPBACK | BMCR_FULLDPLX); + + /* Enable MAC tx/rx, FD */ + spin_lock_irqsave(&pdata->mac_lock, flags); + smsc911x_mac_write(pdata, MAC_CR, MAC_CR_FDPX_ + | MAC_CR_TXEN_ | MAC_CR_RXEN_); + spin_unlock_irqrestore(&pdata->mac_lock, flags); + + if (smsc911x_phy_check_loopbackpkt(pdata) == 0) { + result = 0; + break; + } + pdata->resetcount++; + + /* Disable MAC rx */ + spin_lock_irqsave(&pdata->mac_lock, flags); + smsc911x_mac_write(pdata, MAC_CR, 0); + spin_unlock_irqrestore(&pdata->mac_lock, flags); + + smsc911x_phy_reset(pdata); + } + + /* Disable MAC */ + spin_lock_irqsave(&pdata->mac_lock, flags); + smsc911x_mac_write(pdata, MAC_CR, 0); + spin_unlock_irqrestore(&pdata->mac_lock, flags); + + /* Cancel PHY loopback mode */ + smsc911x_mii_write(phy_dev->bus, phy_dev->addr, MII_BMCR, 0); + + smsc911x_reg_write(pdata, TX_CFG, 0); + smsc911x_reg_write(pdata, RX_CFG, 0); + + return result; +} +#endif /* USE_PHY_WORK_AROUND */ + +static u8 smsc95xx_resolve_flowctrl_fulldplx(u16 lcladv, u16 rmtadv) +{ + u8 cap = 0; + + if (lcladv & ADVERTISE_PAUSE_CAP) { + if (lcladv & ADVERTISE_PAUSE_ASYM) { + if (rmtadv & LPA_PAUSE_CAP) + cap = FLOW_CTRL_TX | FLOW_CTRL_RX; + else if (rmtadv & LPA_PAUSE_ASYM) + cap = FLOW_CTRL_RX; + } else { + if (rmtadv & LPA_PAUSE_CAP) + cap = FLOW_CTRL_TX | FLOW_CTRL_RX; + } + } else if (lcladv & ADVERTISE_PAUSE_ASYM) { + if ((rmtadv & LPA_PAUSE_CAP) && (rmtadv & LPA_PAUSE_ASYM)) + cap = FLOW_CTRL_TX; + } + + return cap; +} + +static void smsc911x_phy_update_flowcontrol(struct smsc911x_data *pdata) +{ + struct phy_device *phy_dev = pdata->phy_dev; + u32 afc = smsc911x_reg_read(pdata, AFC_CFG); + u32 flow; + unsigned long flags; + + if (phy_dev->duplex == DUPLEX_FULL) { + u16 lcladv = phy_read(phy_dev, MII_ADVERTISE); + u16 rmtadv = phy_read(phy_dev, MII_LPA); + u8 cap = smsc95xx_resolve_flowctrl_fulldplx(lcladv, rmtadv); + + if (cap & FLOW_CTRL_RX) + flow = 0xFFFF0002; + else + flow = 0; + + if (cap & FLOW_CTRL_TX) + afc |= 0xF; + else + afc &= ~0xF; + + SMSC_TRACE(HW, "rx pause %s, tx pause %s", + (cap & FLOW_CTRL_RX ? "enabled" : "disabled"), + (cap & FLOW_CTRL_TX ? "enabled" : "disabled")); + } else { + SMSC_TRACE(HW, "half duplex"); + flow = 0; + afc |= 0xF; + } + + spin_lock_irqsave(&pdata->mac_lock, flags); + smsc911x_mac_write(pdata, FLOW, flow); + spin_unlock_irqrestore(&pdata->mac_lock, flags); + + smsc911x_reg_write(pdata, AFC_CFG, afc); +} + +/* Update link mode if anything has changed. Called periodically when the + * PHY is in polling mode, even if nothing has changed. */ +static void smsc911x_phy_adjust_link(struct net_device *dev) +{ + struct smsc911x_data *pdata = netdev_priv(dev); + struct phy_device *phy_dev = pdata->phy_dev; + unsigned long flags; + int carrier; + + if (phy_dev->duplex != pdata->last_duplex) { + unsigned int mac_cr; + SMSC_TRACE(HW, "duplex state has changed"); + + spin_lock_irqsave(&pdata->mac_lock, flags); + mac_cr = smsc911x_mac_read(pdata, MAC_CR); + if (phy_dev->duplex) { + SMSC_TRACE(HW, + "configuring for full duplex mode"); + mac_cr |= MAC_CR_FDPX_; + } else { + SMSC_TRACE(HW, + "configuring for half duplex mode"); + mac_cr &= ~MAC_CR_FDPX_; + } + smsc911x_mac_write(pdata, MAC_CR, mac_cr); + spin_unlock_irqrestore(&pdata->mac_lock, flags); + + smsc911x_phy_update_flowcontrol(pdata); + pdata->last_duplex = phy_dev->duplex; + } + + carrier = netif_carrier_ok(dev); + if (carrier != pdata->last_carrier) { + SMSC_TRACE(HW, "carrier state has changed"); + if (carrier) { + SMSC_TRACE(HW, "configuring for carrier OK"); + if ((pdata->gpio_orig_setting & GPIO_CFG_LED1_EN_) && + (!pdata->using_extphy)) { + /* Restore orginal GPIO configuration */ + pdata->gpio_setting = pdata->gpio_orig_setting; + smsc911x_reg_write(pdata, GPIO_CFG, + pdata->gpio_setting); + } + } else { + SMSC_TRACE(HW, "configuring for no carrier"); + /* Check global setting that LED1 + * usage is 10/100 indicator */ + pdata->gpio_setting = smsc911x_reg_read(pdata, + GPIO_CFG); + if ((pdata->gpio_setting & GPIO_CFG_LED1_EN_) + && (!pdata->using_extphy)) { + /* Force 10/100 LED off, after saving + * orginal GPIO configuration */ + pdata->gpio_orig_setting = pdata->gpio_setting; + + pdata->gpio_setting &= ~GPIO_CFG_LED1_EN_; + pdata->gpio_setting |= (GPIO_CFG_GPIOBUF0_ + | GPIO_CFG_GPIODIR0_ + | GPIO_CFG_GPIOD0_); + smsc911x_reg_write(pdata, GPIO_CFG, + pdata->gpio_setting); + } + } + pdata->last_carrier = carrier; + } +} + +static int smsc911x_mii_probe(struct net_device *dev) +{ + struct smsc911x_data *pdata = netdev_priv(dev); + struct phy_device *phydev = NULL; + int phy_addr; + + /* find the first phy */ + for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) { + if (pdata->mii_bus->phy_map[phy_addr]) { + phydev = pdata->mii_bus->phy_map[phy_addr]; + SMSC_TRACE(PROBE, "PHY %d: addr %d, phy_id 0x%08X", + phy_addr, phydev->addr, phydev->phy_id); + break; + } + } + + if (!phydev) { + pr_err("%s: no PHY found\n", dev->name); + return -ENODEV; + } + + phydev = phy_connect(dev, phydev->dev.bus_id, + &smsc911x_phy_adjust_link, 0, pdata->phy_interface); + + if (IS_ERR(phydev)) { + pr_err("%s: Could not attach to PHY\n", dev->name); + return PTR_ERR(phydev); + } + + pr_info("%s: attached PHY driver [%s] (mii_bus:phy_addr=%s, irq=%d)\n", + dev->name, phydev->drv->name, phydev->dev.bus_id, phydev->irq); + + /* mask with MAC supported features */ + phydev->supported &= (PHY_BASIC_FEATURES | SUPPORTED_Pause | + SUPPORTED_Asym_Pause); + phydev->advertising = phydev->supported; + + pdata->phy_dev = phydev; + pdata->last_duplex = -1; + pdata->last_carrier = -1; + +#ifdef USE_PHY_WORK_AROUND + if (smsc911x_phy_loopbacktest(dev) < 0) { + SMSC_WARNING(HW, "Failed Loop Back Test"); + return -ENODEV; + } + SMSC_TRACE(HW, "Passed Loop Back Test"); +#endif /* USE_PHY_WORK_AROUND */ + + SMSC_TRACE(HW, "phy initialised succesfully"); + return 0; +} + +static int __devinit smsc911x_mii_init(struct platform_device *pdev, + struct net_device *dev) +{ + struct smsc911x_data *pdata = netdev_priv(dev); + int err = -ENXIO, i; + + pdata->mii_bus = mdiobus_alloc(); + if (!pdata->mii_bus) { + err = -ENOMEM; + goto err_out_1; + } + + pdata->mii_bus->name = SMSC_MDIONAME; + snprintf(pdata->mii_bus->id, MII_BUS_ID_SIZE, "%x", pdev->id); + pdata->mii_bus->priv = pdata; + pdata->mii_bus->read = smsc911x_mii_read; + pdata->mii_bus->write = smsc911x_mii_write; + pdata->mii_bus->irq = pdata->phy_irq; + for (i = 0; i < PHY_MAX_ADDR; ++i) + pdata->mii_bus->irq[i] = PHY_POLL; + + pdata->mii_bus->parent = &pdev->dev; + dev_set_drvdata(&pdev->dev, &pdata->mii_bus); + + pdata->using_extphy = 0; + + switch (pdata->idrev & 0xFFFF0000) { + case 0x01170000: + case 0x01150000: + case 0x117A0000: + case 0x115A0000: + /* External PHY supported, try to autodetect */ + if (smsc911x_phy_initialise_external(pdata) < 0) { + SMSC_TRACE(HW, "No external PHY detected, " + "using internal PHY"); + } + break; + default: + SMSC_TRACE(HW, "External PHY is not supported, " + "using internal PHY"); + break; + } + + if (!pdata->using_extphy) { + /* Mask all PHYs except ID 1 (internal) */ + pdata->mii_bus->phy_mask = ~(1 << 1); + } + + if (mdiobus_register(pdata->mii_bus)) { + SMSC_WARNING(PROBE, "Error registering mii bus"); + goto err_out_free_bus_2; + } + + if (smsc911x_mii_probe(dev) < 0) { + SMSC_WARNING(PROBE, "Error registering mii bus"); + goto err_out_unregister_bus_3; + } + + return 0; + +err_out_unregister_bus_3: + mdiobus_unregister(pdata->mii_bus); +err_out_free_bus_2: + mdiobus_free(pdata->mii_bus); +err_out_1: + return err; +} + +/* Gets the number of tx statuses in the fifo */ +static unsigned int smsc911x_tx_get_txstatcount(struct smsc911x_data *pdata) +{ + return (smsc911x_reg_read(pdata, TX_FIFO_INF) + & TX_FIFO_INF_TSUSED_) >> 16; +} + +/* Reads tx statuses and increments counters where necessary */ +static void smsc911x_tx_update_txcounters(struct net_device *dev) +{ + struct smsc911x_data *pdata = netdev_priv(dev); + unsigned int tx_stat; + + while ((tx_stat = smsc911x_tx_get_txstatus(pdata)) != 0) { + if (unlikely(tx_stat & 0x80000000)) { + /* In this driver the packet tag is used as the packet + * length. Since a packet length can never reach the + * size of 0x8000, this bit is reserved. It is worth + * noting that the "reserved bit" in the warning above + * does not reference a hardware defined reserved bit + * but rather a driver defined one. + */ + SMSC_WARNING(HW, + "Packet tag reserved bit is high"); + } else { + if (unlikely(tx_stat & 0x00008000)) { + dev->stats.tx_errors++; + } else { + dev->stats.tx_packets++; + dev->stats.tx_bytes += (tx_stat >> 16); + } + if (unlikely(tx_stat & 0x00000100)) { + dev->stats.collisions += 16; + dev->stats.tx_aborted_errors += 1; + } else { + dev->stats.collisions += + ((tx_stat >> 3) & 0xF); + } + if (unlikely(tx_stat & 0x00000800)) + dev->stats.tx_carrier_errors += 1; + if (unlikely(tx_stat & 0x00000200)) { + dev->stats.collisions++; + dev->stats.tx_aborted_errors++; + } + } + } +} + +/* Increments the Rx error counters */ +static void +smsc911x_rx_counterrors(struct net_device *dev, unsigned int rxstat) +{ + int crc_err = 0; + + if (unlikely(rxstat & 0x00008000)) { + dev->stats.rx_errors++; + if (unlikely(rxstat & 0x00000002)) { + dev->stats.rx_crc_errors++; + crc_err = 1; + } + } + if (likely(!crc_err)) { + if (unlikely((rxstat & 0x00001020) == 0x00001020)) { + /* Frame type indicates length, + * and length error is set */ + dev->stats.rx_length_errors++; + } + if (rxstat & RX_STS_MCAST_) + dev->stats.multicast++; + } +} + +/* Quickly dumps bad packets */ +static void +smsc911x_rx_fastforward(struct smsc911x_data *pdata, unsigned int pktbytes) +{ + unsigned int pktwords = (pktbytes + NET_IP_ALIGN + 3) >> 2; + + if (likely(pktwords >= 4)) { + unsigned int timeout = 500; + unsigned int val; + smsc911x_reg_write(pdata, RX_DP_CTRL, RX_DP_CTRL_RX_FFWD_); + do { + udelay(1); + val = smsc911x_reg_read(pdata, RX_DP_CTRL); + } while (timeout-- && (val & RX_DP_CTRL_RX_FFWD_)); + + if (unlikely(timeout == 0)) + SMSC_WARNING(HW, "Timed out waiting for " + "RX FFWD to finish, RX_DP_CTRL: 0x%08X", val); + } else { + unsigned int temp; + while (pktwords--) + temp = smsc911x_reg_read(pdata, RX_DATA_FIFO); + } +} + +/* NAPI poll function */ +static int smsc911x_poll(struct napi_struct *napi, int budget) +{ + struct smsc911x_data *pdata = + container_of(napi, struct smsc911x_data, napi); + struct net_device *dev = pdata->dev; + int npackets = 0; + + while (likely(netif_running(dev)) && (npackets < budget)) { + unsigned int pktlength; + unsigned int pktwords; + struct sk_buff *skb; + unsigned int rxstat = smsc911x_rx_get_rxstatus(pdata); + + if (!rxstat) { + unsigned int temp; + /* We processed all packets available. Tell NAPI it can + * stop polling then re-enable rx interrupts */ + smsc911x_reg_write(pdata, INT_STS, INT_STS_RSFL_); + netif_rx_complete(dev, napi); + temp = smsc911x_reg_read(pdata, INT_EN); + temp |= INT_EN_RSFL_EN_; + smsc911x_reg_write(pdata, INT_EN, temp); + break; + } + + /* Count packet for NAPI scheduling, even if it has an error. + * Error packets still require cycles to discard */ + npackets++; + + pktlength = ((rxstat & 0x3FFF0000) >> 16); + pktwords = (pktlength + NET_IP_ALIGN + 3) >> 2; + smsc911x_rx_counterrors(dev, rxstat); + + if (unlikely(rxstat & RX_STS_ES_)) { + SMSC_WARNING(RX_ERR, + "Discarding packet with error bit set"); + /* Packet has an error, discard it and continue with + * the next */ + smsc911x_rx_fastforward(pdata, pktwords); + dev->stats.rx_dropped++; + continue; + } + + skb = netdev_alloc_skb(dev, pktlength + NET_IP_ALIGN); + if (unlikely(!skb)) { + SMSC_WARNING(RX_ERR, + "Unable to allocate skb for rx packet"); + /* Drop the packet and stop this polling iteration */ + smsc911x_rx_fastforward(pdata, pktwords); + dev->stats.rx_dropped++; + break; + } + + skb->data = skb->head; + skb_reset_tail_pointer(skb); + + /* Align IP on 16B boundary */ + skb_reserve(skb, NET_IP_ALIGN); + skb_put(skb, pktlength - 4); + smsc911x_rx_readfifo(pdata, (unsigned int *)skb->head, + pktwords); + skb->protocol = eth_type_trans(skb, dev); + skb->ip_summed = CHECKSUM_NONE; + netif_receive_skb(skb); + + /* Update counters */ + dev->stats.rx_packets++; + dev->stats.rx_bytes += (pktlength - 4); + dev->last_rx = jiffies; + } + + /* Return total received packets */ + return npackets; +} + +/* Returns hash bit number for given MAC address + * Example: + * 01 00 5E 00 00 01 -> returns bit number 31 */ +static unsigned int smsc911x_hash(char addr[ETH_ALEN]) +{ + return (ether_crc(ETH_ALEN, addr) >> 26) & 0x3f; +} + +static void smsc911x_rx_multicast_update(struct smsc911x_data *pdata) +{ + /* Performs the multicast & mac_cr update. This is called when + * safe on the current hardware, and with the mac_lock held */ + unsigned int mac_cr; + + SMSC_ASSERT_MAC_LOCK(pdata); + + mac_cr = smsc911x_mac_read(pdata, MAC_CR); + mac_cr |= pdata->set_bits_mask; + mac_cr &= ~(pdata->clear_bits_mask); + smsc911x_mac_write(pdata, MAC_CR, mac_cr); + smsc911x_mac_write(pdata, HASHH, pdata->hashhi); + smsc911x_mac_write(pdata, HASHL, pdata->hashlo); + SMSC_TRACE(HW, "maccr 0x%08X, HASHH 0x%08X, HASHL 0x%08X", + mac_cr, pdata->hashhi, pdata->hashlo); +} + +static void smsc911x_rx_multicast_update_workaround(struct smsc911x_data *pdata) +{ + unsigned int mac_cr; + + /* This function is only called for older LAN911x devices + * (revA or revB), where MAC_CR, HASHH and HASHL should not + * be modified during Rx - newer devices immediately update the + * registers. + * + * This is called from interrupt context */ + + spin_lock(&pdata->mac_lock); + + /* Check Rx has stopped */ + if (smsc911x_mac_read(pdata, MAC_CR) & MAC_CR_RXEN_) + SMSC_WARNING(DRV, "Rx not stopped"); + + /* Perform the update - safe to do now Rx has stopped */ + smsc911x_rx_multicast_update(pdata); + + /* Re-enable Rx */ + mac_cr = smsc911x_mac_read(pdata, MAC_CR); + mac_cr |= MAC_CR_RXEN_; + smsc911x_mac_write(pdata, MAC_CR, mac_cr); + + pdata->multicast_update_pending = 0; + + spin_unlock(&pdata->mac_lock); +} + +static int smsc911x_soft_reset(struct smsc911x_data *pdata) +{ + unsigned int timeout; + unsigned int temp; + + /* Reset the LAN911x */ + smsc911x_reg_write(pdata, HW_CFG, HW_CFG_SRST_); + timeout = 10; + do { + udelay(10); + temp = smsc911x_reg_read(pdata, HW_CFG); + } while ((--timeout) && (temp & HW_CFG_SRST_)); + + if (unlikely(temp & HW_CFG_SRST_)) { + SMSC_WARNING(DRV, "Failed to complete reset"); + return -EIO; + } + return 0; +} + +/* Sets the device MAC address to dev_addr, called with mac_lock held */ +static void +smsc911x_set_mac_address(struct smsc911x_data *pdata, u8 dev_addr[6]) +{ + u32 mac_high16 = (dev_addr[5] << 8) | dev_addr[4]; + u32 mac_low32 = (dev_addr[3] << 24) | (dev_addr[2] << 16) | + (dev_addr[1] << 8) | dev_addr[0]; + + SMSC_ASSERT_MAC_LOCK(pdata); + + smsc911x_mac_write(pdata, ADDRH, mac_high16); + smsc911x_mac_write(pdata, ADDRL, mac_low32); +} + +static int smsc911x_open(struct net_device *dev) +{ + struct smsc911x_data *pdata = netdev_priv(dev); + unsigned int timeout; + unsigned int temp; + unsigned int intcfg; + + /* if the phy is not yet registered, retry later*/ + if (!pdata->phy_dev) { + SMSC_WARNING(HW, "phy_dev is NULL"); + return -EAGAIN; + } + + if (!is_valid_ether_addr(dev->dev_addr)) { + SMSC_WARNING(HW, "dev_addr is not a valid MAC address"); + return -EADDRNOTAVAIL; + } + + /* Reset the LAN911x */ + if (smsc911x_soft_reset(pdata)) { + SMSC_WARNING(HW, "soft reset failed"); + return -EIO; + } + + smsc911x_reg_write(pdata, HW_CFG, 0x00050000); + smsc911x_reg_write(pdata, AFC_CFG, 0x006E3740); + + /* Make sure EEPROM has finished loading before setting GPIO_CFG */ + timeout = 50; + while ((timeout--) && + (smsc911x_reg_read(pdata, E2P_CMD) & E2P_CMD_EPC_BUSY_)) { + udelay(10); + } + + if (unlikely(timeout == 0)) + SMSC_WARNING(IFUP, + "Timed out waiting for EEPROM busy bit to clear"); + + smsc911x_reg_write(pdata, GPIO_CFG, 0x70070000); + + /* The soft reset above cleared the device's MAC address, + * restore it from local copy (set in probe) */ + spin_lock_irq(&pdata->mac_lock); + smsc911x_set_mac_address(pdata, dev->dev_addr); + spin_unlock_irq(&pdata->mac_lock); + + /* Initialise irqs, but leave all sources disabled */ + smsc911x_reg_write(pdata, INT_EN, 0); + smsc911x_reg_write(pdata, INT_STS, 0xFFFFFFFF); + + /* Set interrupt deassertion to 100uS */ + intcfg = ((10 << 24) | INT_CFG_IRQ_EN_); + + if (pdata->irq_polarity) { + SMSC_TRACE(IFUP, "irq polarity: active high"); + intcfg |= INT_CFG_IRQ_POL_; + } else { + SMSC_TRACE(IFUP, "irq polarity: active low"); + } + + if (pdata->irq_type) { + SMSC_TRACE(IFUP, "irq type: push-pull"); + intcfg |= INT_CFG_IRQ_TYPE_; + } else { + SMSC_TRACE(IFUP, "irq type: open drain"); + } + + smsc911x_reg_write(pdata, INT_CFG, intcfg); + + SMSC_TRACE(IFUP, "Testing irq handler using IRQ %d", dev->irq); + pdata->software_irq_signal = 0; + smp_wmb(); + + temp = smsc911x_reg_read(pdata, INT_EN); + temp |= INT_EN_SW_INT_EN_; + smsc911x_reg_write(pdata, INT_EN, temp); + + timeout = 1000; + while (timeout--) { + if (pdata->software_irq_signal) + break; + msleep(1); + } + + if (!pdata->software_irq_signal) { + dev_warn(&dev->dev, "ISR failed signaling test (IRQ %d)\n", + dev->irq); + return -ENODEV; + } + SMSC_TRACE(IFUP, "IRQ handler passed test using IRQ %d", dev->irq); + + dev_info(&dev->dev, "SMSC911x/921x identified at %#08lx, IRQ: %d\n", + (unsigned long)pdata->ioaddr, dev->irq); + + /* Bring the PHY up */ + phy_start(pdata->phy_dev); + + temp = smsc911x_reg_read(pdata, HW_CFG); + /* Preserve TX FIFO size and external PHY configuration */ + temp &= (HW_CFG_TX_FIF_SZ_|0x00000FFF); + temp |= HW_CFG_SF_; + smsc911x_reg_write(pdata, HW_CFG, temp); + + temp = smsc911x_reg_read(pdata, FIFO_INT); + temp |= FIFO_INT_TX_AVAIL_LEVEL_; + temp &= ~(FIFO_INT_RX_STS_LEVEL_); + smsc911x_reg_write(pdata, FIFO_INT, temp); + + /* set RX Data offset to 2 bytes for alignment */ + smsc911x_reg_write(pdata, RX_CFG, (2 << 8)); + + /* enable NAPI polling before enabling RX interrupts */ + napi_enable(&pdata->napi); + + temp = smsc911x_reg_read(pdata, INT_EN); + temp |= (INT_EN_TDFA_EN_ | INT_EN_RSFL_EN_); + smsc911x_reg_write(pdata, INT_EN, temp); + + spin_lock_irq(&pdata->mac_lock); + temp = smsc911x_mac_read(pdata, MAC_CR); + temp |= (MAC_CR_TXEN_ | MAC_CR_RXEN_ | MAC_CR_HBDIS_); + smsc911x_mac_write(pdata, MAC_CR, temp); + spin_unlock_irq(&pdata->mac_lock); + + smsc911x_reg_write(pdata, TX_CFG, TX_CFG_TX_ON_); + + netif_start_queue(dev); + return 0; +} + +/* Entry point for stopping the interface */ +static int smsc911x_stop(struct net_device *dev) +{ + struct smsc911x_data *pdata = netdev_priv(dev); + unsigned int temp; + + BUG_ON(!pdata->phy_dev); + + /* Disable all device interrupts */ + temp = smsc911x_reg_read(pdata, INT_CFG); + temp &= ~INT_CFG_IRQ_EN_; + smsc911x_reg_write(pdata, INT_CFG, temp); + + /* Stop Tx and Rx polling */ + netif_stop_queue(dev); + napi_disable(&pdata->napi); + + /* At this point all Rx and Tx activity is stopped */ + dev->stats.rx_dropped += smsc911x_reg_read(pdata, RX_DROP); + smsc911x_tx_update_txcounters(dev); + + /* Bring the PHY down */ + phy_stop(pdata->phy_dev); + + SMSC_TRACE(IFDOWN, "Interface stopped"); + return 0; +} + +/* Entry point for transmitting a packet */ +static int smsc911x_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct smsc911x_data *pdata = netdev_priv(dev); + unsigned int freespace; + unsigned int tx_cmd_a; + unsigned int tx_cmd_b; + unsigned int temp; + u32 wrsz; + ulong bufp; + + freespace = smsc911x_reg_read(pdata, TX_FIFO_INF) & TX_FIFO_INF_TDFREE_; + + if (unlikely(freespace < TX_FIFO_LOW_THRESHOLD)) + SMSC_WARNING(TX_ERR, + "Tx data fifo low, space available: %d", freespace); + + /* Word alignment adjustment */ + tx_cmd_a = (u32)((ulong)skb->data & 0x03) << 16; + tx_cmd_a |= TX_CMD_A_FIRST_SEG_ | TX_CMD_A_LAST_SEG_; + tx_cmd_a |= (unsigned int)skb->len; + + tx_cmd_b = ((unsigned int)skb->len) << 16; + tx_cmd_b |= (unsigned int)skb->len; + + smsc911x_reg_write(pdata, TX_DATA_FIFO, tx_cmd_a); + smsc911x_reg_write(pdata, TX_DATA_FIFO, tx_cmd_b); + + bufp = (ulong)skb->data & (~0x3); + wrsz = (u32)skb->len + 3; + wrsz += (u32)((ulong)skb->data & 0x3); + wrsz >>= 2; + + smsc911x_tx_writefifo(pdata, (unsigned int *)bufp, wrsz); + freespace -= (skb->len + 32); + dev_kfree_skb(skb); + dev->trans_start = jiffies; + + if (unlikely(smsc911x_tx_get_txstatcount(pdata) >= 30)) + smsc911x_tx_update_txcounters(dev); + + if (freespace < TX_FIFO_LOW_THRESHOLD) { + netif_stop_queue(dev); + temp = smsc911x_reg_read(pdata, FIFO_INT); + temp &= 0x00FFFFFF; + temp |= 0x32000000; + smsc911x_reg_write(pdata, FIFO_INT, temp); + } + + return NETDEV_TX_OK; +} + +/* Entry point for getting status counters */ +static struct net_device_stats *smsc911x_get_stats(struct net_device *dev) +{ + struct smsc911x_data *pdata = netdev_priv(dev); + smsc911x_tx_update_txcounters(dev); + dev->stats.rx_dropped += smsc911x_reg_read(pdata, RX_DROP); + return &dev->stats; +} + +/* Entry point for setting addressing modes */ +static void smsc911x_set_multicast_list(struct net_device *dev) +{ + struct smsc911x_data *pdata = netdev_priv(dev); + unsigned long flags; + + if (dev->flags & IFF_PROMISC) { + /* Enabling promiscuous mode */ + pdata->set_bits_mask = MAC_CR_PRMS_; + pdata->clear_bits_mask = (MAC_CR_MCPAS_ | MAC_CR_HPFILT_); + pdata->hashhi = 0; + pdata->hashlo = 0; + } else if (dev->flags & IFF_ALLMULTI) { + /* Enabling all multicast mode */ + pdata->set_bits_mask = MAC_CR_MCPAS_; + pdata->clear_bits_mask = (MAC_CR_PRMS_ | MAC_CR_HPFILT_); + pdata->hashhi = 0; + pdata->hashlo = 0; + } else if (dev->mc_count > 0) { + /* Enabling specific multicast addresses */ + unsigned int hash_high = 0; + unsigned int hash_low = 0; + unsigned int count = 0; + struct dev_mc_list *mc_list = dev->mc_list; + + pdata->set_bits_mask = MAC_CR_HPFILT_; + pdata->clear_bits_mask = (MAC_CR_PRMS_ | MAC_CR_MCPAS_); + + while (mc_list) { + count++; + if ((mc_list->dmi_addrlen) == ETH_ALEN) { + unsigned int bitnum = + smsc911x_hash(mc_list->dmi_addr); + unsigned int mask = 0x01 << (bitnum & 0x1F); + if (bitnum & 0x20) + hash_high |= mask; + else + hash_low |= mask; + } else { + SMSC_WARNING(DRV, "dmi_addrlen != 6"); + } + mc_list = mc_list->next; + } + if (count != (unsigned int)dev->mc_count) + SMSC_WARNING(DRV, "mc_count != dev->mc_count"); + + pdata->hashhi = hash_high; + pdata->hashlo = hash_low; + } else { + /* Enabling local MAC address only */ + pdata->set_bits_mask = 0; + pdata->clear_bits_mask = + (MAC_CR_PRMS_ | MAC_CR_MCPAS_ | MAC_CR_HPFILT_); + pdata->hashhi = 0; + pdata->hashlo = 0; + } + + spin_lock_irqsave(&pdata->mac_lock, flags); + + if (pdata->generation <= 1) { + /* Older hardware revision - cannot change these flags while + * receiving data */ + if (!pdata->multicast_update_pending) { + unsigned int temp; + SMSC_TRACE(HW, "scheduling mcast update"); + pdata->multicast_update_pending = 1; + + /* Request the hardware to stop, then perform the + * update when we get an RX_STOP interrupt */ + smsc911x_reg_write(pdata, INT_STS, INT_STS_RXSTOP_INT_); + temp = smsc911x_reg_read(pdata, INT_EN); + temp |= INT_EN_RXSTOP_INT_EN_; + smsc911x_reg_write(pdata, INT_EN, temp); + + temp = smsc911x_mac_read(pdata, MAC_CR); + temp &= ~(MAC_CR_RXEN_); + smsc911x_mac_write(pdata, MAC_CR, temp); + } else { + /* There is another update pending, this should now + * use the newer values */ + } + } else { + /* Newer hardware revision - can write immediately */ + smsc911x_rx_multicast_update(pdata); + } + + spin_unlock_irqrestore(&pdata->mac_lock, flags); +} + +static irqreturn_t smsc911x_irqhandler(int irq, void *dev_id) +{ + struct net_device *dev = dev_id; + struct smsc911x_data *pdata = netdev_priv(dev); + u32 intsts = smsc911x_reg_read(pdata, INT_STS); + u32 inten = smsc911x_reg_read(pdata, INT_EN); + int serviced = IRQ_NONE; + u32 temp; + + if (unlikely(intsts & inten & INT_STS_SW_INT_)) { + temp = smsc911x_reg_read(pdata, INT_EN); + temp &= (~INT_EN_SW_INT_EN_); + smsc911x_reg_write(pdata, INT_EN, temp); + smsc911x_reg_write(pdata, INT_STS, INT_STS_SW_INT_); + pdata->software_irq_signal = 1; + smp_wmb(); + serviced = IRQ_HANDLED; + } + + if (unlikely(intsts & inten & INT_STS_RXSTOP_INT_)) { + /* Called when there is a multicast update scheduled and + * it is now safe to complete the update */ + SMSC_TRACE(INTR, "RX Stop interrupt"); + temp = smsc911x_reg_read(pdata, INT_EN); + temp &= (~INT_EN_RXSTOP_INT_EN_); + smsc911x_reg_write(pdata, INT_EN, temp); + smsc911x_reg_write(pdata, INT_STS, INT_STS_RXSTOP_INT_); + smsc911x_rx_multicast_update_workaround(pdata); + serviced = IRQ_HANDLED; + } + + if (intsts & inten & INT_STS_TDFA_) { + temp = smsc911x_reg_read(pdata, FIFO_INT); + temp |= FIFO_INT_TX_AVAIL_LEVEL_; + smsc911x_reg_write(pdata, FIFO_INT, temp); + smsc911x_reg_write(pdata, INT_STS, INT_STS_TDFA_); + netif_wake_queue(dev); + serviced = IRQ_HANDLED; + } + + if (unlikely(intsts & inten & INT_STS_RXE_)) { + SMSC_TRACE(INTR, "RX Error interrupt"); + smsc911x_reg_write(pdata, INT_STS, INT_STS_RXE_); + serviced = IRQ_HANDLED; + } + + if (likely(intsts & inten & INT_STS_RSFL_)) { + if (likely(netif_rx_schedule_prep(dev, &pdata->napi))) { + /* Disable Rx interrupts */ + temp = smsc911x_reg_read(pdata, INT_EN); + temp &= (~INT_EN_RSFL_EN_); + smsc911x_reg_write(pdata, INT_EN, temp); + /* Schedule a NAPI poll */ + __netif_rx_schedule(dev, &pdata->napi); + } else { + SMSC_WARNING(RX_ERR, + "netif_rx_schedule_prep failed"); + } + serviced = IRQ_HANDLED; + } + + return serviced; +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +void smsc911x_poll_controller(struct net_device *dev) +{ + disable_irq(dev->irq); + smsc911x_irqhandler(0, dev); + enable_irq(dev->irq); +} +#endif /* CONFIG_NET_POLL_CONTROLLER */ + +/* Standard ioctls for mii-tool */ +static int smsc911x_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct smsc911x_data *pdata = netdev_priv(dev); + + if (!netif_running(dev) || !pdata->phy_dev) + return -EINVAL; + + return phy_mii_ioctl(pdata->phy_dev, if_mii(ifr), cmd); +} + +static int +smsc911x_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct smsc911x_data *pdata = netdev_priv(dev); + + cmd->maxtxpkt = 1; + cmd->maxrxpkt = 1; + return phy_ethtool_gset(pdata->phy_dev, cmd); +} + +static int +smsc911x_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct smsc911x_data *pdata = netdev_priv(dev); + + return phy_ethtool_sset(pdata->phy_dev, cmd); +} + +static void smsc911x_ethtool_getdrvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strlcpy(info->driver, SMSC_CHIPNAME, sizeof(info->driver)); + strlcpy(info->version, SMSC_DRV_VERSION, sizeof(info->version)); + strlcpy(info->bus_info, dev->dev.parent->bus_id, + sizeof(info->bus_info)); +} + +static int smsc911x_ethtool_nwayreset(struct net_device *dev) +{ + struct smsc911x_data *pdata = netdev_priv(dev); + + return phy_start_aneg(pdata->phy_dev); +} + +static u32 smsc911x_ethtool_getmsglevel(struct net_device *dev) +{ + struct smsc911x_data *pdata = netdev_priv(dev); + return pdata->msg_enable; +} + +static void smsc911x_ethtool_setmsglevel(struct net_device *dev, u32 level) +{ + struct smsc911x_data *pdata = netdev_priv(dev); + pdata->msg_enable = level; +} + +static int smsc911x_ethtool_getregslen(struct net_device *dev) +{ + return (((E2P_DATA - ID_REV) / 4 + 1) + (WUCSR - MAC_CR) + 1 + 32) * + sizeof(u32); +} + +static void +smsc911x_ethtool_getregs(struct net_device *dev, struct ethtool_regs *regs, + void *buf) +{ + struct smsc911x_data *pdata = netdev_priv(dev); + struct phy_device *phy_dev = pdata->phy_dev; + unsigned long flags; + unsigned int i; + unsigned int j = 0; + u32 *data = buf; + + regs->version = pdata->idrev; + for (i = ID_REV; i <= E2P_DATA; i += (sizeof(u32))) + data[j++] = smsc911x_reg_read(pdata, i); + + for (i = MAC_CR; i <= WUCSR; i++) { + spin_lock_irqsave(&pdata->mac_lock, flags); + data[j++] = smsc911x_mac_read(pdata, i); + spin_unlock_irqrestore(&pdata->mac_lock, flags); + } + + for (i = 0; i <= 31; i++) + data[j++] = smsc911x_mii_read(phy_dev->bus, phy_dev->addr, i); +} + +static void smsc911x_eeprom_enable_access(struct smsc911x_data *pdata) +{ + unsigned int temp = smsc911x_reg_read(pdata, GPIO_CFG); + temp &= ~GPIO_CFG_EEPR_EN_; + smsc911x_reg_write(pdata, GPIO_CFG, temp); + msleep(1); +} + +static int smsc911x_eeprom_send_cmd(struct smsc911x_data *pdata, u32 op) +{ + int timeout = 100; + u32 e2cmd; + + SMSC_TRACE(DRV, "op 0x%08x", op); + if (smsc911x_reg_read(pdata, E2P_CMD) & E2P_CMD_EPC_BUSY_) { + SMSC_WARNING(DRV, "Busy at start"); + return -EBUSY; + } + + e2cmd = op | E2P_CMD_EPC_BUSY_; + smsc911x_reg_write(pdata, E2P_CMD, e2cmd); + + do { + msleep(1); + e2cmd = smsc911x_reg_read(pdata, E2P_CMD); + } while ((e2cmd & E2P_CMD_EPC_BUSY_) && (timeout--)); + + if (!timeout) { + SMSC_TRACE(DRV, "TIMED OUT"); + return -EAGAIN; + } + + if (e2cmd & E2P_CMD_EPC_TIMEOUT_) { + SMSC_TRACE(DRV, "Error occured during eeprom operation"); + return -EINVAL; + } + + return 0; +} + +static int smsc911x_eeprom_read_location(struct smsc911x_data *pdata, + u8 address, u8 *data) +{ + u32 op = E2P_CMD_EPC_CMD_READ_ | address; + int ret; + + SMSC_TRACE(DRV, "address 0x%x", address); + ret = smsc911x_eeprom_send_cmd(pdata, op); + + if (!ret) + data[address] = smsc911x_reg_read(pdata, E2P_DATA); + + return ret; +} + +static int smsc911x_eeprom_write_location(struct smsc911x_data *pdata, + u8 address, u8 data) +{ + u32 op = E2P_CMD_EPC_CMD_ERASE_ | address; + int ret; + + SMSC_TRACE(DRV, "address 0x%x, data 0x%x", address, data); + ret = smsc911x_eeprom_send_cmd(pdata, op); + + if (!ret) { + op = E2P_CMD_EPC_CMD_WRITE_ | address; + smsc911x_reg_write(pdata, E2P_DATA, (u32)data); + ret = smsc911x_eeprom_send_cmd(pdata, op); + } + + return ret; +} + +static int smsc911x_ethtool_get_eeprom_len(struct net_device *dev) +{ + return SMSC911X_EEPROM_SIZE; +} + +static int smsc911x_ethtool_get_eeprom(struct net_device *dev, + struct ethtool_eeprom *eeprom, u8 *data) +{ + struct smsc911x_data *pdata = netdev_priv(dev); + u8 eeprom_data[SMSC911X_EEPROM_SIZE]; + int len; + int i; + + smsc911x_eeprom_enable_access(pdata); + + len = min(eeprom->len, SMSC911X_EEPROM_SIZE); + for (i = 0; i < len; i++) { + int ret = smsc911x_eeprom_read_location(pdata, i, eeprom_data); + if (ret < 0) { + eeprom->len = 0; + return ret; + } + } + + memcpy(data, &eeprom_data[eeprom->offset], len); + eeprom->len = len; + return 0; +} + +static int smsc911x_ethtool_set_eeprom(struct net_device *dev, + struct ethtool_eeprom *eeprom, u8 *data) +{ + int ret; + struct smsc911x_data *pdata = netdev_priv(dev); + + smsc911x_eeprom_enable_access(pdata); + smsc911x_eeprom_send_cmd(pdata, E2P_CMD_EPC_CMD_EWEN_); + ret = smsc911x_eeprom_write_location(pdata, eeprom->offset, *data); + smsc911x_eeprom_send_cmd(pdata, E2P_CMD_EPC_CMD_EWDS_); + + /* Single byte write, according to man page */ + eeprom->len = 1; + + return ret; +} + +static struct ethtool_ops smsc911x_ethtool_ops = { + .get_settings = smsc911x_ethtool_getsettings, + .set_settings = smsc911x_ethtool_setsettings, + .get_link = ethtool_op_get_link, + .get_drvinfo = smsc911x_ethtool_getdrvinfo, + .nway_reset = smsc911x_ethtool_nwayreset, + .get_msglevel = smsc911x_ethtool_getmsglevel, + .set_msglevel = smsc911x_ethtool_setmsglevel, + .get_regs_len = smsc911x_ethtool_getregslen, + .get_regs = smsc911x_ethtool_getregs, + .get_eeprom_len = smsc911x_ethtool_get_eeprom_len, + .get_eeprom = smsc911x_ethtool_get_eeprom, + .set_eeprom = smsc911x_ethtool_set_eeprom, +}; + +/* Initializing private device structures, only called from probe */ +static int __devinit smsc911x_init(struct net_device *dev) +{ + struct smsc911x_data *pdata = netdev_priv(dev); + unsigned int byte_test; + + SMSC_TRACE(PROBE, "Driver Parameters:"); + SMSC_TRACE(PROBE, "LAN base: 0x%08lX", + (unsigned long)pdata->ioaddr); + SMSC_TRACE(PROBE, "IRQ: %d", dev->irq); + SMSC_TRACE(PROBE, "PHY will be autodetected."); + +#if (!SMSC_CAN_USE_32BIT) + spin_lock_init(&pdata->dev_lock); +#endif + + if (pdata->ioaddr == 0) { + SMSC_WARNING(PROBE, "pdata->ioaddr: 0x00000000"); + return -ENODEV; + } + + /* Check byte ordering */ + byte_test = smsc911x_reg_read(pdata, BYTE_TEST); + SMSC_TRACE(PROBE, "BYTE_TEST: 0x%08X", byte_test); + if (byte_test == 0x43218765) { + SMSC_TRACE(PROBE, "BYTE_TEST looks swapped, " + "applying WORD_SWAP"); + smsc911x_reg_write(pdata, WORD_SWAP, 0xffffffff); + + /* 1 dummy read of BYTE_TEST is needed after a write to + * WORD_SWAP before its contents are valid */ + byte_test = smsc911x_reg_read(pdata, BYTE_TEST); + + byte_test = smsc911x_reg_read(pdata, BYTE_TEST); + } + + if (byte_test != 0x87654321) { + SMSC_WARNING(DRV, "BYTE_TEST: 0x%08X", byte_test); + if (((byte_test >> 16) & 0xFFFF) == (byte_test & 0xFFFF)) { + SMSC_WARNING(PROBE, + "top 16 bits equal to bottom 16 bits"); + SMSC_TRACE(PROBE, "This may mean the chip is set " + "for 32 bit while the bus is reading 16 bit"); + } + return -ENODEV; + } + + /* Default generation to zero (all workarounds apply) */ + pdata->generation = 0; + + pdata->idrev = smsc911x_reg_read(pdata, ID_REV); + switch (pdata->idrev & 0xFFFF0000) { + case 0x01180000: + case 0x01170000: + case 0x01160000: + case 0x01150000: + /* LAN911[5678] family */ + pdata->generation = pdata->idrev & 0x0000FFFF; + break; + + case 0x118A0000: + case 0x117A0000: + case 0x116A0000: + case 0x115A0000: + /* LAN921[5678] family */ + pdata->generation = 3; + break; + + case 0x92100000: + case 0x92110000: + case 0x92200000: + case 0x92210000: + /* LAN9210/LAN9211/LAN9220/LAN9221 */ + pdata->generation = 4; + break; + + default: + SMSC_WARNING(PROBE, "LAN911x not identified, idrev: 0x%08X", + pdata->idrev); + return -ENODEV; + } + + SMSC_TRACE(PROBE, "LAN911x identified, idrev: 0x%08X, generation: %d", + pdata->idrev, pdata->generation); + + if (pdata->generation == 0) + SMSC_WARNING(PROBE, + "This driver is not intended for this chip revision"); + + /* Reset the LAN911x */ + if (smsc911x_soft_reset(pdata)) + return -ENODEV; + + /* Disable all interrupt sources until we bring the device up */ + smsc911x_reg_write(pdata, INT_EN, 0); + + ether_setup(dev); + dev->open = smsc911x_open; + dev->stop = smsc911x_stop; + dev->hard_start_xmit = smsc911x_hard_start_xmit; + dev->get_stats = smsc911x_get_stats; + dev->set_multicast_list = smsc911x_set_multicast_list; + dev->flags |= IFF_MULTICAST; + dev->do_ioctl = smsc911x_do_ioctl; + netif_napi_add(dev, &pdata->napi, smsc911x_poll, SMSC_NAPI_WEIGHT); + dev->ethtool_ops = &smsc911x_ethtool_ops; + +#ifdef CONFIG_NET_POLL_CONTROLLER + dev->poll_controller = smsc911x_poll_controller; +#endif /* CONFIG_NET_POLL_CONTROLLER */ + + return 0; +} + +static int __devexit smsc911x_drv_remove(struct platform_device *pdev) +{ + struct net_device *dev; + struct smsc911x_data *pdata; + struct resource *res; + + dev = platform_get_drvdata(pdev); + BUG_ON(!dev); + pdata = netdev_priv(dev); + BUG_ON(!pdata); + BUG_ON(!pdata->ioaddr); + BUG_ON(!pdata->phy_dev); + + SMSC_TRACE(IFDOWN, "Stopping driver."); + + phy_disconnect(pdata->phy_dev); + pdata->phy_dev = NULL; + mdiobus_unregister(pdata->mii_bus); + mdiobus_free(pdata->mii_bus); + + platform_set_drvdata(pdev, NULL); + unregister_netdev(dev); + free_irq(dev->irq, dev); + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + "smsc911x-memory"); + if (!res) + platform_get_resource(pdev, IORESOURCE_MEM, 0); + + release_mem_region(res->start, res->end - res->start); + + iounmap(pdata->ioaddr); + + free_netdev(dev); + + return 0; +} + +static int __devinit smsc911x_drv_probe(struct platform_device *pdev) +{ + struct net_device *dev; + struct smsc911x_data *pdata; + struct resource *res; + unsigned int intcfg = 0; + int res_size; + int retval; + DECLARE_MAC_BUF(mac); + + pr_info("%s: Driver version %s.\n", SMSC_CHIPNAME, SMSC_DRV_VERSION); + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + "smsc911x-memory"); + if (!res) + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + pr_warning("%s: Could not allocate resource.\n", + SMSC_CHIPNAME); + retval = -ENODEV; + goto out_0; + } + res_size = res->end - res->start; + + if (!request_mem_region(res->start, res_size, SMSC_CHIPNAME)) { + retval = -EBUSY; + goto out_0; + } + + dev = alloc_etherdev(sizeof(struct smsc911x_data)); + if (!dev) { + pr_warning("%s: Could not allocate device.\n", SMSC_CHIPNAME); + retval = -ENOMEM; + goto out_release_io_1; + } + + SET_NETDEV_DEV(dev, &pdev->dev); + + pdata = netdev_priv(dev); + + dev->irq = platform_get_irq(pdev, 0); + pdata->ioaddr = ioremap_nocache(res->start, res_size); + + /* copy config parameters across if present, otherwise pdata + * defaults to zeros */ + if (pdev->dev.platform_data) { + struct smsc911x_platform_config *config = + pdev->dev.platform_data; + pdata->irq_polarity = config->irq_polarity; + pdata->irq_type = config->irq_type; + pdata->phy_interface = config->phy_interface; + } + + pdata->dev = dev; + pdata->msg_enable = ((1 << debug) - 1); + + if (pdata->ioaddr == NULL) { + SMSC_WARNING(PROBE, + "Error smsc911x base address invalid"); + retval = -ENOMEM; + goto out_free_netdev_2; + } + + retval = smsc911x_init(dev); + if (retval < 0) + goto out_unmap_io_3; + + /* configure irq polarity and type before connecting isr */ + if (pdata->irq_polarity == SMSC911X_IRQ_POLARITY_ACTIVE_HIGH) + intcfg |= INT_CFG_IRQ_POL_; + + if (pdata->irq_type == SMSC911X_IRQ_TYPE_PUSH_PULL) + intcfg |= INT_CFG_IRQ_TYPE_; + + smsc911x_reg_write(pdata, INT_CFG, intcfg); + + /* Ensure interrupts are globally disabled before connecting ISR */ + smsc911x_reg_write(pdata, INT_EN, 0); + smsc911x_reg_write(pdata, INT_STS, 0xFFFFFFFF); + + retval = request_irq(dev->irq, smsc911x_irqhandler, IRQF_DISABLED, + SMSC_CHIPNAME, dev); + if (retval) { + SMSC_WARNING(PROBE, + "Unable to claim requested irq: %d", dev->irq); + goto out_unmap_io_3; + } + + platform_set_drvdata(pdev, dev); + + retval = register_netdev(dev); + if (retval) { + SMSC_WARNING(PROBE, + "Error %i registering device", retval); + goto out_unset_drvdata_4; + } else { + SMSC_TRACE(PROBE, "Network interface: \"%s\"", dev->name); + } + + spin_lock_init(&pdata->mac_lock); + + retval = smsc911x_mii_init(pdev, dev); + if (retval) { + SMSC_WARNING(PROBE, + "Error %i initialising mii", retval); + goto out_unregister_netdev_5; + } + + spin_lock_irq(&pdata->mac_lock); + + /* Check if mac address has been specified when bringing interface up */ + if (is_valid_ether_addr(dev->dev_addr)) { + smsc911x_set_mac_address(pdata, dev->dev_addr); + SMSC_TRACE(PROBE, "MAC Address is specified by configuration"); + } else { + /* Try reading mac address from device. if EEPROM is present + * it will already have been set */ + u32 mac_high16 = smsc911x_mac_read(pdata, ADDRH); + u32 mac_low32 = smsc911x_mac_read(pdata, ADDRL); + dev->dev_addr[0] = (u8)(mac_low32); + dev->dev_addr[1] = (u8)(mac_low32 >> 8); + dev->dev_addr[2] = (u8)(mac_low32 >> 16); + dev->dev_addr[3] = (u8)(mac_low32 >> 24); + dev->dev_addr[4] = (u8)(mac_high16); + dev->dev_addr[5] = (u8)(mac_high16 >> 8); + + if (is_valid_ether_addr(dev->dev_addr)) { + /* eeprom values are valid so use them */ + SMSC_TRACE(PROBE, + "Mac Address is read from LAN911x EEPROM"); + } else { + /* eeprom values are invalid, generate random MAC */ + random_ether_addr(dev->dev_addr); + smsc911x_set_mac_address(pdata, dev->dev_addr); + SMSC_TRACE(PROBE, + "MAC Address is set to random_ether_addr"); + } + } + + spin_unlock_irq(&pdata->mac_lock); + + dev_info(&dev->dev, "MAC Address: %s\n", + print_mac(mac, dev->dev_addr)); + + return 0; + +out_unregister_netdev_5: + unregister_netdev(dev); +out_unset_drvdata_4: + platform_set_drvdata(pdev, NULL); + free_irq(dev->irq, dev); +out_unmap_io_3: + iounmap(pdata->ioaddr); +out_free_netdev_2: + free_netdev(dev); +out_release_io_1: + release_mem_region(res->start, res->end - res->start); +out_0: + return retval; +} + +static struct platform_driver smsc911x_driver = { + .probe = smsc911x_drv_probe, + .remove = smsc911x_drv_remove, + .driver = { + .name = SMSC_CHIPNAME, + }, +}; + +/* Entry point for loading the module */ +static int __init smsc911x_init_module(void) +{ + return platform_driver_register(&smsc911x_driver); +} + +/* entry point for unloading the module */ +static void __exit smsc911x_cleanup_module(void) +{ + platform_driver_unregister(&smsc911x_driver); +} + +module_init(smsc911x_init_module); +module_exit(smsc911x_cleanup_module); diff --git a/drivers/net/smsc911x.h b/drivers/net/smsc911x.h new file mode 100644 index 000000000000..feb36de274ca --- /dev/null +++ b/drivers/net/smsc911x.h @@ -0,0 +1,394 @@ +/*************************************************************************** + * + * Copyright (C) 2004-2008 SMSC + * Copyright (C) 2005-2008 ARM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + ***************************************************************************/ +#ifndef __SMSC911X_H__ +#define __SMSC911X_H__ + +#define SMSC_CAN_USE_32BIT 1 +#define TX_FIFO_LOW_THRESHOLD ((u32)1600) +#define SMSC911X_EEPROM_SIZE ((u32)7) +#define USE_DEBUG 0 + +/* This is the maximum number of packets to be received every + * NAPI poll */ +#define SMSC_NAPI_WEIGHT 16 + +/* implements a PHY loopback test at initialisation time, to ensure a packet + * can be succesfully looped back */ +#define USE_PHY_WORK_AROUND + +#define DPRINTK(nlevel, klevel, fmt, args...) \ + ((void)((NETIF_MSG_##nlevel & pdata->msg_enable) && \ + printk(KERN_##klevel "%s: %s: " fmt "\n", \ + pdata->dev->name, __func__, ## args))) + +#if USE_DEBUG >= 1 +#define SMSC_WARNING(nlevel, fmt, args...) \ + DPRINTK(nlevel, WARNING, fmt, ## args) +#else +#define SMSC_WARNING(nlevel, fmt, args...) \ + ({ do {} while (0); 0; }) +#endif + +#if USE_DEBUG >= 2 +#define SMSC_TRACE(nlevel, fmt, args...) \ + DPRINTK(nlevel, INFO, fmt, ## args) +#else +#define SMSC_TRACE(nlevel, fmt, args...) \ + ({ do {} while (0); 0; }) +#endif + +#ifdef CONFIG_DEBUG_SPINLOCK +#define SMSC_ASSERT_MAC_LOCK(pdata) \ + WARN_ON(!spin_is_locked(&pdata->mac_lock)) +#else +#define SMSC_ASSERT_MAC_LOCK(pdata) do {} while (0) +#endif /* CONFIG_DEBUG_SPINLOCK */ + +#define FLOW_CTRL_TX (1) +#define FLOW_CTRL_RX (2) + +/* SMSC911x registers and bitfields */ +#define RX_DATA_FIFO 0x00 + +#define TX_DATA_FIFO 0x20 +#define TX_CMD_A_ON_COMP_ 0x80000000 +#define TX_CMD_A_BUF_END_ALGN_ 0x03000000 +#define TX_CMD_A_4_BYTE_ALGN_ 0x00000000 +#define TX_CMD_A_16_BYTE_ALGN_ 0x01000000 +#define TX_CMD_A_32_BYTE_ALGN_ 0x02000000 +#define TX_CMD_A_DATA_OFFSET_ 0x001F0000 +#define TX_CMD_A_FIRST_SEG_ 0x00002000 +#define TX_CMD_A_LAST_SEG_ 0x00001000 +#define TX_CMD_A_BUF_SIZE_ 0x000007FF +#define TX_CMD_B_PKT_TAG_ 0xFFFF0000 +#define TX_CMD_B_ADD_CRC_DISABLE_ 0x00002000 +#define TX_CMD_B_DISABLE_PADDING_ 0x00001000 +#define TX_CMD_B_PKT_BYTE_LENGTH_ 0x000007FF + +#define RX_STATUS_FIFO 0x40 +#define RX_STS_ES_ 0x00008000 +#define RX_STS_MCAST_ 0x00000400 + +#define RX_STATUS_FIFO_PEEK 0x44 + +#define TX_STATUS_FIFO 0x48 +#define TX_STS_ES_ 0x00008000 + +#define TX_STATUS_FIFO_PEEK 0x4C + +#define ID_REV 0x50 +#define ID_REV_CHIP_ID_ 0xFFFF0000 +#define ID_REV_REV_ID_ 0x0000FFFF + +#define INT_CFG 0x54 +#define INT_CFG_INT_DEAS_ 0xFF000000 +#define INT_CFG_INT_DEAS_CLR_ 0x00004000 +#define INT_CFG_INT_DEAS_STS_ 0x00002000 +#define INT_CFG_IRQ_INT_ 0x00001000 +#define INT_CFG_IRQ_EN_ 0x00000100 +#define INT_CFG_IRQ_POL_ 0x00000010 +#define INT_CFG_IRQ_TYPE_ 0x00000001 + +#define INT_STS 0x58 +#define INT_STS_SW_INT_ 0x80000000 +#define INT_STS_TXSTOP_INT_ 0x02000000 +#define INT_STS_RXSTOP_INT_ 0x01000000 +#define INT_STS_RXDFH_INT_ 0x00800000 +#define INT_STS_RXDF_INT_ 0x00400000 +#define INT_STS_TX_IOC_ 0x00200000 +#define INT_STS_RXD_INT_ 0x00100000 +#define INT_STS_GPT_INT_ 0x00080000 +#define INT_STS_PHY_INT_ 0x00040000 +#define INT_STS_PME_INT_ 0x00020000 +#define INT_STS_TXSO_ 0x00010000 +#define INT_STS_RWT_ 0x00008000 +#define INT_STS_RXE_ 0x00004000 +#define INT_STS_TXE_ 0x00002000 +#define INT_STS_TDFU_ 0x00000800 +#define INT_STS_TDFO_ 0x00000400 +#define INT_STS_TDFA_ 0x00000200 +#define INT_STS_TSFF_ 0x00000100 +#define INT_STS_TSFL_ 0x00000080 +#define INT_STS_RXDF_ 0x00000040 +#define INT_STS_RDFL_ 0x00000020 +#define INT_STS_RSFF_ 0x00000010 +#define INT_STS_RSFL_ 0x00000008 +#define INT_STS_GPIO2_INT_ 0x00000004 +#define INT_STS_GPIO1_INT_ 0x00000002 +#define INT_STS_GPIO0_INT_ 0x00000001 + +#define INT_EN 0x5C +#define INT_EN_SW_INT_EN_ 0x80000000 +#define INT_EN_TXSTOP_INT_EN_ 0x02000000 +#define INT_EN_RXSTOP_INT_EN_ 0x01000000 +#define INT_EN_RXDFH_INT_EN_ 0x00800000 +#define INT_EN_TIOC_INT_EN_ 0x00200000 +#define INT_EN_RXD_INT_EN_ 0x00100000 +#define INT_EN_GPT_INT_EN_ 0x00080000 +#define INT_EN_PHY_INT_EN_ 0x00040000 +#define INT_EN_PME_INT_EN_ 0x00020000 +#define INT_EN_TXSO_EN_ 0x00010000 +#define INT_EN_RWT_EN_ 0x00008000 +#define INT_EN_RXE_EN_ 0x00004000 +#define INT_EN_TXE_EN_ 0x00002000 +#define INT_EN_TDFU_EN_ 0x00000800 +#define INT_EN_TDFO_EN_ 0x00000400 +#define INT_EN_TDFA_EN_ 0x00000200 +#define INT_EN_TSFF_EN_ 0x00000100 +#define INT_EN_TSFL_EN_ 0x00000080 +#define INT_EN_RXDF_EN_ 0x00000040 +#define INT_EN_RDFL_EN_ 0x00000020 +#define INT_EN_RSFF_EN_ 0x00000010 +#define INT_EN_RSFL_EN_ 0x00000008 +#define INT_EN_GPIO2_INT_ 0x00000004 +#define INT_EN_GPIO1_INT_ 0x00000002 +#define INT_EN_GPIO0_INT_ 0x00000001 + +#define BYTE_TEST 0x64 + +#define FIFO_INT 0x68 +#define FIFO_INT_TX_AVAIL_LEVEL_ 0xFF000000 +#define FIFO_INT_TX_STS_LEVEL_ 0x00FF0000 +#define FIFO_INT_RX_AVAIL_LEVEL_ 0x0000FF00 +#define FIFO_INT_RX_STS_LEVEL_ 0x000000FF + +#define RX_CFG 0x6C +#define RX_CFG_RX_END_ALGN_ 0xC0000000 +#define RX_CFG_RX_END_ALGN4_ 0x00000000 +#define RX_CFG_RX_END_ALGN16_ 0x40000000 +#define RX_CFG_RX_END_ALGN32_ 0x80000000 +#define RX_CFG_RX_DMA_CNT_ 0x0FFF0000 +#define RX_CFG_RX_DUMP_ 0x00008000 +#define RX_CFG_RXDOFF_ 0x00001F00 + +#define TX_CFG 0x70 +#define TX_CFG_TXS_DUMP_ 0x00008000 +#define TX_CFG_TXD_DUMP_ 0x00004000 +#define TX_CFG_TXSAO_ 0x00000004 +#define TX_CFG_TX_ON_ 0x00000002 +#define TX_CFG_STOP_TX_ 0x00000001 + +#define HW_CFG 0x74 +#define HW_CFG_TTM_ 0x00200000 +#define HW_CFG_SF_ 0x00100000 +#define HW_CFG_TX_FIF_SZ_ 0x000F0000 +#define HW_CFG_TR_ 0x00003000 +#define HW_CFG_SRST_ 0x00000001 + +/* only available on 115/117 */ +#define HW_CFG_PHY_CLK_SEL_ 0x00000060 +#define HW_CFG_PHY_CLK_SEL_INT_PHY_ 0x00000000 +#define HW_CFG_PHY_CLK_SEL_EXT_PHY_ 0x00000020 +#define HW_CFG_PHY_CLK_SEL_CLK_DIS_ 0x00000040 +#define HW_CFG_SMI_SEL_ 0x00000010 +#define HW_CFG_EXT_PHY_DET_ 0x00000008 +#define HW_CFG_EXT_PHY_EN_ 0x00000004 +#define HW_CFG_SRST_TO_ 0x00000002 + +/* only available on 116/118 */ +#define HW_CFG_32_16_BIT_MODE_ 0x00000004 + +#define RX_DP_CTRL 0x78 +#define RX_DP_CTRL_RX_FFWD_ 0x80000000 + +#define RX_FIFO_INF 0x7C +#define RX_FIFO_INF_RXSUSED_ 0x00FF0000 +#define RX_FIFO_INF_RXDUSED_ 0x0000FFFF + +#define TX_FIFO_INF 0x80 +#define TX_FIFO_INF_TSUSED_ 0x00FF0000 +#define TX_FIFO_INF_TDFREE_ 0x0000FFFF + +#define PMT_CTRL 0x84 +#define PMT_CTRL_PM_MODE_ 0x00003000 +#define PMT_CTRL_PM_MODE_D0_ 0x00000000 +#define PMT_CTRL_PM_MODE_D1_ 0x00001000 +#define PMT_CTRL_PM_MODE_D2_ 0x00002000 +#define PMT_CTRL_PM_MODE_D3_ 0x00003000 +#define PMT_CTRL_PHY_RST_ 0x00000400 +#define PMT_CTRL_WOL_EN_ 0x00000200 +#define PMT_CTRL_ED_EN_ 0x00000100 +#define PMT_CTRL_PME_TYPE_ 0x00000040 +#define PMT_CTRL_WUPS_ 0x00000030 +#define PMT_CTRL_WUPS_NOWAKE_ 0x00000000 +#define PMT_CTRL_WUPS_ED_ 0x00000010 +#define PMT_CTRL_WUPS_WOL_ 0x00000020 +#define PMT_CTRL_WUPS_MULTI_ 0x00000030 +#define PMT_CTRL_PME_IND_ 0x00000008 +#define PMT_CTRL_PME_POL_ 0x00000004 +#define PMT_CTRL_PME_EN_ 0x00000002 +#define PMT_CTRL_READY_ 0x00000001 + +#define GPIO_CFG 0x88 +#define GPIO_CFG_LED3_EN_ 0x40000000 +#define GPIO_CFG_LED2_EN_ 0x20000000 +#define GPIO_CFG_LED1_EN_ 0x10000000 +#define GPIO_CFG_GPIO2_INT_POL_ 0x04000000 +#define GPIO_CFG_GPIO1_INT_POL_ 0x02000000 +#define GPIO_CFG_GPIO0_INT_POL_ 0x01000000 +#define GPIO_CFG_EEPR_EN_ 0x00700000 +#define GPIO_CFG_GPIOBUF2_ 0x00040000 +#define GPIO_CFG_GPIOBUF1_ 0x00020000 +#define GPIO_CFG_GPIOBUF0_ 0x00010000 +#define GPIO_CFG_GPIODIR2_ 0x00000400 +#define GPIO_CFG_GPIODIR1_ 0x00000200 +#define GPIO_CFG_GPIODIR0_ 0x00000100 +#define GPIO_CFG_GPIOD4_ 0x00000020 +#define GPIO_CFG_GPIOD3_ 0x00000010 +#define GPIO_CFG_GPIOD2_ 0x00000004 +#define GPIO_CFG_GPIOD1_ 0x00000002 +#define GPIO_CFG_GPIOD0_ 0x00000001 + +#define GPT_CFG 0x8C +#define GPT_CFG_TIMER_EN_ 0x20000000 +#define GPT_CFG_GPT_LOAD_ 0x0000FFFF + +#define GPT_CNT 0x90 +#define GPT_CNT_GPT_CNT_ 0x0000FFFF + +#define WORD_SWAP 0x98 + +#define FREE_RUN 0x9C + +#define RX_DROP 0xA0 + +#define MAC_CSR_CMD 0xA4 +#define MAC_CSR_CMD_CSR_BUSY_ 0x80000000 +#define MAC_CSR_CMD_R_NOT_W_ 0x40000000 +#define MAC_CSR_CMD_CSR_ADDR_ 0x000000FF + +#define MAC_CSR_DATA 0xA8 + +#define AFC_CFG 0xAC +#define AFC_CFG_AFC_HI_ 0x00FF0000 +#define AFC_CFG_AFC_LO_ 0x0000FF00 +#define AFC_CFG_BACK_DUR_ 0x000000F0 +#define AFC_CFG_FCMULT_ 0x00000008 +#define AFC_CFG_FCBRD_ 0x00000004 +#define AFC_CFG_FCADD_ 0x00000002 +#define AFC_CFG_FCANY_ 0x00000001 + +#define E2P_CMD 0xB0 +#define E2P_CMD_EPC_BUSY_ 0x80000000 +#define E2P_CMD_EPC_CMD_ 0x70000000 +#define E2P_CMD_EPC_CMD_READ_ 0x00000000 +#define E2P_CMD_EPC_CMD_EWDS_ 0x10000000 +#define E2P_CMD_EPC_CMD_EWEN_ 0x20000000 +#define E2P_CMD_EPC_CMD_WRITE_ 0x30000000 +#define E2P_CMD_EPC_CMD_WRAL_ 0x40000000 +#define E2P_CMD_EPC_CMD_ERASE_ 0x50000000 +#define E2P_CMD_EPC_CMD_ERAL_ 0x60000000 +#define E2P_CMD_EPC_CMD_RELOAD_ 0x70000000 +#define E2P_CMD_EPC_TIMEOUT_ 0x00000200 +#define E2P_CMD_MAC_ADDR_LOADED_ 0x00000100 +#define E2P_CMD_EPC_ADDR_ 0x000000FF + +#define E2P_DATA 0xB4 +#define E2P_DATA_EEPROM_DATA_ 0x000000FF +#define LAN_REGISTER_EXTENT 0x00000100 + +/* + * MAC Control and Status Register (Indirect Address) + * Offset (through the MAC_CSR CMD and DATA port) + */ +#define MAC_CR 0x01 +#define MAC_CR_RXALL_ 0x80000000 +#define MAC_CR_HBDIS_ 0x10000000 +#define MAC_CR_RCVOWN_ 0x00800000 +#define MAC_CR_LOOPBK_ 0x00200000 +#define MAC_CR_FDPX_ 0x00100000 +#define MAC_CR_MCPAS_ 0x00080000 +#define MAC_CR_PRMS_ 0x00040000 +#define MAC_CR_INVFILT_ 0x00020000 +#define MAC_CR_PASSBAD_ 0x00010000 +#define MAC_CR_HFILT_ 0x00008000 +#define MAC_CR_HPFILT_ 0x00002000 +#define MAC_CR_LCOLL_ 0x00001000 +#define MAC_CR_BCAST_ 0x00000800 +#define MAC_CR_DISRTY_ 0x00000400 +#define MAC_CR_PADSTR_ 0x00000100 +#define MAC_CR_BOLMT_MASK_ 0x000000C0 +#define MAC_CR_DFCHK_ 0x00000020 +#define MAC_CR_TXEN_ 0x00000008 +#define MAC_CR_RXEN_ 0x00000004 + +#define ADDRH 0x02 + +#define ADDRL 0x03 + +#define HASHH 0x04 + +#define HASHL 0x05 + +#define MII_ACC 0x06 +#define MII_ACC_PHY_ADDR_ 0x0000F800 +#define MII_ACC_MIIRINDA_ 0x000007C0 +#define MII_ACC_MII_WRITE_ 0x00000002 +#define MII_ACC_MII_BUSY_ 0x00000001 + +#define MII_DATA 0x07 + +#define FLOW 0x08 +#define FLOW_FCPT_ 0xFFFF0000 +#define FLOW_FCPASS_ 0x00000004 +#define FLOW_FCEN_ 0x00000002 +#define FLOW_FCBSY_ 0x00000001 + +#define VLAN1 0x09 + +#define VLAN2 0x0A + +#define WUFF 0x0B + +#define WUCSR 0x0C +#define WUCSR_GUE_ 0x00000200 +#define WUCSR_WUFR_ 0x00000040 +#define WUCSR_MPR_ 0x00000020 +#define WUCSR_WAKE_EN_ 0x00000004 +#define WUCSR_MPEN_ 0x00000002 + +/* + * Phy definitions (vendor-specific) + */ +#define LAN9118_PHY_ID 0x00C0001C + +#define MII_INTSTS 0x1D + +#define MII_INTMSK 0x1E +#define PHY_INTMSK_AN_RCV_ (1 << 1) +#define PHY_INTMSK_PDFAULT_ (1 << 2) +#define PHY_INTMSK_AN_ACK_ (1 << 3) +#define PHY_INTMSK_LNKDOWN_ (1 << 4) +#define PHY_INTMSK_RFAULT_ (1 << 5) +#define PHY_INTMSK_AN_COMP_ (1 << 6) +#define PHY_INTMSK_ENERGYON_ (1 << 7) +#define PHY_INTMSK_DEFAULT_ (PHY_INTMSK_ENERGYON_ | \ + PHY_INTMSK_AN_COMP_ | \ + PHY_INTMSK_RFAULT_ | \ + PHY_INTMSK_LNKDOWN_) + +#define ADVERTISE_PAUSE_ALL (ADVERTISE_PAUSE_CAP | \ + ADVERTISE_PAUSE_ASYM) + +#define LPA_PAUSE_ALL (LPA_PAUSE_CAP | \ + LPA_PAUSE_ASYM) + +#endif /* __SMSC911X_H__ */ diff --git a/include/linux/smsc911x.h b/include/linux/smsc911x.h new file mode 100644 index 000000000000..47c4ffd10dbb --- /dev/null +++ b/include/linux/smsc911x.h @@ -0,0 +1,42 @@ +/*************************************************************************** + * + * Copyright (C) 2004-2008 SMSC + * Copyright (C) 2005-2008 ARM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + ***************************************************************************/ +#ifndef __LINUX_SMSC911X_H__ +#define __LINUX_SMSC911X_H__ + +#include + +/* platform_device configuration data, should be assigned to + * the platform_device's dev.platform_data */ +struct smsc911x_platform_config { + unsigned int irq_polarity; + unsigned int irq_type; + phy_interface_t phy_interface; +}; + +/* Constants for platform_device irq polarity configuration */ +#define SMSC911X_IRQ_POLARITY_ACTIVE_LOW 0 +#define SMSC911X_IRQ_POLARITY_ACTIVE_HIGH 1 + +/* Constants for platform_device irq type configuration */ +#define SMSC911X_IRQ_TYPE_OPEN_DRAIN 0 +#define SMSC911X_IRQ_TYPE_PUSH_PULL 1 + +#endif /* __LINUX_SMSC911X_H__ */ -- cgit v1.2.3 From 60a7ecf42661f2b22168751298592da6ee210c9e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 5 Nov 2008 16:05:44 -0500 Subject: ftrace: add quick function trace stop Impact: quick start and stop of function tracer This patch adds a way to disable the function tracer quickly without the need to run kstop_machine. It adds a new variable called function_trace_stop which will stop the calls to functions from mcount when set. This is just an on/off switch and does not handle recursion like preempt_disable(). It's main purpose is to help other tracers/debuggers start and stop tracing fuctions without the need to call kstop_machine. The config option HAVE_FUNCTION_TRACE_MCOUNT_TEST is added for archs that implement the testing of the function_trace_stop in the mcount arch dependent code. Otherwise, the test is done in the C code. x86 is the only arch at the moment that supports this. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + arch/x86/kernel/entry_32.S | 6 ++++++ arch/x86/kernel/entry_64.S | 5 +++++ include/linux/ftrace.h | 30 +++++++++++++++++++++++++++++ kernel/trace/Kconfig | 7 +++++++ kernel/trace/ftrace.c | 47 ++++++++++++++++++++++++++++++++++++---------- 6 files changed, 86 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6f20718d3156..d09e812c6223 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -29,6 +29,7 @@ config X86 select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_TRACER + select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER select HAVE_ARCH_TRACEHOOK diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 28b597ef9ca1..9134de814c97 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1157,6 +1157,9 @@ ENTRY(mcount) END(mcount) ENTRY(ftrace_caller) + cmpl $0, function_trace_stop + jne ftrace_stub + pushl %eax pushl %ecx pushl %edx @@ -1180,6 +1183,9 @@ END(ftrace_caller) #else /* ! CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) + cmpl $0, function_trace_stop + jne ftrace_stub + cmpl $ftrace_stub, ftrace_trace_function jnz trace .globl ftrace_stub diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b86f332c96a6..08aa6b10933c 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -68,6 +68,8 @@ ENTRY(mcount) END(mcount) ENTRY(ftrace_caller) + cmpl $0, function_trace_stop + jne ftrace_stub /* taken from glibc */ subq $0x38, %rsp @@ -103,6 +105,9 @@ END(ftrace_caller) #else /* ! CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) + cmpl $0, function_trace_stop + jne ftrace_stub + cmpq $ftrace_stub, ftrace_trace_function jnz trace .globl ftrace_stub diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4642959e5bda..794ab907dbfe 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -23,6 +23,34 @@ struct ftrace_ops { struct ftrace_ops *next; }; +extern int function_trace_stop; + +/** + * ftrace_stop - stop function tracer. + * + * A quick way to stop the function tracer. Note this an on off switch, + * it is not something that is recursive like preempt_disable. + * This does not disable the calling of mcount, it only stops the + * calling of functions from mcount. + */ +static inline void ftrace_stop(void) +{ + function_trace_stop = 1; +} + +/** + * ftrace_start - start the function tracer. + * + * This function is the inverse of ftrace_stop. This does not enable + * the function tracing if the function tracer is disabled. This only + * sets the function tracer flag to continue calling the functions + * from mcount. + */ +static inline void ftrace_start(void) +{ + function_trace_stop = 0; +} + /* * The ftrace_ops must be a static and should also * be read_mostly. These functions do modify read_mostly variables @@ -41,6 +69,8 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1); # define unregister_ftrace_function(ops) do { } while (0) # define clear_ftrace_function(ops) do { } while (0) static inline void ftrace_kill(void) { } +static inline void ftrace_stop(void) { } +static inline void ftrace_start(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 33dbefd471e8..fc4febc3334a 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -9,6 +9,13 @@ config NOP_TRACER config HAVE_FUNCTION_TRACER bool +config HAVE_FUNCTION_TRACE_MCOUNT_TEST + bool + help + This gets selected when the arch tests the function_trace_stop + variable at the mcount call site. Otherwise, this variable + is tested by the called function. + config HAVE_DYNAMIC_FTRACE bool diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4a39d24568c8..896c71f0f4c4 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -47,6 +47,9 @@ int ftrace_enabled __read_mostly; static int last_ftrace_enabled; +/* Quick disabling of function tracer. */ +int function_trace_stop; + /* * ftrace_disabled is set when an anomaly is discovered. * ftrace_disabled is much stronger than ftrace_enabled. @@ -63,6 +66,7 @@ static struct ftrace_ops ftrace_list_end __read_mostly = static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; +ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) { @@ -88,8 +92,23 @@ static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) void clear_ftrace_function(void) { ftrace_trace_function = ftrace_stub; + __ftrace_trace_function = ftrace_stub; } +#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST +/* + * For those archs that do not test ftrace_trace_stop in their + * mcount call site, we need to do it from C. + */ +static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip) +{ + if (function_trace_stop) + return; + + __ftrace_trace_function(ip, parent_ip); +} +#endif + static int __register_ftrace_function(struct ftrace_ops *ops) { /* should not be called from interrupt context */ @@ -110,10 +129,18 @@ static int __register_ftrace_function(struct ftrace_ops *ops) * For one func, simply call it directly. * For more than one func, call the chain. */ +#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST if (ops->next == &ftrace_list_end) ftrace_trace_function = ops->func; else ftrace_trace_function = ftrace_list_func; +#else + if (ops->next == &ftrace_list_end) + __ftrace_trace_function = ops->func; + else + __ftrace_trace_function = ftrace_list_func; + ftrace_trace_function = ftrace_test_stop_func; +#endif } spin_unlock(&ftrace_lock); @@ -526,7 +553,7 @@ static void ftrace_run_update_code(int command) } static ftrace_func_t saved_ftrace_func; -static int ftrace_start; +static int ftrace_start_up; static DEFINE_MUTEX(ftrace_start_lock); static void ftrace_startup(void) @@ -537,8 +564,8 @@ static void ftrace_startup(void) return; mutex_lock(&ftrace_start_lock); - ftrace_start++; - if (ftrace_start == 1) + ftrace_start_up++; + if (ftrace_start_up == 1) command |= FTRACE_ENABLE_CALLS; if (saved_ftrace_func != ftrace_trace_function) { @@ -562,8 +589,8 @@ static void ftrace_shutdown(void) return; mutex_lock(&ftrace_start_lock); - ftrace_start--; - if (!ftrace_start) + ftrace_start_up--; + if (!ftrace_start_up) command |= FTRACE_DISABLE_CALLS; if (saved_ftrace_func != ftrace_trace_function) { @@ -589,8 +616,8 @@ static void ftrace_startup_sysctl(void) mutex_lock(&ftrace_start_lock); /* Force update next time */ saved_ftrace_func = NULL; - /* ftrace_start is true if we want ftrace running */ - if (ftrace_start) + /* ftrace_start_up is true if we want ftrace running */ + if (ftrace_start_up) command |= FTRACE_ENABLE_CALLS; ftrace_run_update_code(command); @@ -605,8 +632,8 @@ static void ftrace_shutdown_sysctl(void) return; mutex_lock(&ftrace_start_lock); - /* ftrace_start is true if ftrace is running */ - if (ftrace_start) + /* ftrace_start_up is true if ftrace is running */ + if (ftrace_start_up) command |= FTRACE_DISABLE_CALLS; ftrace_run_update_code(command); @@ -1186,7 +1213,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) mutex_lock(&ftrace_sysctl_lock); mutex_lock(&ftrace_start_lock); - if (iter->filtered && ftrace_start && ftrace_enabled) + if (iter->filtered && ftrace_start_up && ftrace_enabled) ftrace_run_update_code(FTRACE_ENABLE_CALLS); mutex_unlock(&ftrace_start_lock); mutex_unlock(&ftrace_sysctl_lock); -- cgit v1.2.3 From 0f04870148ecb825133bc2733f473b1c5773ac0b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 5 Nov 2008 16:05:44 -0500 Subject: ftrace: soft tracing stop and start Impact: add way to quickly start stop tracing from the kernel This patch adds a soft stop and start to the trace. This simply disables function tracing via the ftrace_disabled flag, and disables the trace buffers to prevent recording. The tracing code may still be executed, but the trace will not be recorded. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 5 ++++ kernel/trace/trace.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 84 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 794ab907dbfe..7a75fc6d41f4 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -216,6 +216,9 @@ static inline void __ftrace_enabled_restore(int enabled) #ifdef CONFIG_TRACING extern int ftrace_dump_on_oops; +extern void tracing_start(void); +extern void tracing_stop(void); + extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); @@ -246,6 +249,8 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } static inline int ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0))); +static inline void tracing_start(void) { } +static inline void tracing_stop(void) { } static inline int ftrace_printk(const char *fmt, ...) { diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 29ab40a764c8..113aea9447ec 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -43,6 +43,15 @@ unsigned long __read_mostly tracing_max_latency = (cycle_t)ULONG_MAX; unsigned long __read_mostly tracing_thresh; + +/* + * Kill all tracing for good (never come back). + * It is initialized to 1 but will turn to zero if the initialization + * of the tracer is successful. But that is the only place that sets + * this back to zero. + */ +int tracing_disabled = 1; + static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); static inline void ftrace_disable_cpu(void) @@ -62,8 +71,6 @@ static cpumask_t __read_mostly tracing_buffer_mask; #define for_each_tracing_cpu(cpu) \ for_each_cpu_mask(cpu, tracing_buffer_mask) -static int tracing_disabled = 1; - /* * ftrace_dump_on_oops - variable to dump ftrace buffer on oops * @@ -613,6 +620,76 @@ static void trace_init_cmdlines(void) cmdline_idx = 0; } +static int trace_stop_count; +static DEFINE_SPINLOCK(tracing_start_lock); + +/** + * tracing_start - quick start of the tracer + * + * If tracing is enabled but was stopped by tracing_stop, + * this will start the tracer back up. + */ +void tracing_start(void) +{ + struct ring_buffer *buffer; + unsigned long flags; + + if (tracing_disabled) + return; + + spin_lock_irqsave(&tracing_start_lock, flags); + if (--trace_stop_count) + goto out; + + if (trace_stop_count < 0) { + /* Someone screwed up their debugging */ + WARN_ON_ONCE(1); + trace_stop_count = 0; + goto out; + } + + + buffer = global_trace.buffer; + if (buffer) + ring_buffer_record_enable(buffer); + + buffer = max_tr.buffer; + if (buffer) + ring_buffer_record_enable(buffer); + + ftrace_start(); + out: + spin_unlock_irqrestore(&tracing_start_lock, flags); +} + +/** + * tracing_stop - quick stop of the tracer + * + * Light weight way to stop tracing. Use in conjunction with + * tracing_start. + */ +void tracing_stop(void) +{ + struct ring_buffer *buffer; + unsigned long flags; + + ftrace_stop(); + spin_lock_irqsave(&tracing_start_lock, flags); + if (trace_stop_count++) + goto out; + + buffer = global_trace.buffer; + if (buffer) + ring_buffer_record_disable(buffer); + + buffer = max_tr.buffer; + if (buffer) + ring_buffer_record_disable(buffer); + + out: + spin_unlock_irqrestore(&tracing_start_lock, flags); +} + void trace_stop_cmdline_recording(void); static void trace_save_cmdline(struct task_struct *tsk) -- cgit v1.2.3 From 6a60dd121c5b6c2d827e99b38c1326f2600c3891 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 6 Nov 2008 15:55:21 -0500 Subject: ftrace: split out hardirq ftrace code into own header Impact: moving of function prototypes into own header file ftrace.h is too big of a file for hardirq.h, and some archs will fail to build because of the include dependencies not being met. This patch pulls out the required prototypes for hardirq.h into a smaller and safer ftrace_irq.h file. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 5 ----- include/linux/ftrace_irq.h | 13 +++++++++++++ include/linux/hardirq.h | 2 +- 3 files changed, 14 insertions(+), 6 deletions(-) create mode 100644 include/linux/ftrace_irq.h (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0ad1b48aea69..1b340e3fa249 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -104,9 +104,6 @@ extern void ftrace_release(void *start, unsigned long size); extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); -extern void ftrace_nmi_enter(void); -extern void ftrace_nmi_exit(void); - #else # define skip_trace(ip) ({ 0; }) # define ftrace_force_update() ({ 0; }) @@ -114,8 +111,6 @@ extern void ftrace_nmi_exit(void); # define ftrace_disable_daemon() do { } while (0) # define ftrace_enable_daemon() do { } while (0) static inline void ftrace_release(void *start, unsigned long size) { } -static inline void ftrace_nmi_enter(void) { } -static inline void ftrace_nmi_exit(void) { } #endif /* CONFIG_DYNAMIC_FTRACE */ /* totally disable ftrace - can not re-enable after this */ diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h new file mode 100644 index 000000000000..b1299d6729f2 --- /dev/null +++ b/include/linux/ftrace_irq.h @@ -0,0 +1,13 @@ +#ifndef _LINUX_FTRACE_IRQ_H +#define _LINUX_FTRACE_IRQ_H + + +#ifdef CONFIG_DYNAMIC_FTRACE +extern void ftrace_nmi_enter(void); +extern void ftrace_nmi_exit(void); +#else +static inline void ftrace_nmi_enter(void) { } +static inline void ftrace_nmi_exit(void) { } +#endif + +#endif /* _LINUX_FTRACE_IRQ_H */ diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index ffc16ab5a878..89a56d79e4c6 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include -- cgit v1.2.3 From 945eed02cd619f525e097319cd3d18c58d01da87 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 7 Nov 2008 09:08:15 +0100 Subject: ALSA: Evaluate condition in snd_BUG_ON() in non-debugging case Change snd_BUG_ON() to evaluate the given condition, at least, in syntax for avoiding compile warnings such as unused variables. The compiler should optimize out the condition evaluation in the real code, though. Signed-off-by: Takashi Iwai --- include/sound/core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/core.h b/include/sound/core.h index 7e5589472681..6fa4c7b1970a 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -390,11 +390,11 @@ void snd_verbose_printd(const char *file, int line, const char *format, ...) #define snd_printd(fmt, args...) do { } while (0) #define snd_BUG() do { } while (0) -static inline int __snd_bug_on(void) +static inline int __snd_bug_on(int cond) { return 0; } -#define snd_BUG_ON(cond) __snd_bug_on() /* always false */ +#define snd_BUG_ON(cond) __snd_bug_on(0 && (cond)) /* always false */ #endif /* CONFIG_SND_DEBUG */ -- cgit v1.2.3 From f66fcedc84dd06d42a0dba3894d238498509e8b7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 7 Nov 2008 09:37:22 +0100 Subject: ALSA: Document debug macros Add descriptions of snd_BUG() and snd_BUG_ON(). Also fixed a typo in the comment of snd_printk(), too. Signed-off-by: Takashi Iwai --- include/sound/core.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/core.h b/include/sound/core.h index 6fa4c7b1970a..b2f3bbe9e56d 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -356,7 +356,7 @@ void snd_verbose_printd(const char *file, int line, const char *format, ...) * snd_printk - printk wrapper * @fmt: format string * - * Works like print() but prints the file and the line of the caller + * Works like printk() but prints the file and the line of the caller * when configured with CONFIG_SND_VERBOSE_PRINTK. */ #define snd_printk(fmt, args...) \ @@ -383,7 +383,29 @@ void snd_verbose_printd(const char *file, int line, const char *format, ...) printk(fmt ,##args) #endif +/** + * snd_BUG - give a BUG warning message and stack trace + * + * Calls WARN() if CONFIG_SND_DEBUG is set. + * Ignored when CONFIG_SND_DEBUG is not set. + */ #define snd_BUG() WARN(1, "BUG?\n") + +/** + * snd_BUG_ON - debugging check macro + * @cond: condition to evaluate + * + * When CONFIG_SND_DEBUG is set, this macro evaluates the given condition, + * and call WARN() and returns the value if it's non-zero. + * + * When CONFIG_SND_DEBUG is not set, this just returns zero, and the given + * condition is ignored. + * + * NOTE: the argument won't be evaluated at all when CONFIG_SND_DEBUG=n. + * Thus, don't put any statement that influences on the code behavior, + * such as pre/post increment, to the argument of this macro. + * If you want to evaluate and give a warning, use standard WARN_ON(). + */ #define snd_BUG_ON(cond) WARN((cond), "BUG? (%s)\n", __stringify(cond)) #else /* !CONFIG_SND_DEBUG */ -- cgit v1.2.3 From 3d8160b1493bcadca74fbb635d79b3928b8999cf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 7 Nov 2008 22:52:14 -0800 Subject: Revert "net: Guaranetee the proper ordering of the loopback device." This reverts commit ae33bc40c0d96d02f51a996482ea7e41c5152695. --- drivers/net/loopback.c | 13 +++++++++++-- include/linux/netdevice.h | 1 - net/core/dev.c | 12 ------------ 3 files changed, 11 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index c4516b580ba5..91d08585a6d8 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -204,8 +204,17 @@ static __net_exit void loopback_net_exit(struct net *net) unregister_netdev(dev); } -/* Registered in net/core/dev.c */ -struct pernet_operations __net_initdata loopback_net_ops = { +static struct pernet_operations __net_initdata loopback_net_ops = { .init = loopback_net_init, .exit = loopback_net_exit, }; + +static int __init loopback_init(void) +{ + return register_pernet_device(&loopback_net_ops); +} + +/* Loopback is special. It should be initialized before any other network + * device and network subsystem. + */ +fs_initcall(loopback_init); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 12d7f4469dc9..f1b0dbe58464 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1766,7 +1766,6 @@ static inline int skb_bond_should_drop(struct sk_buff *skb) return 0; } -extern struct pernet_operations __net_initdata loopback_net_ops; #endif /* __KERNEL__ */ #endif /* _LINUX_DEV_H */ diff --git a/net/core/dev.c b/net/core/dev.c index e0dc67a789b7..2306d56fbb5e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4909,18 +4909,6 @@ static int __init net_dev_init(void) if (register_pernet_subsys(&netdev_net_ops)) goto out; - /* The loopback device is special if any other network devices - * is present in a network namespace the loopback device must - * be present. Since we now dynamically allocate and free the - * loopback device ensure this invariant is maintained by - * keeping the loopback device as the first device on the - * list of network devices. Ensuring the loopback devices - * is the first device that appears and the last network device - * that disappears. - */ - if (register_pernet_device(&loopback_net_ops)) - goto out; - if (register_pernet_device(&default_device_ops)) goto out; -- cgit v1.2.3 From 505d4f73dda9e20d59da05008f1f5eb432613e71 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 7 Nov 2008 22:54:20 -0800 Subject: net: Guaranetee the proper ordering of the loopback device. v2 I was recently hunting a bug that occurred in network namespace cleanup. In looking at the code it became apparrent that we have and will continue to have cases where if we have anything going on in a network namespace there will be assumptions that the loopback device is present. Things like sending igmp unsubscribe messages when we bring down network devices invokes the routing code which assumes that at least the loopback driver is present. Therefore to avoid magic initcall ordering hackery that is hard to follow and hard to get right insert a call to register the loopback device directly from net_dev_init(). This guarantes that the loopback device is the first device registered and the last network device to go away. But do it carefully so we register the loopback device after we clear dev_boot_phase. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- drivers/net/loopback.c | 13 ++----------- include/linux/netdevice.h | 1 + net/core/dev.c | 22 +++++++++++++++++----- 3 files changed, 20 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 91d08585a6d8..c4516b580ba5 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -204,17 +204,8 @@ static __net_exit void loopback_net_exit(struct net *net) unregister_netdev(dev); } -static struct pernet_operations __net_initdata loopback_net_ops = { +/* Registered in net/core/dev.c */ +struct pernet_operations __net_initdata loopback_net_ops = { .init = loopback_net_init, .exit = loopback_net_exit, }; - -static int __init loopback_init(void) -{ - return register_pernet_device(&loopback_net_ops); -} - -/* Loopback is special. It should be initialized before any other network - * device and network subsystem. - */ -fs_initcall(loopback_init); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f1b0dbe58464..12d7f4469dc9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1766,6 +1766,7 @@ static inline int skb_bond_should_drop(struct sk_buff *skb) return 0; } +extern struct pernet_operations __net_initdata loopback_net_ops; #endif /* __KERNEL__ */ #endif /* _LINUX_DEV_H */ diff --git a/net/core/dev.c b/net/core/dev.c index 2306d56fbb5e..31568b2068ac 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4909,9 +4909,6 @@ static int __init net_dev_init(void) if (register_pernet_subsys(&netdev_net_ops)) goto out; - if (register_pernet_device(&default_device_ops)) - goto out; - /* * Initialise the packet receive queues. */ @@ -4928,10 +4925,25 @@ static int __init net_dev_init(void) queue->backlog.weight = weight_p; } - netdev_dma_register(); - dev_boot_phase = 0; + /* The loopback device is special if any other network devices + * is present in a network namespace the loopback device must + * be present. Since we now dynamically allocate and free the + * loopback device ensure this invariant is maintained by + * keeping the loopback device as the first device on the + * list of network devices. Ensuring the loopback devices + * is the first device that appears and the last network device + * that disappears. + */ + if (register_pernet_device(&loopback_net_ops)) + goto out; + + if (register_pernet_device(&default_device_ops)) + goto out; + + netdev_dma_register(); + open_softirq(NET_TX_SOFTIRQ, net_tx_action); open_softirq(NET_RX_SOFTIRQ, net_rx_action); -- cgit v1.2.3 From f400923735ecbb67cbe4a3606c9479f694754f51 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 7 Nov 2008 22:56:00 -0800 Subject: pkt_sched: Control group classifier The classifier should cover the most common use case and will work without any special configuration. The principle of the classifier is to directly access the task_struct via get_current(). In order for this to work, classification requests from softirqs must be ignored. This is not a problem because the vast majority of packets in softirq context are not assigned to a task anyway. For this to work, a mechanism is needed to trace softirq context. This repost goes back to the method of relying on the number of nested bh disable calls for the sake of not adding too much complexity and the option to come up with something more reliable if actually needed. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/cgroup_subsys.h | 6 + include/linux/pkt_cls.h | 14 ++ net/sched/Kconfig | 11 ++ net/sched/Makefile | 1 + net/sched/cls_cgroup.c | 290 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 322 insertions(+) create mode 100644 net/sched/cls_cgroup.c (limited to 'include') diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 9c22396e8b50..9c8d31bacf46 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -54,3 +54,9 @@ SUBSYS(freezer) #endif /* */ + +#ifdef CONFIG_NET_CLS_CGROUP +SUBSYS(net_cls) +#endif + +/* */ diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index 7cf7824df778..e6aa8482ad7a 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -394,6 +394,20 @@ enum #define TCA_BASIC_MAX (__TCA_BASIC_MAX - 1) + +/* Cgroup classifier */ + +enum +{ + TCA_CGROUP_UNSPEC, + TCA_CGROUP_ACT, + TCA_CGROUP_POLICE, + TCA_CGROUP_EMATCHES, + __TCA_CGROUP_MAX, +}; + +#define TCA_CGROUP_MAX (__TCA_CGROUP_MAX - 1) + /* Extended Matches */ struct tcf_ematch_tree_hdr diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 6767e54155db..36543b6fcef3 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -316,6 +316,17 @@ config NET_CLS_FLOW To compile this code as a module, choose M here: the module will be called cls_flow. +config NET_CLS_CGROUP + bool "Control Group Classifier" + select NET_CLS + depends on CGROUPS + ---help--- + Say Y here if you want to classify packets based on the control + cgroup of their process. + + To compile this code as a module, choose M here: the + module will be called cls_cgroup. + config NET_EMATCH bool "Extended Matches" select NET_CLS diff --git a/net/sched/Makefile b/net/sched/Makefile index e60c9925b269..70b35f8708c3 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -38,6 +38,7 @@ obj-$(CONFIG_NET_CLS_TCINDEX) += cls_tcindex.o obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o +obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o obj-$(CONFIG_NET_EMATCH) += ematch.o obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c new file mode 100644 index 000000000000..53ada2c0e41c --- /dev/null +++ b/net/sched/cls_cgroup.c @@ -0,0 +1,290 @@ +/* + * net/sched/cls_cgroup.c Control Group Classifier + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Thomas Graf + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct cgroup_cls_state +{ + struct cgroup_subsys_state css; + u32 classid; +}; + +static inline struct cgroup_cls_state *net_cls_state(struct cgroup *cgrp) +{ + return (struct cgroup_cls_state *) + cgroup_subsys_state(cgrp, net_cls_subsys_id); +} + +static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, + struct cgroup *cgrp) +{ + struct cgroup_cls_state *cs; + + if (!(cs = kzalloc(sizeof(*cs), GFP_KERNEL))) + return ERR_PTR(-ENOMEM); + + if (cgrp->parent) + cs->classid = net_cls_state(cgrp->parent)->classid; + + return &cs->css; +} + +static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) +{ + kfree(ss); +} + +static u64 read_classid(struct cgroup *cgrp, struct cftype *cft) +{ + return net_cls_state(cgrp)->classid; +} + +static int write_classid(struct cgroup *cgrp, struct cftype *cft, u64 value) +{ + if (!cgroup_lock_live_group(cgrp)) + return -ENODEV; + + net_cls_state(cgrp)->classid = (u32) value; + + cgroup_unlock(); + + return 0; +} + +static struct cftype ss_files[] = { + { + .name = "classid", + .read_u64 = read_classid, + .write_u64 = write_classid, + }, +}; + +static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) +{ + return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); +} + +struct cgroup_subsys net_cls_subsys = { + .name = "net_cls", + .create = cgrp_create, + .destroy = cgrp_destroy, + .populate = cgrp_populate, + .subsys_id = net_cls_subsys_id, +}; + +struct cls_cgroup_head +{ + u32 handle; + struct tcf_exts exts; + struct tcf_ematch_tree ematches; +}; + +static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp, + struct tcf_result *res) +{ + struct cls_cgroup_head *head = tp->root; + struct cgroup_cls_state *cs; + int ret = 0; + + /* + * Due to the nature of the classifier it is required to ignore all + * packets originating from softirq context as accessing `current' + * would lead to false results. + * + * This test assumes that all callers of dev_queue_xmit() explicitely + * disable bh. Knowing this, it is possible to detect softirq based + * calls by looking at the number of nested bh disable calls because + * softirqs always disables bh. + */ + if (softirq_count() != SOFTIRQ_OFFSET) + return -1; + + rcu_read_lock(); + cs = (struct cgroup_cls_state *) task_subsys_state(current, + net_cls_subsys_id); + if (cs->classid && tcf_em_tree_match(skb, &head->ematches, NULL)) { + res->classid = cs->classid; + res->class = 0; + ret = tcf_exts_exec(skb, &head->exts, res); + } else + ret = -1; + + rcu_read_unlock(); + + return ret; +} + +static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle) +{ + return 0UL; +} + +static void cls_cgroup_put(struct tcf_proto *tp, unsigned long f) +{ +} + +static int cls_cgroup_init(struct tcf_proto *tp) +{ + return 0; +} + +static const struct tcf_ext_map cgroup_ext_map = { + .action = TCA_CGROUP_ACT, + .police = TCA_CGROUP_POLICE, +}; + +static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { + [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, +}; + +static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base, + u32 handle, struct nlattr **tca, + unsigned long *arg) +{ + struct nlattr *tb[TCA_CGROUP_MAX+1]; + struct cls_cgroup_head *head = tp->root; + struct tcf_ematch_tree t; + struct tcf_exts e; + int err; + + if (head == NULL) { + if (!handle) + return -EINVAL; + + head = kzalloc(sizeof(*head), GFP_KERNEL); + if (head == NULL) + return -ENOBUFS; + + head->handle = handle; + + tcf_tree_lock(tp); + tp->root = head; + tcf_tree_unlock(tp); + } + + if (handle != head->handle) + return -ENOENT; + + err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS], + cgroup_policy); + if (err < 0) + return err; + + err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &cgroup_ext_map); + if (err < 0) + return err; + + err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t); + if (err < 0) + return err; + + tcf_exts_change(tp, &head->exts, &e); + tcf_em_tree_change(tp, &head->ematches, &t); + + return 0; +} + +static void cls_cgroup_destroy(struct tcf_proto *tp) +{ + struct cls_cgroup_head *head; + + head = (struct cls_cgroup_head *)xchg(&tp->root, NULL); + + if (head) { + tcf_exts_destroy(tp, &head->exts); + tcf_em_tree_destroy(tp, &head->ematches); + kfree(head); + } +} + +static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg) +{ + return -EOPNOTSUPP; +} + +static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg) +{ + struct cls_cgroup_head *head = tp->root; + + if (arg->count < arg->skip) + goto skip; + + if (arg->fn(tp, (unsigned long) head, arg) < 0) { + arg->stop = 1; + return; + } +skip: + arg->count++; +} + +static int cls_cgroup_dump(struct tcf_proto *tp, unsigned long fh, + struct sk_buff *skb, struct tcmsg *t) +{ + struct cls_cgroup_head *head = tp->root; + unsigned char *b = skb_tail_pointer(skb); + struct nlattr *nest; + + t->tcm_handle = head->handle; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (nest == NULL) + goto nla_put_failure; + + if (tcf_exts_dump(skb, &head->exts, &cgroup_ext_map) < 0 || + tcf_em_tree_dump(skb, &head->ematches, TCA_CGROUP_EMATCHES) < 0) + goto nla_put_failure; + + nla_nest_end(skb, nest); + + if (tcf_exts_dump_stats(skb, &head->exts, &cgroup_ext_map) < 0) + goto nla_put_failure; + + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static struct tcf_proto_ops cls_cgroup_ops __read_mostly = { + .kind = "cgroup", + .init = cls_cgroup_init, + .change = cls_cgroup_change, + .classify = cls_cgroup_classify, + .destroy = cls_cgroup_destroy, + .get = cls_cgroup_get, + .put = cls_cgroup_put, + .delete = cls_cgroup_delete, + .walk = cls_cgroup_walk, + .dump = cls_cgroup_dump, + .owner = THIS_MODULE, +}; + +static int __init init_cgroup_cls(void) +{ + return register_tcf_proto_ops(&cls_cgroup_ops); +} + +static void __exit exit_cgroup_cls(void) +{ + unregister_tcf_proto_ops(&cls_cgroup_ops); +} + +module_init(init_cgroup_cls); +module_exit(exit_cgroup_cls); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 1239cd58d237fa6ad501acaec8776262a5784ec8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 28 Oct 2008 11:12:57 +0100 Subject: wireless: move mesh config length constant This is a constant from the 802.11 specification. Signed-off-by: Johannes Berg Cc: Javier Cardona Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 3 +++ net/mac80211/mesh.c | 2 +- net/mac80211/mesh.h | 5 +---- net/mac80211/scan.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index aad99195a4cc..9dc288b920c8 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -97,7 +97,10 @@ #define IEEE80211_MAX_FRAME_LEN 2352 #define IEEE80211_MAX_SSID_LEN 32 + #define IEEE80211_MAX_MESH_ID_LEN 32 +#define IEEE80211_MESH_CONFIG_LEN 19 + #define IEEE80211_QOS_CTL_LEN 2 #define IEEE80211_QOS_CTL_TID_MASK 0x000F #define IEEE80211_QOS_CTL_TAG1D_MASK 0x0007 diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index d3b6e1a648bd..82f568e94365 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -238,7 +238,7 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) pos = skb_put(skb, 21); *pos++ = WLAN_EID_MESH_CONFIG; - *pos++ = MESH_CFG_LEN; + *pos++ = IEEE80211_MESH_CONFIG_LEN; /* Version */ *pos++ = 1; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index e10471c6ba42..c197ab545e54 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -145,9 +145,6 @@ struct mesh_rmc { }; -/* Mesh IEs constants */ -#define MESH_CFG_LEN 19 - /* * MESH_CFG_COMP_LEN Includes: * - Active path selection protocol ID. @@ -157,7 +154,7 @@ struct mesh_rmc { * Does not include mesh capabilities, which may vary across nodes in the same * mesh */ -#define MESH_CFG_CMP_LEN 17 +#define MESH_CFG_CMP_LEN (IEEE80211_MESH_CONFIG_LEN - 2) /* Default values, timeouts in ms */ #define MESH_TTL 5 diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 7372d7abb8c0..f5c7c3371929 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -159,7 +159,7 @@ ieee80211_rx_mesh_bss_add(struct ieee80211_local *local, u8 *mesh_id, int mesh_i { struct ieee80211_bss *bss; - if (mesh_config_len != MESH_CFG_LEN) + if (mesh_config_len != IEEE80211_MESH_CONFIG_LEN) return NULL; bss = kzalloc(sizeof(*bss), GFP_ATOMIC); -- cgit v1.2.3 From 41bb73eeac5ff5fb217257ba33b654747b3abf11 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 29 Oct 2008 01:09:37 +0100 Subject: mac80211: remove SSID driver code Remove the SSID from the driver API since now there is no driver that requires knowing the SSID and I think it's unlikely that any hardware design that does require the SSID will play well with mac80211. This also removes support for setting the SSID in master mode which will require a patch to hostapd to not try. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-agn.c | 7 ------- drivers/net/wireless/iwlwifi/iwl3945-base.c | 7 ------- include/net/mac80211.h | 11 +---------- net/mac80211/ieee80211_i.h | 3 --- net/mac80211/main.c | 15 +++------------ net/mac80211/mlme.c | 14 -------------- net/mac80211/wext.c | 16 ---------------- 7 files changed, 4 insertions(+), 69 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index ce3141f07f35..ad186e134dec 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -2957,13 +2957,6 @@ static int iwl4965_mac_config_interface(struct ieee80211_hw *hw, return rc; } - if ((priv->iw_mode == NL80211_IFTYPE_AP) && - (!conf->ssid_len)) { - IWL_DEBUG_MAC80211 - ("Leaving in AP mode because HostAPD is not ready.\n"); - return 0; - } - if (!iwl_is_alive(priv)) return -EAGAIN; diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 0c2d778a9ff7..47881be9ba44 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -6743,13 +6743,6 @@ static int iwl3945_mac_config_interface(struct ieee80211_hw *hw, /* XXX: this MUST use conf->mac_addr */ - if ((priv->iw_mode == NL80211_IFTYPE_AP) && - (!conf->ssid_len)) { - IWL_DEBUG_MAC80211 - ("Leaving in AP mode because HostAPD is not ready.\n"); - return 0; - } - if (!iwl3945_is_alive(priv)) return -EAGAIN; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0b983bed3829..af2ec6f9beb9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -616,14 +616,12 @@ struct ieee80211_if_init_conf { * enum ieee80211_if_conf_change - interface config change flags * * @IEEE80211_IFCC_BSSID: The BSSID changed. - * @IEEE80211_IFCC_SSID: The SSID changed. * @IEEE80211_IFCC_BEACON: The beacon for this interface changed * (currently AP and MESH only), use ieee80211_beacon_get(). */ enum ieee80211_if_conf_change { IEEE80211_IFCC_BSSID = BIT(0), - IEEE80211_IFCC_SSID = BIT(1), - IEEE80211_IFCC_BEACON = BIT(2), + IEEE80211_IFCC_BEACON = BIT(1), }; /** @@ -631,11 +629,6 @@ enum ieee80211_if_conf_change { * * @changed: parameters that have changed, see &enum ieee80211_if_conf_change. * @bssid: BSSID of the network we are associated to/creating. - * @ssid: used (together with @ssid_len) by drivers for hardware that - * generate beacons independently. The pointer is valid only during the - * config_interface() call, so copy the value somewhere if you need - * it. - * @ssid_len: length of the @ssid field. * * This structure is passed to the config_interface() callback of * &struct ieee80211_hw. @@ -643,8 +636,6 @@ enum ieee80211_if_conf_change { struct ieee80211_if_conf { u32 changed; u8 *bssid; - u8 *ssid; - size_t ssid_len; }; /** diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 2c91108e3901..155a20410017 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -212,9 +212,6 @@ struct ieee80211_if_ap { struct list_head vlans; - u8 ssid[IEEE80211_MAX_SSID_LEN]; - size_t ssid_len; - /* yes, this looks ugly, but guarantees that we can later use * bitmap_empty :) * NB: don't touch this bitmap, use sta_info_{set,clear}_tim_bit */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index fa0cc7a1e6b4..d631dc96c323 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -171,19 +171,13 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) conf.changed = changed; if (sdata->vif.type == NL80211_IFTYPE_STATION || - sdata->vif.type == NL80211_IFTYPE_ADHOC) { + sdata->vif.type == NL80211_IFTYPE_ADHOC) conf.bssid = sdata->u.sta.bssid; - conf.ssid = sdata->u.sta.ssid; - conf.ssid_len = sdata->u.sta.ssid_len; - } else if (sdata->vif.type == NL80211_IFTYPE_AP) { + else if (sdata->vif.type == NL80211_IFTYPE_AP) conf.bssid = sdata->dev->dev_addr; - conf.ssid = sdata->u.ap.ssid; - conf.ssid_len = sdata->u.ap.ssid_len; - } else if (ieee80211_vif_is_mesh(&sdata->vif)) { + else if (ieee80211_vif_is_mesh(&sdata->vif)) { u8 zero[ETH_ALEN] = { 0 }; conf.bssid = zero; - conf.ssid = zero; - conf.ssid_len = 0; } else { WARN_ON(1); return -EINVAL; @@ -192,9 +186,6 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) if (WARN_ON(!conf.bssid && (changed & IEEE80211_IFCC_BSSID))) return -EINVAL; - if (WARN_ON(!conf.ssid && (changed & IEEE80211_IFCC_SSID))) - return -EINVAL; - return local->ops->config_interface(local_to_hw(local), &sdata->vif, &conf); } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 9e6c75abc922..708eb4502ed7 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2416,7 +2416,6 @@ void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata, int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len) { struct ieee80211_if_sta *ifsta; - int res; if (len > IEEE80211_MAX_SSID_LEN) return -EINVAL; @@ -2428,19 +2427,6 @@ int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size memcpy(ifsta->ssid, ssid, len); ifsta->ssid_len = len; ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET; - - res = 0; - /* - * Hack! MLME code needs to be cleaned up to have different - * entry points for configuration and internal selection change - */ - if (netif_running(sdata->dev)) - res = ieee80211_if_config(sdata, IEEE80211_IFCC_SSID); - if (res) { - printk(KERN_DEBUG "%s: Failed to config new SSID to " - "the low-level driver\n", sdata->dev->name); - return res; - } } if (len) diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 231cab57351f..63f36e9d1af8 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -407,13 +407,6 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev, return 0; } - if (sdata->vif.type == NL80211_IFTYPE_AP) { - memcpy(sdata->u.ap.ssid, ssid, len); - memset(sdata->u.ap.ssid + len, 0, - IEEE80211_MAX_SSID_LEN - len); - sdata->u.ap.ssid_len = len; - return ieee80211_if_config(sdata, IEEE80211_IFCC_SSID); - } return -EOPNOTSUPP; } @@ -437,15 +430,6 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev, return res; } - if (sdata->vif.type == NL80211_IFTYPE_AP) { - len = sdata->u.ap.ssid_len; - if (len > IW_ESSID_MAX_SIZE) - len = IW_ESSID_MAX_SIZE; - memcpy(ssid, sdata->u.ap.ssid, len); - data->length = len; - data->flags = 1; - return 0; - } return -EOPNOTSUPP; } -- cgit v1.2.3 From 8469cdef1f123e2e3e56645f1ac26c7cfb333d9c Mon Sep 17 00:00:00 2001 From: Sujith Date: Wed, 29 Oct 2008 10:19:28 +0530 Subject: mac80211: Add a new event in ieee80211_ampdu_mlme_action Send a notification to the driver on succesful reception of an ADDBA response, add IEEE80211_AMPDU_TX_RESUME for this purpose. Signed-off-by: Sujith Signed-off-by: John W. Linville --- drivers/net/wireless/ath9k/core.h | 1 + drivers/net/wireless/ath9k/main.c | 3 +++ drivers/net/wireless/ath9k/xmit.c | 19 +++++++++++++++++++ include/net/mac80211.h | 2 ++ net/mac80211/ht.c | 10 +++++++++- 5 files changed, 34 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/wireless/ath9k/core.h b/drivers/net/wireless/ath9k/core.h index 5b17e88ab9a9..69e8d3e41131 100644 --- a/drivers/net/wireless/ath9k/core.h +++ b/drivers/net/wireless/ath9k/core.h @@ -581,6 +581,7 @@ void ath_tx_aggr_teardown(struct ath_softc *sc, int ath_tx_aggr_start(struct ath_softc *sc, struct ieee80211_sta *sta, u16 tid, u16 *ssn); int ath_tx_aggr_stop(struct ath_softc *sc, struct ieee80211_sta *sta, u16 tid); +void ath_tx_aggr_resume(struct ath_softc *sc, struct ieee80211_sta *sta, u16 tid); void ath_newassoc(struct ath_softc *sc, struct ath_node *node, int isnew, int isuapsd); void ath_node_attach(struct ath_softc *sc, struct ieee80211_sta *sta); diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c index 65a532e08ecd..fb50aa0fc996 100644 --- a/drivers/net/wireless/ath9k/main.c +++ b/drivers/net/wireless/ath9k/main.c @@ -1482,6 +1482,9 @@ static int ath9k_ampdu_action(struct ieee80211_hw *hw, ieee80211_stop_tx_ba_cb_irqsafe(hw, sta->addr, tid); break; + case IEEE80211_AMPDU_TX_RESUME: + ath_tx_aggr_resume(sc, sta, tid); + break; default: DPRINTF(sc, ATH_DBG_FATAL, "%s: Unknown AMPDU action\n", __func__); diff --git a/drivers/net/wireless/ath9k/xmit.c b/drivers/net/wireless/ath9k/xmit.c index 7e6f4e59a5d1..fe386b6dadac 100644 --- a/drivers/net/wireless/ath9k/xmit.c +++ b/drivers/net/wireless/ath9k/xmit.c @@ -2371,6 +2371,25 @@ int ath_tx_aggr_stop(struct ath_softc *sc, struct ieee80211_sta *sta, u16 tid) return 0; } +/* Resume tx aggregation */ + +void ath_tx_aggr_resume(struct ath_softc *sc, struct ieee80211_sta *sta, u16 tid) +{ + struct ath_atx_tid *txtid; + struct ath_node *an; + + an = (struct ath_node *)sta->drv_priv; + + if (sc->sc_flags & SC_OP_TXAGGR) { + txtid = ATH_AN_2_TID(an, tid); + txtid->baw_size = + IEEE80211_MIN_AMPDU_BUF << sta->ht_cap.ampdu_factor; + txtid->state |= AGGR_ADDBA_COMPLETE; + txtid->state &= ~AGGR_ADDBA_PROGRESS; + ath_tx_resume_tid(sc, txtid); + } +} + /* * Performs transmit side cleanup when TID changes from aggregated to * unaggregated. diff --git a/include/net/mac80211.h b/include/net/mac80211.h index af2ec6f9beb9..53856003aa18 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1127,12 +1127,14 @@ enum ieee80211_filter_flags { * @IEEE80211_AMPDU_RX_STOP: stop Rx aggregation * @IEEE80211_AMPDU_TX_START: start Tx aggregation * @IEEE80211_AMPDU_TX_STOP: stop Tx aggregation + * @IEEE80211_AMPDU_TX_RESUME: resume TX aggregation */ enum ieee80211_ampdu_mlme_action { IEEE80211_AMPDU_RX_START, IEEE80211_AMPDU_RX_STOP, IEEE80211_AMPDU_TX_START, IEEE80211_AMPDU_TX_STOP, + IEEE80211_AMPDU_TX_RESUME, }; /** diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 08009d4b7d6e..3e231d756776 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -987,7 +987,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, { struct ieee80211_hw *hw = &local->hw; u16 capab; - u16 tid; + u16 tid, start_seq_num; u8 *state; capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); @@ -1024,6 +1024,14 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, local->hw.ampdu_queues) ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); + if (local->ops->ampdu_action) { + (void)local->ops->ampdu_action(hw, + IEEE80211_AMPDU_TX_RESUME, + &sta->sta, tid, &start_seq_num); + } +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Resuming TX aggregation for tid %d\n", tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ spin_unlock_bh(&sta->lock); } else { sta->ampdu_mlme.addba_req_num[tid]++; -- cgit v1.2.3 From bd815252720e4b667d9946d050d003ec89bda099 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 29 Oct 2008 20:00:45 +0100 Subject: wireless: implement basic rate helper function This adds a helper function that, given a bitmap of basic rates and a bitrate returns the response rate for this rate. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/wireless.h | 16 ++++++++++++++++ net/wireless/util.c | 19 +++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'include') diff --git a/include/net/wireless.h b/include/net/wireless.h index 41294c5f6f8f..17d4b582cf34 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -340,6 +340,22 @@ ieee80211_get_channel(struct wiphy *wiphy, int freq) return __ieee80211_get_channel(wiphy, freq); } +/** + * ieee80211_get_response_rate - get basic rate for a given rate + * + * @sband: the band to look for rates in + * @basic_rates: bitmap of basic rates + * @bitrate: the bitrate for which to find the basic rate + * + * This function returns the basic rate corresponding to a given + * bitrate, that is the next lower bitrate contained in the basic + * rate map, which is, for this function, given as a bitmap of + * indices of rates in the band's bitrate table. + */ +struct ieee80211_rate * +ieee80211_get_response_rate(struct ieee80211_supported_band *sband, + u64 basic_rates, int bitrate); + /** * regulatory_hint - driver hint to the wireless core a regulatory domain * @wiphy: the wireless device giving the hint (used only for reporting diff --git a/net/wireless/util.c b/net/wireless/util.c index f54424693a38..e76cc28b0345 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -7,6 +7,25 @@ #include #include "core.h" +struct ieee80211_rate * +ieee80211_get_response_rate(struct ieee80211_supported_band *sband, + u64 basic_rates, int bitrate) +{ + struct ieee80211_rate *result = &sband->bitrates[0]; + int i; + + for (i = 0; i < sband->n_bitrates; i++) { + if (!(basic_rates & BIT(i))) + continue; + if (sband->bitrates[i].bitrate > bitrate) + continue; + result = &sband->bitrates[i]; + } + + return result; +} +EXPORT_SYMBOL(ieee80211_get_response_rate); + int ieee80211_channel_to_frequency(int chan) { if (chan < 14) -- cgit v1.2.3 From 90c97a040d6b08cc4890328aa262fdc37336ab01 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 30 Oct 2008 16:59:22 +0200 Subject: nl80211: Add basic rate configuration for AP mode Add a new attribute, NL80211_ATTR_BSS_BASIC_RATES, that can be used with NL80211_CMD_SET_BSS for userspace (e.g., hostapd) to set which rates are in the basic rate set. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 6 ++++++ include/net/cfg80211.h | 5 +++++ net/mac80211/cfg.c | 18 ++++++++++++++++++ net/wireless/nl80211.c | 8 ++++++++ 4 files changed, 37 insertions(+) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index e4cc7869b22f..5009809588c0 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -243,6 +243,9 @@ enum nl80211_commands { * (u8, 0 or 1) * @NL80211_ATTR_BSS_SHORT_SLOT_TIME: whether short slot time enabled * (u8, 0 or 1) + * @NL80211_ATTR_BSS_BASIC_RATES: basic rates, array of basic + * rates in format defined by IEEE 802.11 7.3.2.2 but without the length + * restriction (at most %NL80211_MAX_SUPP_RATES). * * @NL80211_ATTR_HT_CAPABILITY: HT Capability information element (from * association request when used with NL80211_CMD_NEW_STATION) @@ -307,6 +310,8 @@ enum nl80211_attrs { NL80211_ATTR_MESH_PARAMS, + NL80211_ATTR_BSS_BASIC_RATES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -318,6 +323,7 @@ enum nl80211_attrs { * here */ #define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY +#define NL80211_ATTR_BSS_BASIC_RATES NL80211_ATTR_BSS_BASIC_RATES #define NL80211_MAX_SUPP_RATES 32 #define NL80211_MAX_SUPP_REG_RULES 32 diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 03e1e88c6a09..7caf3c76a12f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -280,11 +280,16 @@ struct mpath_info { * (0 = no, 1 = yes, -1 = do not change) * @use_short_slot_time: Whether the use of short slot time is allowed * (0 = no, 1 = yes, -1 = do not change) + * @basic_rates: basic rates in IEEE 802.11 format + * (or NULL for no change) + * @basic_rates_len: number of basic rates */ struct bss_parameters { int use_cts_prot; int use_short_preamble; int use_short_slot_time; + u8 *basic_rates; + u8 basic_rates_len; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 91f56a48e2b4..442a4d7b1808 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1046,6 +1046,24 @@ static int ieee80211_change_bss(struct wiphy *wiphy, changed |= BSS_CHANGED_ERP_SLOT; } + if (params->basic_rates) { + int i, j; + u32 rates = 0; + struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_supported_band *sband = + wiphy->bands[local->oper_channel->band]; + + for (i = 0; i < params->basic_rates_len; i++) { + int rate = (params->basic_rates[i] & 0x7f) * 5; + for (j = 0; j < sband->n_bitrates; j++) { + if (sband->bitrates[j].bitrate == rate) + rates |= BIT(j); + } + } + sdata->vif.bss_conf.basic_rates = rates; + changed |= BSS_CHANGED_BASIC_RATES; + } + ieee80211_bss_info_change_notify(sdata, changed); return 0; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5e1d658a8b5a..1ea5e3fd3931 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -95,6 +95,8 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 }, [NL80211_ATTR_BSS_SHORT_PREAMBLE] = { .type = NLA_U8 }, [NL80211_ATTR_BSS_SHORT_SLOT_TIME] = { .type = NLA_U8 }, + [NL80211_ATTR_BSS_BASIC_RATES] = { .type = NLA_BINARY, + .len = NL80211_MAX_SUPP_RATES }, [NL80211_ATTR_MESH_PARAMS] = { .type = NLA_NESTED }, @@ -1613,6 +1615,12 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_BSS_SHORT_SLOT_TIME]) params.use_short_slot_time = nla_get_u8(info->attrs[NL80211_ATTR_BSS_SHORT_SLOT_TIME]); + if (info->attrs[NL80211_ATTR_BSS_BASIC_RATES]) { + params.basic_rates = + nla_data(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); + params.basic_rates_len = + nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); + } err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) -- cgit v1.2.3 From 318884875bdddca663ecc373c813cf8e117d9e43 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 30 Oct 2008 16:59:24 +0200 Subject: nl80211: Add TX queue parameter configuration Add a new attribute, NL80211_ATTR_WIPHY_TXQ_PARAMS, that can be used with NL80211_CMD_SET_WIPHY for userspace (e.g., hostapd) to set TX queue parameters (txop, cwmin, cwmax, aifs). Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 43 +++++++++++++++++++++++++++++-- include/net/cfg80211.h | 23 +++++++++++++++++ net/mac80211/cfg.c | 25 ++++++++++++++++++ net/wireless/nl80211.c | 67 +++++++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 151 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 5009809588c0..79827345351d 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -25,8 +25,9 @@ * * @NL80211_CMD_GET_WIPHY: request information about a wiphy or dump request * to get a list of all present wiphys. - * @NL80211_CMD_SET_WIPHY: set wiphy name, needs %NL80211_ATTR_WIPHY and - * %NL80211_ATTR_WIPHY_NAME. + * @NL80211_CMD_SET_WIPHY: set wiphy parameters, needs %NL80211_ATTR_WIPHY or + * %NL80211_ATTR_IFINDEX; can be used to set %NL80211_ATTR_WIPHY_NAME + * and/or %NL80211_ATTR_WIPHY_TXQ_PARAMS. * @NL80211_CMD_NEW_WIPHY: Newly created wiphy, response to get request * or rename notification. Has attributes %NL80211_ATTR_WIPHY and * %NL80211_ATTR_WIPHY_NAME. @@ -178,6 +179,7 @@ enum nl80211_commands { * @NL80211_ATTR_WIPHY: index of wiphy to operate on, cf. * /sys/class/ieee80211//index * @NL80211_ATTR_WIPHY_NAME: wiphy name (used for renaming) + * @NL80211_ATTR_WIPHY_TXQ_PARAMS: a nested array of TX queue parameters * * @NL80211_ATTR_IFINDEX: network interface index of the device to operate on * @NL80211_ATTR_IFNAME: network interface name @@ -312,6 +314,8 @@ enum nl80211_attrs { NL80211_ATTR_BSS_BASIC_RATES, + NL80211_ATTR_WIPHY_TXQ_PARAMS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -324,6 +328,7 @@ enum nl80211_attrs { */ #define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY #define NL80211_ATTR_BSS_BASIC_RATES NL80211_ATTR_BSS_BASIC_RATES +#define NL80211_ATTR_WIPHY_TXQ_PARAMS NL80211_ATTR_WIPHY_TXQ_PARAMS #define NL80211_MAX_SUPP_RATES 32 #define NL80211_MAX_SUPP_REG_RULES 32 @@ -698,4 +703,38 @@ enum nl80211_meshconf_params { NL80211_MESHCONF_ATTR_MAX = __NL80211_MESHCONF_ATTR_AFTER_LAST - 1 }; +/** + * enum nl80211_txq_attr - TX queue parameter attributes + * @__NL80211_TXQ_ATTR_INVALID: Attribute number 0 is reserved + * @NL80211_TXQ_ATTR_QUEUE: TX queue identifier (NL80211_TXQ_Q_*) + * @NL80211_TXQ_ATTR_TXOP: Maximum burst time in units of 32 usecs, 0 meaning + * disabled + * @NL80211_TXQ_ATTR_CWMIN: Minimum contention window [a value of the form + * 2^n-1 in the range 1..32767] + * @NL80211_TXQ_ATTR_CWMAX: Maximum contention window [a value of the form + * 2^n-1 in the range 1..32767] + * @NL80211_TXQ_ATTR_AIFS: Arbitration interframe space [0..255] + * @__NL80211_TXQ_ATTR_AFTER_LAST: Internal + * @NL80211_TXQ_ATTR_MAX: Maximum TXQ attribute number + */ +enum nl80211_txq_attr { + __NL80211_TXQ_ATTR_INVALID, + NL80211_TXQ_ATTR_QUEUE, + NL80211_TXQ_ATTR_TXOP, + NL80211_TXQ_ATTR_CWMIN, + NL80211_TXQ_ATTR_CWMAX, + NL80211_TXQ_ATTR_AIFS, + + /* keep last */ + __NL80211_TXQ_ATTR_AFTER_LAST, + NL80211_TXQ_ATTR_MAX = __NL80211_TXQ_ATTR_AFTER_LAST - 1 +}; + +enum nl80211_txq_q { + NL80211_TXQ_Q_VO, + NL80211_TXQ_Q_VI, + NL80211_TXQ_Q_BE, + NL80211_TXQ_Q_BK +}; + #endif /* __LINUX_NL80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7caf3c76a12f..448023193e55 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -371,6 +371,24 @@ struct mesh_config { u16 dot11MeshHWMPnetDiameterTraversalTime; }; +/** + * struct ieee80211_txq_params - TX queue parameters + * @queue: TX queue identifier (NL80211_TXQ_Q_*) + * @txop: Maximum burst time in units of 32 usecs, 0 meaning disabled + * @cwmin: Minimum contention window [a value of the form 2^n-1 in the range + * 1..32767] + * @cwmax: Maximum contention window [a value of the form 2^n-1 in the range + * 1..32767] + * @aifs: Arbitration interframe space [0..255] + */ +struct ieee80211_txq_params { + enum nl80211_txq_q queue; + u16 txop; + u16 cwmin; + u16 cwmax; + u8 aifs; +}; + /* from net/wireless.h */ struct wiphy; @@ -430,6 +448,8 @@ struct wiphy; * @set_mesh_cfg: set mesh parameters (by now, just mesh id) * * @change_bss: Modify parameters for a given BSS. + * + * @set_txq_params: Set TX queue parameters */ struct cfg80211_ops { int (*add_virtual_intf)(struct wiphy *wiphy, char *name, @@ -490,6 +510,9 @@ struct cfg80211_ops { const struct mesh_config *nconf, u32 mask); int (*change_bss)(struct wiphy *wiphy, struct net_device *dev, struct bss_parameters *params); + + int (*set_txq_params)(struct wiphy *wiphy, + struct ieee80211_txq_params *params); }; #endif /* __NET_CFG80211_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 442a4d7b1808..fe672faa819d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1069,6 +1069,30 @@ static int ieee80211_change_bss(struct wiphy *wiphy, return 0; } +static int ieee80211_set_txq_params(struct wiphy *wiphy, + struct ieee80211_txq_params *params) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + struct ieee80211_tx_queue_params p; + + if (!local->ops->conf_tx) + return -EOPNOTSUPP; + + memset(&p, 0, sizeof(p)); + p.aifs = params->aifs; + p.cw_max = params->cwmax; + p.cw_min = params->cwmin; + p.txop = params->txop; + if (local->ops->conf_tx(local_to_hw(local), params->queue, &p)) { + printk(KERN_DEBUG "%s: failed to set TX queue " + "parameters for queue %d\n", local->mdev->name, + params->queue); + return -EINVAL; + } + + return 0; +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1095,4 +1119,5 @@ struct cfg80211_ops mac80211_config_ops = { .get_mesh_params = ieee80211_get_mesh_params, #endif .change_bss = ieee80211_change_bss, + .set_txq_params = ieee80211_set_txq_params, }; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 1ea5e3fd3931..e3e1494e769a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -58,6 +58,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING, .len = BUS_ID_SIZE-1 }, + [NL80211_ATTR_WIPHY_TXQ_PARAMS] = { .type = NLA_NESTED }, [NL80211_ATTR_IFTYPE] = { .type = NLA_U32 }, [NL80211_ATTR_IFINDEX] = { .type = NLA_U32 }, @@ -286,20 +287,76 @@ static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) return -ENOBUFS; } +static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = { + [NL80211_TXQ_ATTR_QUEUE] = { .type = NLA_U8 }, + [NL80211_TXQ_ATTR_TXOP] = { .type = NLA_U16 }, + [NL80211_TXQ_ATTR_CWMIN] = { .type = NLA_U16 }, + [NL80211_TXQ_ATTR_CWMAX] = { .type = NLA_U16 }, + [NL80211_TXQ_ATTR_AIFS] = { .type = NLA_U8 }, +}; + +static int parse_txq_params(struct nlattr *tb[], + struct ieee80211_txq_params *txq_params) +{ + if (!tb[NL80211_TXQ_ATTR_QUEUE] || !tb[NL80211_TXQ_ATTR_TXOP] || + !tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] || + !tb[NL80211_TXQ_ATTR_AIFS]) + return -EINVAL; + + txq_params->queue = nla_get_u8(tb[NL80211_TXQ_ATTR_QUEUE]); + txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]); + txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]); + txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]); + txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]); + + return 0; +} + static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev; - int result; - - if (!info->attrs[NL80211_ATTR_WIPHY_NAME]) - return -EINVAL; + int result = 0, rem_txq_params = 0; + struct nlattr *nl_txq_params; rdev = cfg80211_get_dev_from_info(info); if (IS_ERR(rdev)) return PTR_ERR(rdev); - result = cfg80211_dev_rename(rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME])); + if (info->attrs[NL80211_ATTR_WIPHY_NAME]) { + result = cfg80211_dev_rename( + rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME])); + if (result) + goto bad_res; + } + + if (info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS]) { + struct ieee80211_txq_params txq_params; + struct nlattr *tb[NL80211_TXQ_ATTR_MAX + 1]; + + if (!rdev->ops->set_txq_params) { + result = -EOPNOTSUPP; + goto bad_res; + } + + nla_for_each_nested(nl_txq_params, + info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS], + rem_txq_params) { + nla_parse(tb, NL80211_TXQ_ATTR_MAX, + nla_data(nl_txq_params), + nla_len(nl_txq_params), + txq_params_policy); + result = parse_txq_params(tb, &txq_params); + if (result) + goto bad_res; + + result = rdev->ops->set_txq_params(&rdev->wiphy, + &txq_params); + if (result) + goto bad_res; + } + } +bad_res: cfg80211_put_dev(rdev); return result; } -- cgit v1.2.3 From fc6971d491517ba15e800540ff88caa55dc65b01 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 30 Oct 2008 19:59:05 +0200 Subject: mac80211_hwsim: Add support for client PS mode This introduces a debugfs file (ieee80211/phy#/hwsim/ps) that can be used to force a simulated radio into power save mode. Following values can be written into this file to change PS mode: 0 = power save disabled (constantly awake) 1 = power save enabled (drop all frames; do not send PS-Poll) 2 = power save enabled (send PS-Poll frames automatically to receive buffered unicast frames); not yet fully implemented 3 = manual PS-Poll trigger (send a single PS-Poll frame) Two different behavior for power save mode processing can be tested: - move between modes 1 and 0 (i.e., receive all buffered frames at a time) - move to mode 1 and use manual PS-Poll frames (write 3 to the 'ps' debugfs file) to fetch power save buffered frames one at a time Mode 2 (automatic PS-Poll) does not yet parse Beacon frames, but eventually, it should take a look at TIM IE and send PS-Poll if a traffic bit is set for our AID. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- drivers/net/wireless/mac80211_hwsim.c | 184 ++++++++++++++++++++++++++++++++++ include/linux/ieee80211.h | 7 ++ 2 files changed, 191 insertions(+) (limited to 'include') diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index bc9da9393760..b9230da925ee 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -21,6 +21,7 @@ #include #include #include +#include MODULE_AUTHOR("Jouni Malinen"); MODULE_DESCRIPTION("Software simulator of 802.11 radio(s) for mac80211"); @@ -32,6 +33,9 @@ MODULE_PARM_DESC(radios, "Number of simulated radios"); struct hwsim_vif_priv { u32 magic; + u8 bssid[ETH_ALEN]; + bool assoc; + u16 aid; }; #define HWSIM_VIF_MAGIC 0x69537748 @@ -132,6 +136,12 @@ struct mac80211_hwsim_data { unsigned int rx_filter; int started; struct timer_list beacon_timer; + enum ps_mode { + PS_DISABLED, PS_ENABLED, PS_AUTO_POLL, PS_MANUAL_POLL + } ps; + bool ps_poll_pending; + struct dentry *debugfs; + struct dentry *debugfs_ps; }; @@ -196,6 +206,34 @@ static void mac80211_hwsim_monitor_rx(struct ieee80211_hw *hw, } +static bool hwsim_ps_rx_ok(struct mac80211_hwsim_data *data, + struct sk_buff *skb) +{ + switch (data->ps) { + case PS_DISABLED: + return true; + case PS_ENABLED: + return false; + case PS_AUTO_POLL: + /* TODO: accept (some) Beacons by default and other frames only + * if pending PS-Poll has been sent */ + return true; + case PS_MANUAL_POLL: + /* Allow unicast frames to own address if there is a pending + * PS-Poll */ + if (data->ps_poll_pending && + memcmp(data->hw->wiphy->perm_addr, skb->data + 4, + ETH_ALEN) == 0) { + data->ps_poll_pending = false; + return true; + } + return false; + } + + return true; +} + + static bool mac80211_hwsim_tx_frame(struct ieee80211_hw *hw, struct sk_buff *skb) { @@ -212,6 +250,9 @@ static bool mac80211_hwsim_tx_frame(struct ieee80211_hw *hw, rx_status.rate_idx = info->control.rates[0].idx; /* TODO: simulate signal strength (and optional packet drop) */ + if (data->ps != PS_DISABLED) + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PM); + /* Copy skb to all enabled radios that are on the current frequency */ spin_lock(&hwsim_radio_lock); list_for_each_entry(data2, &hwsim_radios, list) { @@ -221,6 +262,7 @@ static bool mac80211_hwsim_tx_frame(struct ieee80211_hw *hw, continue; if (!data2->started || !data2->radio_enabled || + !hwsim_ps_rx_ok(data2, skb) || data->channel->center_freq != data2->channel->center_freq) continue; @@ -404,7 +446,16 @@ static int mac80211_hwsim_config_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_if_conf *conf) { + struct hwsim_vif_priv *vp = (void *)vif->drv_priv; + hwsim_check_magic(vif); + if (conf->changed & IEEE80211_IFCC_BSSID) { + DECLARE_MAC_BUF(mac); + printk(KERN_DEBUG "%s:%s: BSSID changed: %s\n", + wiphy_name(hw->wiphy), __func__, + print_mac(mac, conf->bssid)); + memcpy(vp->bssid, conf->bssid, ETH_ALEN); + } return 0; } @@ -413,6 +464,8 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_bss_conf *info, u32 changed) { + struct hwsim_vif_priv *vp = (void *)vif->drv_priv; + hwsim_check_magic(vif); printk(KERN_DEBUG "%s:%s(changed=0x%x)\n", @@ -421,6 +474,8 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw, if (changed & BSS_CHANGED_ASSOC) { printk(KERN_DEBUG " %s: ASSOC: assoc=%d aid=%d\n", wiphy_name(hw->wiphy), info->assoc, info->aid); + vp->assoc = info->assoc; + vp->aid = info->aid; } if (changed & BSS_CHANGED_ERP_CTS_PROT) { @@ -518,6 +573,8 @@ static void mac80211_hwsim_free(void) spin_unlock_bh(&hwsim_radio_lock); list_for_each_entry(data, &tmplist, list) { + debugfs_remove(data->debugfs_ps); + debugfs_remove(data->debugfs); ieee80211_unregister_hw(data->hw); device_unregister(data->dev); ieee80211_free_hw(data->hw); @@ -543,6 +600,127 @@ static void hwsim_mon_setup(struct net_device *dev) } +static void hwsim_send_ps_poll(void *dat, u8 *mac, struct ieee80211_vif *vif) +{ + struct mac80211_hwsim_data *data = dat; + struct hwsim_vif_priv *vp = (void *)vif->drv_priv; + DECLARE_MAC_BUF(buf); + struct sk_buff *skb; + struct ieee80211_pspoll *pspoll; + + if (!vp->assoc) + return; + + printk(KERN_DEBUG "%s:%s: send PS-Poll to %s for aid %d\n", + wiphy_name(data->hw->wiphy), __func__, + print_mac(buf, vp->bssid), vp->aid); + + skb = dev_alloc_skb(sizeof(*pspoll)); + if (!skb) + return; + pspoll = (void *) skb_put(skb, sizeof(*pspoll)); + pspoll->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL | + IEEE80211_STYPE_PSPOLL | + IEEE80211_FCTL_PM); + pspoll->aid = cpu_to_le16(0xc000 | vp->aid); + memcpy(pspoll->bssid, vp->bssid, ETH_ALEN); + memcpy(pspoll->ta, mac, ETH_ALEN); + if (data->radio_enabled && + !mac80211_hwsim_tx_frame(data->hw, skb)) + printk(KERN_DEBUG "%s: PS-Poll frame not ack'ed\n", __func__); + dev_kfree_skb(skb); +} + + +static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac, + struct ieee80211_vif *vif, int ps) +{ + struct hwsim_vif_priv *vp = (void *)vif->drv_priv; + DECLARE_MAC_BUF(buf); + struct sk_buff *skb; + struct ieee80211_hdr *hdr; + + if (!vp->assoc) + return; + + printk(KERN_DEBUG "%s:%s: send data::nullfunc to %s ps=%d\n", + wiphy_name(data->hw->wiphy), __func__, + print_mac(buf, vp->bssid), ps); + + skb = dev_alloc_skb(sizeof(*hdr)); + if (!skb) + return; + hdr = (void *) skb_put(skb, sizeof(*hdr) - ETH_ALEN); + hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA | + IEEE80211_STYPE_NULLFUNC | + (ps ? IEEE80211_FCTL_PM : 0)); + hdr->duration_id = cpu_to_le16(0); + memcpy(hdr->addr1, vp->bssid, ETH_ALEN); + memcpy(hdr->addr2, mac, ETH_ALEN); + memcpy(hdr->addr3, vp->bssid, ETH_ALEN); + if (data->radio_enabled && + !mac80211_hwsim_tx_frame(data->hw, skb)) + printk(KERN_DEBUG "%s: nullfunc frame not ack'ed\n", __func__); + dev_kfree_skb(skb); +} + + +static void hwsim_send_nullfunc_ps(void *dat, u8 *mac, + struct ieee80211_vif *vif) +{ + struct mac80211_hwsim_data *data = dat; + hwsim_send_nullfunc(data, mac, vif, 1); +} + + +static void hwsim_send_nullfunc_no_ps(void *dat, u8 *mac, + struct ieee80211_vif *vif) +{ + struct mac80211_hwsim_data *data = dat; + hwsim_send_nullfunc(data, mac, vif, 0); +} + + +static int hwsim_fops_ps_read(void *dat, u64 *val) +{ + struct mac80211_hwsim_data *data = dat; + *val = data->ps; + return 0; +} + +static int hwsim_fops_ps_write(void *dat, u64 val) +{ + struct mac80211_hwsim_data *data = dat; + enum ps_mode old_ps; + + if (val != PS_DISABLED && val != PS_ENABLED && val != PS_AUTO_POLL && + val != PS_MANUAL_POLL) + return -EINVAL; + + old_ps = data->ps; + data->ps = val; + + if (val == PS_MANUAL_POLL) { + ieee80211_iterate_active_interfaces(data->hw, + hwsim_send_ps_poll, data); + data->ps_poll_pending = true; + } else if (old_ps == PS_DISABLED && val != PS_DISABLED) { + ieee80211_iterate_active_interfaces(data->hw, + hwsim_send_nullfunc_ps, + data); + } else if (old_ps != PS_DISABLED && val == PS_DISABLED) { + ieee80211_iterate_active_interfaces(data->hw, + hwsim_send_nullfunc_no_ps, + data); + } + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(hwsim_fops_ps, hwsim_fops_ps_read, hwsim_fops_ps_write, + "%llu\n"); + + static int __init init_mac80211_hwsim(void) { int i, err = 0; @@ -634,6 +812,12 @@ static int __init init_mac80211_hwsim(void) wiphy_name(hw->wiphy), hw->wiphy->perm_addr); + data->debugfs = debugfs_create_dir("hwsim", + hw->wiphy->debugfsdir); + data->debugfs_ps = debugfs_create_file("ps", 0666, + data->debugfs, data, + &hwsim_fops_ps); + setup_timer(&data->beacon_timer, mac80211_hwsim_beacon, (unsigned long) hw); diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 9dc288b920c8..56b0eb25d927 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -669,6 +669,13 @@ struct ieee80211_cts { u8 ra[6]; } __attribute__ ((packed)); +struct ieee80211_pspoll { + __le16 frame_control; + __le16 aid; + u8 bssid[6]; + u8 ta[6]; +} __attribute__ ((packed)); + /** * struct ieee80211_bar - HT Block Ack Request * -- cgit v1.2.3 From b219cee191e7cfe88a695a57249a295d0d5b22e9 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 30 Oct 2008 13:33:55 -0700 Subject: cfg80211: make use of reg macros on REG_RULE Ensure regulatory converstion macros safely accept multiple arguments and make REG_RULE() use them. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- include/net/cfg80211.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 448023193e55..1d57835d73f2 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -336,19 +336,19 @@ struct ieee80211_regdomain { struct ieee80211_reg_rule reg_rules[]; }; -#define MHZ_TO_KHZ(freq) (freq * 1000) -#define KHZ_TO_MHZ(freq) (freq / 1000) -#define DBI_TO_MBI(gain) (gain * 100) -#define MBI_TO_DBI(gain) (gain / 100) -#define DBM_TO_MBM(gain) (gain * 100) -#define MBM_TO_DBM(gain) (gain / 100) +#define MHZ_TO_KHZ(freq) ((freq) * 1000) +#define KHZ_TO_MHZ(freq) ((freq) / 1000) +#define DBI_TO_MBI(gain) ((gain) * 100) +#define MBI_TO_DBI(gain) ((gain) / 100) +#define DBM_TO_MBM(gain) ((gain) * 100) +#define MBM_TO_DBM(gain) ((gain) / 100) #define REG_RULE(start, end, bw, gain, eirp, reg_flags) { \ - .freq_range.start_freq_khz = (start) * 1000, \ - .freq_range.end_freq_khz = (end) * 1000, \ - .freq_range.max_bandwidth_khz = (bw) * 1000, \ - .power_rule.max_antenna_gain = (gain) * 100, \ - .power_rule.max_eirp = (eirp) * 100, \ + .freq_range.start_freq_khz = MHZ_TO_KHZ(start), \ + .freq_range.end_freq_khz = MHZ_TO_KHZ(end), \ + .freq_range.max_bandwidth_khz = MHZ_TO_KHZ(bw), \ + .power_rule.max_antenna_gain = DBI_TO_MBI(gain), \ + .power_rule.max_eirp = DBM_TO_MBM(eirp), \ .flags = reg_flags, \ } -- cgit v1.2.3 From fb28ad35906af2f042c94e2f9c0f898ef9acfa37 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Mon, 10 Nov 2008 13:55:14 -0800 Subject: net: struct device - replace bus_id with dev_name(), dev_set_name() Acked-by: Marcel Holtmann Acked-by: Greg Kroah-Hartman Signed-off-by: Kay Sievers Signed-off-by: David S. Miller --- drivers/net/3c59x.c | 4 ++-- drivers/net/defxx.c | 2 +- drivers/net/enc28j60.c | 2 +- drivers/net/fec_mpc52xx.c | 2 +- drivers/net/gianfar.c | 2 +- drivers/net/mlx4/mlx4_en.h | 8 ++++---- drivers/net/pasemi_mac.c | 3 ++- drivers/net/phy/mdio_bus.c | 4 ++-- drivers/net/phy/phy.c | 2 +- drivers/net/phy/phy_device.c | 4 ++-- drivers/net/sh_eth.c | 2 +- drivers/net/tg3.c | 4 ++-- drivers/net/tulip/de4x5.c | 4 ++-- drivers/net/ucc_geth.c | 4 ++-- drivers/net/wireless/libertas/defs.h | 2 +- drivers/net/wireless/orinoco/orinoco.c | 2 +- drivers/net/wireless/orinoco/orinoco_cs.c | 2 +- drivers/net/wireless/orinoco/spectrum_cs.c | 2 +- include/net/wireless.h | 4 ++-- net/atm/atm_sysfs.c | 2 +- net/bluetooth/hci_sysfs.c | 7 +++---- net/core/net-sysfs.c | 2 +- net/dsa/slave.c | 2 +- net/rfkill/rfkill.c | 5 ++--- net/wireless/core.c | 3 +-- 25 files changed, 39 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c index b4168dfd6893..3893f505fb5f 100644 --- a/drivers/net/3c59x.c +++ b/drivers/net/3c59x.c @@ -1025,7 +1025,7 @@ static int __devinit vortex_probe1(struct device *gendev, } if ((edev = DEVICE_EISA(gendev))) { - print_name = edev->dev.bus_id; + print_name = dev_name(&edev->dev); } } @@ -2883,7 +2883,7 @@ static void vortex_get_drvinfo(struct net_device *dev, strcpy(info->bus_info, pci_name(VORTEX_PCI(vp))); } else { if (VORTEX_EISA(vp)) - sprintf(info->bus_info, vp->gendev->bus_id); + sprintf(info->bus_info, dev_name(vp->gendev)); else sprintf(info->bus_info, "EISA 0x%lx %d", dev->base_addr, dev->irq); diff --git a/drivers/net/defxx.c b/drivers/net/defxx.c index f07887435247..6e6583b609f7 100644 --- a/drivers/net/defxx.c +++ b/drivers/net/defxx.c @@ -511,7 +511,7 @@ static int __devinit dfx_register(struct device *bdev) int dfx_bus_pci = DFX_BUS_PCI(bdev); int dfx_bus_tc = DFX_BUS_TC(bdev); int dfx_use_mmio = DFX_MMIO || dfx_bus_tc; - char *print_name = bdev->bus_id; + const char *print_name = dev_name(bdev); struct net_device *dev; DFX_board_t *bp; /* board pointer */ resource_size_t bar_start = 0; /* pointer to port */ diff --git a/drivers/net/enc28j60.c b/drivers/net/enc28j60.c index d186a52cdb62..32c19790d013 100644 --- a/drivers/net/enc28j60.c +++ b/drivers/net/enc28j60.c @@ -1448,7 +1448,7 @@ enc28j60_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); strlcpy(info->version, DRV_VERSION, sizeof(info->version)); strlcpy(info->bus_info, - dev->dev.parent->bus_id, sizeof(info->bus_info)); + dev_name(dev->dev.parent), sizeof(info->bus_info)); } static int diff --git a/drivers/net/fec_mpc52xx.c b/drivers/net/fec_mpc52xx.c index 94054b4981f3..cd8e98b45ec5 100644 --- a/drivers/net/fec_mpc52xx.c +++ b/drivers/net/fec_mpc52xx.c @@ -216,7 +216,7 @@ static int mpc52xx_fec_init_phy(struct net_device *dev) struct phy_device *phydev; char phy_id[BUS_ID_SIZE]; - snprintf(phy_id, BUS_ID_SIZE, "%x:%02x", + snprintf(phy_id, sizeof(phy_id), "%x:%02x", (unsigned int)dev->base_addr, priv->phy_addr); priv->link = PHY_DOWN; diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index a091db393615..451f6b8b6163 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -546,7 +546,7 @@ static int init_phy(struct net_device *dev) priv->oldspeed = 0; priv->oldduplex = -1; - snprintf(phy_id, BUS_ID_SIZE, PHY_ID_FMT, priv->einfo->bus_id, priv->einfo->phy_id); + snprintf(phy_id, sizeof(phy_id), PHY_ID_FMT, priv->einfo->bus_id, priv->einfo->phy_id); interface = gfar_get_interface(dev); diff --git a/drivers/net/mlx4/mlx4_en.h b/drivers/net/mlx4/mlx4_en.h index 11fb17c6e97b..cc022197e2a5 100644 --- a/drivers/net/mlx4/mlx4_en.h +++ b/drivers/net/mlx4/mlx4_en.h @@ -58,17 +58,17 @@ #define mlx4_dbg(mlevel, priv, format, arg...) \ if (NETIF_MSG_##mlevel & priv->msg_enable) \ printk(KERN_DEBUG "%s %s: " format , DRV_NAME ,\ - (&priv->mdev->pdev->dev)->bus_id , ## arg) + (dev_name(&priv->mdev->pdev->dev)) , ## arg) #define mlx4_err(mdev, format, arg...) \ printk(KERN_ERR "%s %s: " format , DRV_NAME ,\ - (&mdev->pdev->dev)->bus_id , ## arg) + (dev_name(&mdev->pdev->dev)) , ## arg) #define mlx4_info(mdev, format, arg...) \ printk(KERN_INFO "%s %s: " format , DRV_NAME ,\ - (&mdev->pdev->dev)->bus_id , ## arg) + (dev_name(&mdev->pdev->dev)) , ## arg) #define mlx4_warn(mdev, format, arg...) \ printk(KERN_WARNING "%s %s: " format , DRV_NAME ,\ - (&mdev->pdev->dev)->bus_id , ## arg) + (dev_name(&mdev->pdev->dev)) , ## arg) /* * Device constants diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c index b0270052c3a9..fcbf6ccd0a85 100644 --- a/drivers/net/pasemi_mac.c +++ b/drivers/net/pasemi_mac.c @@ -1105,7 +1105,8 @@ static int pasemi_mac_phy_init(struct net_device *dev) goto err; phy_id = *prop; - snprintf(mac->phy_id, BUS_ID_SIZE, "%x:%02x", (int)r.start, phy_id); + snprintf(mac->phy_id, sizeof(mac->phy_id), "%x:%02x", + (int)r.start, phy_id); of_node_put(phy_dn); diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index d0ed1ef284a8..6afd35f62d77 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -97,7 +97,7 @@ int mdiobus_register(struct mii_bus *bus) bus->dev.parent = bus->parent; bus->dev.class = &mdio_bus_class; bus->dev.groups = NULL; - memcpy(bus->dev.bus_id, bus->id, MII_BUS_ID_SIZE); + dev_set_name(&bus->dev, bus->id); err = device_register(&bus->dev); if (err) { @@ -191,7 +191,7 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr) phydev->dev.parent = bus->parent; phydev->dev.bus = &mdio_bus_type; - snprintf(phydev->dev.bus_id, BUS_ID_SIZE, PHY_ID_FMT, bus->id, addr); + dev_set_name(&phydev->dev, PHY_ID_FMT, bus->id, addr); phydev->bus = bus; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index df4e6257d4a7..e4ede6080c9d 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -45,7 +45,7 @@ */ void phy_print_status(struct phy_device *phydev) { - pr_info("PHY: %s - Link is %s", phydev->dev.bus_id, + pr_info("PHY: %s - Link is %s", dev_name(&phydev->dev), phydev->link ? "Up" : "Down"); if (phydev->link) printk(" - %d/%s", phydev->speed, diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index e11b03b2b25a..e976c1c60095 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -74,7 +74,7 @@ int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, if (!fixup) return -ENOMEM; - strncpy(fixup->bus_id, bus_id, BUS_ID_SIZE); + strlcpy(fixup->bus_id, bus_id, sizeof(fixup->bus_id)); fixup->phy_uid = phy_uid; fixup->phy_uid_mask = phy_uid_mask; fixup->run = run; @@ -109,7 +109,7 @@ EXPORT_SYMBOL(phy_register_fixup_for_id); */ static int phy_needs_fixup(struct phy_device *phydev, struct phy_fixup *fixup) { - if (strcmp(fixup->bus_id, phydev->dev.bus_id) != 0) + if (strcmp(fixup->bus_id, dev_name(&phydev->dev)) != 0) if (strcmp(fixup->bus_id, PHY_ANY_ID) != 0) return 0; diff --git a/drivers/net/sh_eth.c b/drivers/net/sh_eth.c index c51bfc57d405..077d796ccb70 100644 --- a/drivers/net/sh_eth.c +++ b/drivers/net/sh_eth.c @@ -799,7 +799,7 @@ static int sh_eth_phy_init(struct net_device *ndev) char phy_id[BUS_ID_SIZE]; struct phy_device *phydev = NULL; - snprintf(phy_id, BUS_ID_SIZE, PHY_ID_FMT, + snprintf(phy_id, sizeof(phy_id), PHY_ID_FMT, mdp->mii_bus->id , mdp->phy_id); mdp->link = PHY_DOWN; diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 546b9eeaa171..e05849ee9000 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -1396,7 +1396,7 @@ static int tg3_phy_init(struct tg3 *tp) phydev = tp->mdio_bus->phy_map[PHY_ADDR]; /* Attach the MAC to the PHY. */ - phydev = phy_connect(tp->dev, phydev->dev.bus_id, tg3_adjust_link, + phydev = phy_connect(tp->dev, dev_name(&phydev->dev), tg3_adjust_link, phydev->dev_flags, phydev->interface); if (IS_ERR(phydev)) { printk(KERN_ERR "%s: Could not attach to PHY\n", tp->dev->name); @@ -13645,7 +13645,7 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, "%s: attached PHY driver [%s] (mii_bus:phy_addr=%s)\n", tp->dev->name, tp->mdio_bus->phy_map[PHY_ADDR]->drv->name, - tp->mdio_bus->phy_map[PHY_ADDR]->dev.bus_id); + dev_name(&tp->mdio_bus->phy_map[PHY_ADDR]->dev)); else printk(KERN_INFO "%s: attached PHY is %s (%s Ethernet) (WireSpeed[%d])\n", diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c index f8a45253eaf8..67bfd6f43366 100644 --- a/drivers/net/tulip/de4x5.c +++ b/drivers/net/tulip/de4x5.c @@ -1118,7 +1118,7 @@ de4x5_hw_init(struct net_device *dev, u_long iobase, struct device *gendev) } dev->base_addr = iobase; - printk ("%s: %s at 0x%04lx", gendev->bus_id, name, iobase); + printk ("%s: %s at 0x%04lx", dev_name(gendev), name, iobase); status = get_hw_addr(dev); printk(", h/w address %pM\n", dev->dev_addr); @@ -1153,7 +1153,7 @@ de4x5_hw_init(struct net_device *dev, u_long iobase, struct device *gendev) } } lp->fdx = lp->params.fdx; - sprintf(lp->adapter_name,"%s (%s)", name, gendev->bus_id); + sprintf(lp->adapter_name,"%s (%s)", name, dev_name(gendev)); lp->dma_size = (NUM_RX_DESC + NUM_TX_DESC) * sizeof(struct de4x5_desc); #if defined(__alpha__) || defined(__powerpc__) || defined(CONFIG_SPARC) || defined(DE4X5_DO_MEMCPY) diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c index 4931af736630..0a5b817fd7ac 100644 --- a/drivers/net/ucc_geth.c +++ b/drivers/net/ucc_geth.c @@ -1615,8 +1615,8 @@ static int init_phy(struct net_device *dev) priv->oldspeed = 0; priv->oldduplex = -1; - snprintf(phy_id, BUS_ID_SIZE, PHY_ID_FMT, priv->ug_info->mdio_bus, - priv->ug_info->phy_address); + snprintf(phy_id, sizeof(phy_id), PHY_ID_FMT, priv->ug_info->mdio_bus, + priv->ug_info->phy_address); phydev = phy_connect(dev, phy_id, &adjust_link, 0, priv->phy_interface); diff --git a/drivers/net/wireless/libertas/defs.h b/drivers/net/wireless/libertas/defs.h index 076a636e8f62..2d4666f26808 100644 --- a/drivers/net/wireless/libertas/defs.h +++ b/drivers/net/wireless/libertas/defs.h @@ -79,7 +79,7 @@ do { if ((lbs_debug & (grp)) == (grp)) \ #define lbs_deb_tx(fmt, args...) LBS_DEB_LL(LBS_DEB_TX, " tx", fmt, ##args) #define lbs_deb_fw(fmt, args...) LBS_DEB_LL(LBS_DEB_FW, " fw", fmt, ##args) #define lbs_deb_usb(fmt, args...) LBS_DEB_LL(LBS_DEB_USB, " usb", fmt, ##args) -#define lbs_deb_usbd(dev, fmt, args...) LBS_DEB_LL(LBS_DEB_USB, " usbd", "%s:" fmt, (dev)->bus_id, ##args) +#define lbs_deb_usbd(dev, fmt, args...) LBS_DEB_LL(LBS_DEB_USB, " usbd", "%s:" fmt, dev_name(dev), ##args) #define lbs_deb_cs(fmt, args...) LBS_DEB_LL(LBS_DEB_CS, " cs", fmt, ##args) #define lbs_deb_thread(fmt, args...) LBS_DEB_LL(LBS_DEB_THREAD, " thread", fmt, ##args) #define lbs_deb_sdio(fmt, args...) LBS_DEB_LL(LBS_DEB_SDIO, " sdio", fmt, ##args) diff --git a/drivers/net/wireless/orinoco/orinoco.c b/drivers/net/wireless/orinoco/orinoco.c index f4ea08f96970..072be44b37de 100644 --- a/drivers/net/wireless/orinoco/orinoco.c +++ b/drivers/net/wireless/orinoco/orinoco.c @@ -5987,7 +5987,7 @@ static void orinoco_get_drvinfo(struct net_device *dev, strncpy(info->version, DRIVER_VERSION, sizeof(info->version) - 1); strncpy(info->fw_version, priv->fw_name, sizeof(info->fw_version) - 1); if (dev->dev.parent) - strncpy(info->bus_info, dev->dev.parent->bus_id, + strncpy(info->bus_info, dev_name(dev->dev.parent), sizeof(info->bus_info) - 1); else snprintf(info->bus_info, sizeof(info->bus_info) - 1, diff --git a/drivers/net/wireless/orinoco/orinoco_cs.c b/drivers/net/wireless/orinoco/orinoco_cs.c index 6fcf2bda7cdf..bf6a51da3b29 100644 --- a/drivers/net/wireless/orinoco/orinoco_cs.c +++ b/drivers/net/wireless/orinoco/orinoco_cs.c @@ -308,7 +308,7 @@ orinoco_cs_config(struct pcmcia_device *link) /* Finally, report what we've done */ printk(KERN_DEBUG "%s: " DRIVER_NAME " at %s, irq %d, io " - "0x%04x-0x%04x\n", dev->name, dev->dev.parent->bus_id, + "0x%04x-0x%04x\n", dev->name, dev_name(dev->dev.parent), link->irq.AssignedIRQ, link->io.BasePort1, link->io.BasePort1 + link->io.NumPorts1 - 1); return 0; diff --git a/drivers/net/wireless/orinoco/spectrum_cs.c b/drivers/net/wireless/orinoco/spectrum_cs.c index 852789ad34b3..0bae3dcf9d50 100644 --- a/drivers/net/wireless/orinoco/spectrum_cs.c +++ b/drivers/net/wireless/orinoco/spectrum_cs.c @@ -383,7 +383,7 @@ spectrum_cs_config(struct pcmcia_device *link) /* Finally, report what we've done */ printk(KERN_DEBUG "%s: " DRIVER_NAME " at %s, irq %d, io " - "0x%04x-0x%04x\n", dev->name, dev->dev.parent->bus_id, + "0x%04x-0x%04x\n", dev->name, dev_name(dev->dev.parent), link->irq.AssignedIRQ, link->io.BasePort1, link->io.BasePort1 + link->io.NumPorts1 - 1); diff --git a/include/net/wireless.h b/include/net/wireless.h index 17d4b582cf34..412351560b76 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -263,9 +263,9 @@ static inline struct device *wiphy_dev(struct wiphy *wiphy) /** * wiphy_name - get wiphy name */ -static inline char *wiphy_name(struct wiphy *wiphy) +static inline const char *wiphy_name(struct wiphy *wiphy) { - return wiphy->dev.bus_id; + return dev_name(&wiphy->dev); } /** diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c index 1b88311f2130..b5674dc2083d 100644 --- a/net/atm/atm_sysfs.c +++ b/net/atm/atm_sysfs.c @@ -149,7 +149,7 @@ int atm_register_sysfs(struct atm_dev *adev) cdev->class = &atm_class; dev_set_drvdata(cdev, adev); - snprintf(cdev->bus_id, BUS_ID_SIZE, "%s%d", adev->type, adev->number); + dev_set_name(cdev, "%s%d", adev->type, adev->number); err = device_register(cdev); if (err < 0) return err; diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index f4f6615cad9f..f2bbb2f65434 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -113,8 +113,7 @@ void hci_conn_add_sysfs(struct hci_conn *conn) conn->dev.class = bt_class; conn->dev.parent = &hdev->dev; - snprintf(conn->dev.bus_id, BUS_ID_SIZE, "%s:%d", - hdev->name, conn->handle); + dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); dev_set_drvdata(&conn->dev, conn); @@ -132,7 +131,7 @@ void hci_conn_add_sysfs(struct hci_conn *conn) */ static int __match_tty(struct device *dev, void *data) { - return !strncmp(dev->bus_id, "rfcomm", 6); + return !strncmp(dev_name(dev), "rfcomm", 6); } static void del_conn(struct work_struct *work) @@ -421,7 +420,7 @@ int hci_register_sysfs(struct hci_dev *hdev) dev->class = bt_class; dev->parent = hdev->parent; - strlcpy(dev->bus_id, hdev->name, BUS_ID_SIZE); + dev_set_name(dev, hdev->name); dev_set_drvdata(dev, hdev); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 85cb8bdcfb8f..146dcfeb060e 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -494,7 +494,7 @@ int netdev_register_kobject(struct net_device *net) dev->groups = groups; BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ); - strlcpy(dev->bus_id, net->name, BUS_ID_SIZE); + dev_set_name(dev, net->name); #ifdef CONFIG_SYSFS *groups++ = &netstat_group; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 37616884b8a9..7384bad81652 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -284,7 +284,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, netif_carrier_off(slave_dev); if (p->phy != NULL) { - phy_attach(slave_dev, p->phy->dev.bus_id, + phy_attach(slave_dev, dev_name(&p->phy->dev), 0, PHY_INTERFACE_MODE_GMII); p->phy->autoneg = AUTONEG_ENABLE; diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index 69f3a3b4dd61..ec26eae8004d 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -774,7 +774,7 @@ static void rfkill_led_trigger_register(struct rfkill *rfkill) int error; if (!rfkill->led_trigger.name) - rfkill->led_trigger.name = rfkill->dev.bus_id; + rfkill->led_trigger.name = dev_name(&rfkill->dev); if (!rfkill->led_trigger.activate) rfkill->led_trigger.activate = rfkill_led_trigger_activate; error = led_trigger_register(&rfkill->led_trigger); @@ -815,8 +815,7 @@ int __must_check rfkill_register(struct rfkill *rfkill) "badly initialized rfkill struct\n")) return -EINVAL; - snprintf(dev->bus_id, sizeof(dev->bus_id), - "rfkill%ld", (long)atomic_inc_return(&rfkill_no) - 1); + dev_set_name(dev, "rfkill%ld", (long)atomic_inc_return(&rfkill_no) - 1); rfkill_led_trigger_register(rfkill); diff --git a/net/wireless/core.c b/net/wireless/core.c index 72825afe2bf6..39e3d10fccde 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -236,8 +236,7 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv) mutex_unlock(&cfg80211_drv_mutex); /* give it a proper name */ - snprintf(drv->wiphy.dev.bus_id, BUS_ID_SIZE, - PHY_NAME "%d", drv->idx); + dev_set_name(&drv->wiphy.dev, PHY_NAME "%d", drv->idx); mutex_init(&drv->mtx); mutex_init(&drv->devlist_mtx); -- cgit v1.2.3 From ae1e9130bfb9ad55eb97ec3fb17a122b7a118f98 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 11 Nov 2008 09:05:16 +0100 Subject: sched: rename SCHED_NO_NO_OMIT_FRAME_POINTER => SCHED_OMIT_FRAME_POINTER Impact: cleanup, change .config option name We had this ugly config name for a long time for hysteric raisons. Rename it to a saner name. We still cannot get rid of it completely, until /proc//stack usage replaces WCHAN usage for good. We'll be able to do that in the v2.6.29/v2.6.30 timeframe. Signed-off-by: Ingo Molnar --- arch/ia64/Kconfig | 2 +- arch/m32r/Kconfig | 2 +- arch/mips/Kconfig | 2 +- arch/powerpc/Kconfig | 2 +- arch/x86/Kconfig | 2 +- include/asm-m32r/system.h | 2 +- kernel/Makefile | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 27eec71429b0..59d12788b60c 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -99,7 +99,7 @@ config GENERIC_IOMAP bool default y -config SCHED_NO_NO_OMIT_FRAME_POINTER +config SCHED_OMIT_FRAME_POINTER bool default y diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index dbaed4a63815..29047d5c259a 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -273,7 +273,7 @@ config GENERIC_CALIBRATE_DELAY bool default y -config SCHED_NO_NO_OMIT_FRAME_POINTER +config SCHED_OMIT_FRAME_POINTER bool default y diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index f4af967a6b30..a5255e7c79e0 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -653,7 +653,7 @@ config GENERIC_CMOS_UPDATE bool default y -config SCHED_NO_NO_OMIT_FRAME_POINTER +config SCHED_OMIT_FRAME_POINTER bool default y diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 525c13a4de93..adb23ea1c1ef 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -141,7 +141,7 @@ config GENERIC_NVRAM bool default y if PPC32 -config SCHED_NO_NO_OMIT_FRAME_POINTER +config SCHED_OMIT_FRAME_POINTER bool default y diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1d5550d19b66..74db682ec1ce 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -364,7 +364,7 @@ config X86_RDC321X as R-8610-(G). If you don't have one of these chips, you should say N here. -config SCHED_NO_NO_OMIT_FRAME_POINTER +config SCHED_OMIT_FRAME_POINTER def_bool y prompt "Single-depth WCHAN output" depends on X86 diff --git a/include/asm-m32r/system.h b/include/asm-m32r/system.h index 70a57c8c002b..c980f5ba8de7 100644 --- a/include/asm-m32r/system.h +++ b/include/asm-m32r/system.h @@ -23,7 +23,7 @@ */ #if defined(CONFIG_FRAME_POINTER) || \ - !defined(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER) + !defined(CONFIG_SCHED_OMIT_FRAME_POINTER) #define M32R_PUSH_FP " push fp\n" #define M32R_POP_FP " pop fp\n" #else diff --git a/kernel/Makefile b/kernel/Makefile index e1af03972148..46e67a398495 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -91,7 +91,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace/ obj-$(CONFIG_TRACING) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o -ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) +ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is # needed for x86 only. Why this used to be enabled for all architectures is beyond # me. I suspect most platforms don't need this, but until we know that for sure -- cgit v1.2.3 From caf4b323b02a16c92fba449952ac6515ddc76d7a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 11 Nov 2008 07:03:45 +0100 Subject: tracing, x86: add low level support for ftrace return tracing Impact: add infrastructure for function-return tracing Add low level support for ftrace return tracing. This plug-in stores return addresses on the thread_info structure of the current task. The index of the current return address is initialized when the task is the first one (init) and when a process forks (the child). It is not needed when a task does a sys_execve because after this syscall, it still needs to return on the kernel functions it called. Note that the code of return_to_handler has been suggested by Steven Rostedt as almost all of the ideas of improvements in this V3. For purpose of security, arch/x86/kernel/process_32.c is not traced because __switch_to() changes the current task during its execution. That could cause inconsistency in the stored return address of this function even if I didn't have any crash after testing with tracing on this function enabled. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + arch/x86/include/asm/ftrace.h | 26 ++++++ arch/x86/include/asm/thread_info.h | 24 +++++ arch/x86/kernel/Makefile | 6 ++ arch/x86/kernel/entry_32.S | 33 +++++++ arch/x86/kernel/ftrace.c | 181 +++++++++++++++++++++++++++++++++++-- include/linux/ftrace.h | 20 ++++ include/linux/ftrace_irq.h | 2 +- include/linux/sched.h | 11 +++ kernel/Makefile | 4 + 10 files changed, 300 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 27b8a3a39911..ca91e50bdb10 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -11,6 +11,7 @@ config 64BIT config X86_32 def_bool !64BIT + select HAVE_FUNCTION_RET_TRACER config X86_64 def_bool 64BIT diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index f8173ed1c970..9b6a1fa19e70 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -20,4 +20,30 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) #endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ +#ifdef CONFIG_FUNCTION_RET_TRACER +#define FTRACE_RET_STACK_SIZE 20 + +#ifndef __ASSEMBLY__ + +/* + * Stack of return addresses for functions + * of a thread. + * Used in struct thread_info + */ +struct ftrace_ret_stack { + unsigned long ret; + unsigned long func; + unsigned long long calltime; +}; + +/* + * Primary handler of a function return. + * It relays on ftrace_return_to_handler. + * Defined in entry32.S + */ +extern void return_to_handler(void); + +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_FUNCTION_RET_TRACER */ + #endif /* _ASM_X86_FTRACE_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e44d379faad2..a71158369fd4 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -20,6 +20,7 @@ struct task_struct; struct exec_domain; #include +#include struct thread_info { struct task_struct *task; /* main task structure */ @@ -38,8 +39,30 @@ struct thread_info { */ __u8 supervisor_stack[0]; #endif + +#ifdef CONFIG_FUNCTION_RET_TRACER + /* Index of current stored adress in ret_stack */ + int curr_ret_stack; + /* Stack of return addresses for return function tracing */ + struct ftrace_ret_stack ret_stack[FTRACE_RET_STACK_SIZE]; +#endif }; +#ifdef CONFIG_FUNCTION_RET_TRACER +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .flags = 0, \ + .cpu = 0, \ + .preempt_count = 1, \ + .addr_limit = KERNEL_DS, \ + .restart_block = { \ + .fn = do_no_restart_syscall, \ + }, \ + .curr_ret_stack = -1,\ +} +#else #define INIT_THREAD_INFO(tsk) \ { \ .task = &tsk, \ @@ -52,6 +75,7 @@ struct thread_info { .fn = do_no_restart_syscall, \ }, \ } +#endif #define init_thread_info (init_thread_union.thread_info) #define init_stack (init_thread_union.stack) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index e489ff9cb3e2..1d8ed95da846 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -14,6 +14,11 @@ CFLAGS_REMOVE_paravirt-spinlocks.o = -pg CFLAGS_REMOVE_ftrace.o = -pg endif +ifdef CONFIG_FUNCTION_RET_TRACER +# Don't trace __switch_to() but let it for function tracer +CFLAGS_REMOVE_process_32.o = -pg +endif + # # vsyscalls (which work on the user stack) should have # no stack-protector checks: @@ -65,6 +70,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +obj-$(CONFIG_FUNCTION_RET_TRACER) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 9134de814c97..9a0ac85946db 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1188,6 +1188,10 @@ ENTRY(mcount) cmpl $ftrace_stub, ftrace_trace_function jnz trace +#ifdef CONFIG_FUNCTION_RET_TRACER + cmpl $ftrace_stub, ftrace_function_return + jnz trace_return +#endif .globl ftrace_stub ftrace_stub: ret @@ -1206,8 +1210,37 @@ trace: popl %edx popl %ecx popl %eax + jmp ftrace_stub +#ifdef CONFIG_FUNCTION_RET_TRACER +trace_return: + pushl %eax + pushl %ecx + pushl %edx + movl 0xc(%esp), %eax + pushl %eax + lea 0x4(%ebp), %eax + pushl %eax + call prepare_ftrace_return + addl $8, %esp + popl %edx + popl %ecx + popl %eax jmp ftrace_stub + +.globl return_to_handler +return_to_handler: + pushl $0 + pushl %eax + pushl %ecx + pushl %edx + call ftrace_return_to_handler + movl %eax, 0xc(%esp) + popl %edx + popl %ecx + popl %eax + ret +#endif /* CONFIG_FUNCTION_RET_TRACER */ END(mcount) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 69149337f2fe..d68033bba223 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -18,10 +18,173 @@ #include #include +#include #include +#include -static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; + +#ifdef CONFIG_FUNCTION_RET_TRACER + +/* + * These functions are picked from those used on + * this page for dynamic ftrace. They have been + * simplified to ignore all traces in NMI context. + */ +static atomic_t in_nmi; + +void ftrace_nmi_enter(void) +{ + atomic_inc(&in_nmi); +} + +void ftrace_nmi_exit(void) +{ + atomic_dec(&in_nmi); +} + +/* + * Synchronize accesses to return adresses stack with + * interrupts. + */ +static raw_spinlock_t ret_stack_lock; + +/* Add a function return address to the trace stack on thread info.*/ +static int push_return_trace(unsigned long ret, unsigned long long time, + unsigned long func) +{ + int index; + struct thread_info *ti; + unsigned long flags; + int err = 0; + + raw_local_irq_save(flags); + __raw_spin_lock(&ret_stack_lock); + + ti = current_thread_info(); + /* The return trace stack is full */ + if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1) { + err = -EBUSY; + goto out; + } + + index = ++ti->curr_ret_stack; + ti->ret_stack[index].ret = ret; + ti->ret_stack[index].func = func; + ti->ret_stack[index].calltime = time; + +out: + __raw_spin_unlock(&ret_stack_lock); + raw_local_irq_restore(flags); + return err; +} + +/* Retrieve a function return address to the trace stack on thread info.*/ +static void pop_return_trace(unsigned long *ret, unsigned long long *time, + unsigned long *func) +{ + struct thread_info *ti; + int index; + unsigned long flags; + + raw_local_irq_save(flags); + __raw_spin_lock(&ret_stack_lock); + + ti = current_thread_info(); + index = ti->curr_ret_stack; + *ret = ti->ret_stack[index].ret; + *func = ti->ret_stack[index].func; + *time = ti->ret_stack[index].calltime; + ti->curr_ret_stack--; + + __raw_spin_unlock(&ret_stack_lock); + raw_local_irq_restore(flags); +} + +/* + * Send the trace to the ring-buffer. + * @return the original return address. + */ +unsigned long ftrace_return_to_handler(void) +{ + struct ftrace_retfunc trace; + pop_return_trace(&trace.ret, &trace.calltime, &trace.func); + trace.rettime = cpu_clock(raw_smp_processor_id()); + ftrace_function_return(&trace); + + return trace.ret; +} + +/* + * Hook the return address and push it in the stack of return addrs + * in current thread info. + */ +asmlinkage +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) +{ + unsigned long old; + unsigned long long calltime; + int faulted; + unsigned long return_hooker = (unsigned long) + &return_to_handler; + + /* Nmi's are currently unsupported */ + if (atomic_read(&in_nmi)) + return; + + /* + * Protect against fault, even if it shouldn't + * happen. This tool is too much intrusive to + * ignore such a protection. + */ + asm volatile( + "1: movl (%[parent_old]), %[old]\n" + "2: movl %[return_hooker], (%[parent_replaced])\n" + " movl $0, %[faulted]\n" + + ".section .fixup, \"ax\"\n" + "3: movl $1, %[faulted]\n" + ".previous\n" + + ".section __ex_table, \"a\"\n" + " .long 1b, 3b\n" + " .long 2b, 3b\n" + ".previous\n" + + : [parent_replaced] "=rm" (parent), [old] "=r" (old), + [faulted] "=r" (faulted) + : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker) + : "memory" + ); + + if (WARN_ON(faulted)) { + unregister_ftrace_return(); + return; + } + + if (WARN_ON(!__kernel_text_address(old))) { + unregister_ftrace_return(); + *parent = old; + return; + } + + calltime = cpu_clock(raw_smp_processor_id()); + + if (push_return_trace(old, calltime, self_addr) == -EBUSY) + *parent = old; +} + +static int __init init_ftrace_function_return(void) +{ + ret_stack_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + return 0; +} +device_initcall(init_ftrace_function_return); + + +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE union ftrace_code_union { char code[MCOUNT_INSN_SIZE]; @@ -31,17 +194,11 @@ union ftrace_code_union { } __attribute__((packed)); }; - static int ftrace_calc_offset(long ip, long addr) { return (int)(addr - ip); } -unsigned char *ftrace_nop_replace(void) -{ - return ftrace_nop; -} - unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) { static union ftrace_code_union calc; @@ -183,6 +340,15 @@ do_ftrace_mod_code(unsigned long ip, void *new_code) } + + +static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; + +unsigned char *ftrace_nop_replace(void) +{ + return ftrace_nop; +} + int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) @@ -292,3 +458,4 @@ int __init ftrace_dyn_arch_init(void *data) return 0; } +#endif diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1f5608c11023..dcbbf72a88b1 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -267,6 +267,26 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { } #endif +/* + * Structure that defines a return function trace. + */ +struct ftrace_retfunc { + unsigned long ret; /* Return address */ + unsigned long func; /* Current function */ + unsigned long long calltime; + unsigned long long rettime; +}; + +#ifdef CONFIG_FUNCTION_RET_TRACER +/* Type of a callback handler of tracing return function */ +typedef void (*trace_function_return_t)(struct ftrace_retfunc *); + +extern void register_ftrace_return(trace_function_return_t func); +/* The current handler in use */ +extern trace_function_return_t ftrace_function_return; +extern void unregister_ftrace_return(void); +#endif + /* * Structure which defines the trace of an initcall. * You don't have to fill the func field since it is diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h index b1299d6729f2..0b4df55d7a74 100644 --- a/include/linux/ftrace_irq.h +++ b/include/linux/ftrace_irq.h @@ -2,7 +2,7 @@ #define _LINUX_FTRACE_IRQ_H -#ifdef CONFIG_DYNAMIC_FTRACE +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_RET_TRACER) extern void ftrace_nmi_enter(void); extern void ftrace_nmi_exit(void); #else diff --git a/include/linux/sched.h b/include/linux/sched.h index 295b7c756ca6..df77abe860c9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2005,6 +2005,17 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct { *task_thread_info(p) = *task_thread_info(org); task_thread_info(p)->task = p; + +#ifdef CONFIG_FUNCTION_RET_TRACER + /* + * When fork() creates a child process, this function is called. + * But the child task may not inherit the return adresses traced + * by the return function tracer because it will directly execute + * in userspace and will not return to kernel functions its parent + * used. + */ + task_thread_info(p)->curr_ret_stack = -1; +#endif } static inline unsigned long *end_of_stack(struct task_struct *p) diff --git a/kernel/Makefile b/kernel/Makefile index 9a3ec66a9d84..af3be57acbbb 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -23,6 +23,10 @@ CFLAGS_REMOVE_cgroup-debug.o = -pg CFLAGS_REMOVE_sched_clock.o = -pg CFLAGS_REMOVE_sched.o = -mno-spe -pg endif +ifdef CONFIG_FUNCTION_RET_TRACER +CFLAGS_REMOVE_extable.o = -pg # For __kernel_text_address() +CFLAGS_REMOVE_module.o = -pg # For __module_text_address() +endif obj-$(CONFIG_FREEZER) += freezer.o obj-$(CONFIG_PROFILING) += profile.o -- cgit v1.2.3 From 9d36be76c55ad2c2bb29683b752b0d9ad2e4eeef Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 11 Nov 2008 21:48:07 +1100 Subject: Document the order of arguments for cap_issubset. It's not instantly clear which order the argument should be in. So give an example. Signed-off-by: Eric Paris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/capability.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/capability.h b/include/linux/capability.h index 5bc145bd759a..b5750d0b96e0 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -457,6 +457,13 @@ static inline int cap_isclear(const kernel_cap_t a) return 1; } +/* + * Check if "a" is a subset of "set". + * return 1 if ALL of the capabilities in "a" are also in "set" + * cap_issubset(0101, 1111) will return 1 + * return 0 if ANY of the capabilities in "a" are not in "set" + * cap_issubset(1111, 0101) will return 0 + */ static inline int cap_issubset(const kernel_cap_t a, const kernel_cap_t set) { kernel_cap_t dest; -- cgit v1.2.3 From c0b004413a46a0a5744e6d2b85220fe9d2c33d48 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 11 Nov 2008 21:48:10 +1100 Subject: This patch add a generic cpu endian caps structure and externally available functions which retrieve fcaps information from disk. This information is necessary so fcaps information can be collected and recorded by the audit system. Signed-off-by: Eric Paris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/capability.h | 7 +++ security/commoncap.c | 129 +++++++++++++++++++++++++-------------------- 2 files changed, 78 insertions(+), 58 deletions(-) (limited to 'include') diff --git a/include/linux/capability.h b/include/linux/capability.h index b5750d0b96e0..d567af247ed8 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -99,6 +99,13 @@ typedef struct kernel_cap_struct { __u32 cap[_KERNEL_CAPABILITY_U32S]; } kernel_cap_t; +/* exact same as vfs_cap_data but in cpu endian and always filled completely */ +struct cpu_vfs_cap_data { + __u32 magic_etc; + kernel_cap_t permitted; + kernel_cap_t inheritable; +}; + #define _USER_CAP_HEADER_SIZE (sizeof(struct __user_cap_header_struct)) #define _KERNEL_CAP_T_SIZE (sizeof(kernel_cap_t)) diff --git a/security/commoncap.c b/security/commoncap.c index f88119cb2bc2..d7eff5797b91 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -202,17 +202,70 @@ int cap_inode_killpriv(struct dentry *dentry) return inode->i_op->removexattr(dentry, XATTR_NAME_CAPS); } -static inline int cap_from_disk(struct vfs_cap_data *caps, - struct linux_binprm *bprm, unsigned size) +static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, + struct linux_binprm *bprm) { + unsigned i; + int ret = 0; + + if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE) + bprm->cap_effective = true; + else + bprm->cap_effective = false; + + CAP_FOR_EACH_U32(i) { + __u32 permitted = caps->permitted.cap[i]; + __u32 inheritable = caps->inheritable.cap[i]; + + /* + * pP' = (X & fP) | (pI & fI) + */ + bprm->cap_post_exec_permitted.cap[i] = + (current->cap_bset.cap[i] & permitted) | + (current->cap_inheritable.cap[i] & inheritable); + + if (permitted & ~bprm->cap_post_exec_permitted.cap[i]) { + /* + * insufficient to execute correctly + */ + ret = -EPERM; + } + } + + /* + * For legacy apps, with no internal support for recognizing they + * do not have enough capabilities, we return an error if they are + * missing some "forced" (aka file-permitted) capabilities. + */ + return bprm->cap_effective ? ret : 0; +} + +int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps) +{ + struct inode *inode = dentry->d_inode; __u32 magic_etc; unsigned tocopy, i; - int ret; + int size; + struct vfs_cap_data caps; + + memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data)); + + if (!inode || !inode->i_op || !inode->i_op->getxattr) + return -ENODATA; + + size = inode->i_op->getxattr((struct dentry *)dentry, XATTR_NAME_CAPS, &caps, + XATTR_CAPS_SZ); + if (size == -ENODATA || size == -EOPNOTSUPP) { + /* no data, that's ok */ + return -ENODATA; + } + if (size < 0) + return size; if (size < sizeof(magic_etc)) return -EINVAL; - magic_etc = le32_to_cpu(caps->magic_etc); + cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps.magic_etc); switch ((magic_etc & VFS_CAP_REVISION_MASK)) { case VFS_CAP_REVISION_1: @@ -229,46 +282,13 @@ static inline int cap_from_disk(struct vfs_cap_data *caps, return -EINVAL; } - if (magic_etc & VFS_CAP_FLAGS_EFFECTIVE) { - bprm->cap_effective = true; - } else { - bprm->cap_effective = false; - } - - ret = 0; - CAP_FOR_EACH_U32(i) { - __u32 value_cpu; - - if (i >= tocopy) { - /* - * Legacy capability sets have no upper bits - */ - bprm->cap_post_exec_permitted.cap[i] = 0; - continue; - } - /* - * pP' = (X & fP) | (pI & fI) - */ - value_cpu = le32_to_cpu(caps->data[i].permitted); - bprm->cap_post_exec_permitted.cap[i] = - (current->cap_bset.cap[i] & value_cpu) | - (current->cap_inheritable.cap[i] & - le32_to_cpu(caps->data[i].inheritable)); - if (value_cpu & ~bprm->cap_post_exec_permitted.cap[i]) { - /* - * insufficient to execute correctly - */ - ret = -EPERM; - } + if (i >= tocopy) + break; + cpu_caps->permitted.cap[i] = le32_to_cpu(caps.data[i].permitted); + cpu_caps->inheritable.cap[i] = le32_to_cpu(caps.data[i].inheritable); } - - /* - * For legacy apps, with no internal support for recognizing they - * do not have enough capabilities, we return an error if they are - * missing some "forced" (aka file-permitted) capabilities. - */ - return bprm->cap_effective ? ret : 0; + return 0; } /* Locate any VFS capabilities: */ @@ -276,8 +296,7 @@ static int get_file_caps(struct linux_binprm *bprm) { struct dentry *dentry; int rc = 0; - struct vfs_cap_data vcaps; - struct inode *inode; + struct cpu_vfs_cap_data vcaps; bprm_clear_caps(bprm); @@ -288,24 +307,18 @@ static int get_file_caps(struct linux_binprm *bprm) return 0; dentry = dget(bprm->file->f_dentry); - inode = dentry->d_inode; - if (!inode->i_op || !inode->i_op->getxattr) - goto out; - rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, &vcaps, - XATTR_CAPS_SZ); - if (rc == -ENODATA || rc == -EOPNOTSUPP) { - /* no data, that's ok */ - rc = 0; + rc = get_vfs_caps_from_disk(dentry, &vcaps); + if (rc < 0) { + if (rc == -EINVAL) + printk(KERN_NOTICE "%s: get_vfs_caps_from_disk returned %d for %s\n", + __func__, rc, bprm->filename); + else if (rc == -ENODATA) + rc = 0; goto out; } - if (rc < 0) - goto out; - rc = cap_from_disk(&vcaps, bprm, rc); - if (rc == -EINVAL) - printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n", - __func__, rc, bprm->filename); + rc = bprm_caps_from_vfs_caps(&vcaps, bprm); out: dput(dentry); -- cgit v1.2.3 From 851f7ff56d9c21272f289dd85fb3f1b6cf7a6e10 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 11 Nov 2008 21:48:14 +1100 Subject: This patch will print cap_permitted and cap_inheritable data in the PATH records of any file that has file capabilities set. Files which do not have fcaps set will not have different PATH records. An example audit record if you run: setcap "cap_net_admin+pie" /bin/bash /bin/bash type=SYSCALL msg=audit(1225741937.363:230): arch=c000003e syscall=59 success=yes exit=0 a0=2119230 a1=210da30 a2=20ee290 a3=8 items=2 ppid=2149 pid=2923 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=3 comm="ping" exe="/bin/ping" subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=(null) type=EXECVE msg=audit(1225741937.363:230): argc=2 a0="ping" a1="www.google.com" type=CWD msg=audit(1225741937.363:230): cwd="/root" type=PATH msg=audit(1225741937.363:230): item=0 name="/bin/ping" inode=49256 dev=fd:00 mode=0104755 ouid=0 ogid=0 rdev=00:00 obj=system_u:object_r:ping_exec_t:s0 cap_fp=0000000000002000 cap_fi=0000000000002000 cap_fe=1 cap_fver=2 type=PATH msg=audit(1225741937.363:230): item=1 name=(null) inode=507915 dev=fd:00 mode=0100755 ouid=0 ogid=0 rdev=00:00 obj=system_u:object_r:ld_so_t:s0 Signed-off-by: Eric Paris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/capability.h | 5 +++ kernel/auditsc.c | 82 +++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 82 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/capability.h b/include/linux/capability.h index d567af247ed8..0f1950181102 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -53,6 +53,7 @@ typedef struct __user_cap_data_struct { #define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX #define VFS_CAP_REVISION_MASK 0xFF000000 +#define VFS_CAP_REVISION_SHIFT 24 #define VFS_CAP_FLAGS_MASK ~VFS_CAP_REVISION_MASK #define VFS_CAP_FLAGS_EFFECTIVE 0x000001 @@ -534,6 +535,10 @@ kernel_cap_t cap_set_effective(const kernel_cap_t pE_new); extern int capable(int cap); +/* audit system wants to get cap info from files as well */ +struct dentry; +extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); + #endif /* __KERNEL__ */ #endif /* !_LINUX_CAPABILITY_H */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index cf5bc2f5f9c3..de7e9bcba9ae 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -65,6 +65,7 @@ #include #include #include +#include #include "audit.h" @@ -84,6 +85,15 @@ int audit_n_rules; /* determines whether we collect data for signals sent */ int audit_signals; +struct audit_cap_data { + kernel_cap_t permitted; + kernel_cap_t inheritable; + union { + unsigned int fE; /* effective bit of a file capability */ + kernel_cap_t effective; /* effective set of a process */ + }; +}; + /* When fs/namei.c:getname() is called, we store the pointer in name and * we don't let putname() free it (instead we free all of the saved * pointers at syscall exit time). @@ -100,6 +110,8 @@ struct audit_names { gid_t gid; dev_t rdev; u32 osid; + struct audit_cap_data fcap; + unsigned int fcap_ver; }; struct audit_aux_data { @@ -1171,6 +1183,35 @@ static void audit_log_execve_info(struct audit_context *context, kfree(buf); } +static void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap) +{ + int i; + + audit_log_format(ab, " %s=", prefix); + CAP_FOR_EACH_U32(i) { + audit_log_format(ab, "%08x", cap->cap[(_KERNEL_CAPABILITY_U32S-1) - i]); + } +} + +static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) +{ + kernel_cap_t *perm = &name->fcap.permitted; + kernel_cap_t *inh = &name->fcap.inheritable; + int log = 0; + + if (!cap_isclear(*perm)) { + audit_log_cap(ab, "cap_fp", perm); + log = 1; + } + if (!cap_isclear(*inh)) { + audit_log_cap(ab, "cap_fi", inh); + log = 1; + } + + if (log) + audit_log_format(ab, " cap_fe=%d cap_fver=%x", name->fcap.fE, name->fcap_ver); +} + static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) { int i, call_panic = 0; @@ -1421,6 +1462,8 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts } } + audit_log_fcaps(ab, n); + audit_log_end(ab); } @@ -1787,8 +1830,36 @@ static int audit_inc_name_count(struct audit_context *context, return 0; } + +static inline int audit_copy_fcaps(struct audit_names *name, const struct dentry *dentry) +{ + struct cpu_vfs_cap_data caps; + int rc; + + memset(&name->fcap.permitted, 0, sizeof(kernel_cap_t)); + memset(&name->fcap.inheritable, 0, sizeof(kernel_cap_t)); + name->fcap.fE = 0; + name->fcap_ver = 0; + + if (!dentry) + return 0; + + rc = get_vfs_caps_from_disk(dentry, &caps); + if (rc) + return rc; + + name->fcap.permitted = caps.permitted; + name->fcap.inheritable = caps.inheritable; + name->fcap.fE = !!(caps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE); + name->fcap_ver = (caps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT; + + return 0; +} + + /* Copy inode data into an audit_names. */ -static void audit_copy_inode(struct audit_names *name, const struct inode *inode) +static void audit_copy_inode(struct audit_names *name, const struct dentry *dentry, + const struct inode *inode) { name->ino = inode->i_ino; name->dev = inode->i_sb->s_dev; @@ -1797,6 +1868,7 @@ static void audit_copy_inode(struct audit_names *name, const struct inode *inode name->gid = inode->i_gid; name->rdev = inode->i_rdev; security_inode_getsecid(inode, &name->osid); + audit_copy_fcaps(name, dentry); } /** @@ -1831,7 +1903,7 @@ void __audit_inode(const char *name, const struct dentry *dentry) context->names[idx].name = NULL; } handle_path(dentry); - audit_copy_inode(&context->names[idx], inode); + audit_copy_inode(&context->names[idx], dentry, inode); } /** @@ -1892,7 +1964,7 @@ void __audit_inode_child(const char *dname, const struct dentry *dentry, if (!strcmp(dname, n->name) || !audit_compare_dname_path(dname, n->name, &dirlen)) { if (inode) - audit_copy_inode(n, inode); + audit_copy_inode(n, NULL, inode); else n->ino = (unsigned long)-1; found_child = n->name; @@ -1906,7 +1978,7 @@ add_names: return; idx = context->name_count - 1; context->names[idx].name = NULL; - audit_copy_inode(&context->names[idx], parent); + audit_copy_inode(&context->names[idx], NULL, parent); } if (!found_child) { @@ -1927,7 +1999,7 @@ add_names: } if (inode) - audit_copy_inode(&context->names[idx], inode); + audit_copy_inode(&context->names[idx], NULL, inode); else context->names[idx].ino = (unsigned long)-1; } -- cgit v1.2.3 From 3fc689e96c0c90b6fede5946d6c31075e9464f69 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 11 Nov 2008 21:48:18 +1100 Subject: Any time fcaps or a setuid app under SECURE_NOROOT is used to result in a non-zero pE we will crate a new audit record which contains the entire set of known information about the executable in question, fP, fI, fE, fversion and includes the process's pE, pI, pP. Before and after the bprm capability are applied. This record type will only be emitted from execve syscalls. an example of making ping use fcaps instead of setuid: setcap "cat_net_raw+pe" /bin/ping type=SYSCALL msg=audit(1225742021.015:236): arch=c000003e syscall=59 success=yes exit=0 a0=1457f30 a1=14606b0 a2=1463940 a3=321b770a70 items=2 ppid=2929 pid=2963 auid=0 uid=500 gid=500 euid=500 suid=500 fsuid=500 egid=500 sgid=500 fsgid=500 tty=pts0 ses=3 comm="ping" exe="/bin/ping" subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=(null) type=UNKNOWN[1321] msg=audit(1225742021.015:236): fver=2 fp=0000000000002000 fi=0000000000000000 fe=1 old_pp=0000000000000000 old_pi=0000000000000000 old_pe=0000000000000000 new_pp=0000000000002000 new_pi=0000000000000000 new_pe=0000000000002000 type=EXECVE msg=audit(1225742021.015:236): argc=2 a0="ping" a1="127.0.0.1" type=CWD msg=audit(1225742021.015:236): cwd="/home/test" type=PATH msg=audit(1225742021.015:236): item=0 name="/bin/ping" inode=49256 dev=fd:00 mode=0100755 ouid=0 ogid=0 rdev=00:00 obj=system_u:object_r:ping_exec_t:s0 cap_fp=0000000000002000 cap_fe=1 cap_fver=2 type=PATH msg=audit(1225742021.015:236): item=1 name=(null) inode=507915 dev=fd:00 mode=0100755 ouid=0 ogid=0 rdev=00:00 obj=system_u:object_r:ld_so_t:s0 Signed-off-by: Eric Paris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/audit.h | 26 ++++++++++++++++++++ kernel/auditsc.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++ security/commoncap.c | 23 ++++++++++++++++- 3 files changed, 116 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index 6272a395d43c..8cfb9feb2a05 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -99,6 +99,7 @@ #define AUDIT_OBJ_PID 1318 /* ptrace target */ #define AUDIT_TTY 1319 /* Input on an administrative TTY */ #define AUDIT_EOE 1320 /* End of multi-record event */ +#define AUDIT_BPRM_FCAPS 1321 /* Information about fcaps increasing perms */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ @@ -452,6 +453,7 @@ extern int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_pr extern int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout); extern int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification); extern int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat); +extern void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE); static inline int audit_ipc_obj(struct kern_ipc_perm *ipcp) { @@ -501,6 +503,29 @@ static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) return __audit_mq_getsetattr(mqdes, mqstat); return 0; } + +/* + * ieieeeeee, an audit function without a return code! + * + * This function might fail! I decided that it didn't matter. We are too late + * to fail the syscall and the information isn't REQUIRED for any purpose. It's + * just nice to have. We should be able to look at past audit logs to figure + * out this process's current cap set along with the fcaps from the PATH record + * and use that to come up with the final set. Yeah, its ugly, but all the info + * is still in the audit log. So I'm not going to bother mentioning we failed + * if we couldn't allocate memory. + * + * If someone changes their mind they could create the aux record earlier and + * then search here and use that earlier allocation. But I don't wanna. + * + * -Eric + */ +static inline void audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE) +{ + if (unlikely(!audit_dummy_context())) + __audit_log_bprm_fcaps(bprm, pP, pE); +} + extern int audit_n_rules; extern int audit_signals; #else @@ -532,6 +557,7 @@ extern int audit_signals; #define audit_mq_timedreceive(d,l,p,t) ({ 0; }) #define audit_mq_notify(d,n) ({ 0; }) #define audit_mq_getsetattr(d,s) ({ 0; }) +#define audit_log_bprm_fcaps(b, p, e) do { ; } while (0) #define audit_ptrace(t) ((void)0) #define audit_n_rules 0 #define audit_signals 0 diff --git a/kernel/auditsc.c b/kernel/auditsc.c index de7e9bcba9ae..3229cd4206f5 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -196,6 +196,14 @@ struct audit_aux_data_pids { int pid_count; }; +struct audit_aux_data_bprm_fcaps { + struct audit_aux_data d; + struct audit_cap_data fcap; + unsigned int fcap_ver; + struct audit_cap_data old_pcap; + struct audit_cap_data new_pcap; +}; + struct audit_tree_refs { struct audit_tree_refs *next; struct audit_chunk *c[31]; @@ -1375,6 +1383,20 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts audit_log_format(ab, "fd0=%d fd1=%d", axs->fd[0], axs->fd[1]); break; } + case AUDIT_BPRM_FCAPS: { + struct audit_aux_data_bprm_fcaps *axs = (void *)aux; + audit_log_format(ab, "fver=%x", axs->fcap_ver); + audit_log_cap(ab, "fp", &axs->fcap.permitted); + audit_log_cap(ab, "fi", &axs->fcap.inheritable); + audit_log_format(ab, " fe=%d", axs->fcap.fE); + audit_log_cap(ab, "old_pp", &axs->old_pcap.permitted); + audit_log_cap(ab, "old_pi", &axs->old_pcap.inheritable); + audit_log_cap(ab, "old_pe", &axs->old_pcap.effective); + audit_log_cap(ab, "new_pp", &axs->new_pcap.permitted); + audit_log_cap(ab, "new_pi", &axs->new_pcap.inheritable); + audit_log_cap(ab, "new_pe", &axs->new_pcap.effective); + break; } + } audit_log_end(ab); } @@ -2501,6 +2523,52 @@ int __audit_signal_info(int sig, struct task_struct *t) return 0; } +/** + * __audit_log_bprm_fcaps - store information about a loading bprm and relevant fcaps + * @bprm pointer to the bprm being processed + * @caps the caps read from the disk + * + * Simply check if the proc already has the caps given by the file and if not + * store the priv escalation info for later auditing at the end of the syscall + * + * this can fail and we don't care. See the note in audit.h for + * audit_log_bprm_fcaps() for my explaination.... + * + * -Eric + */ +void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE) +{ + struct audit_aux_data_bprm_fcaps *ax; + struct audit_context *context = current->audit_context; + struct cpu_vfs_cap_data vcaps; + struct dentry *dentry; + + ax = kmalloc(sizeof(*ax), GFP_KERNEL); + if (!ax) + return; + + ax->d.type = AUDIT_BPRM_FCAPS; + ax->d.next = context->aux; + context->aux = (void *)ax; + + dentry = dget(bprm->file->f_dentry); + get_vfs_caps_from_disk(dentry, &vcaps); + dput(dentry); + + ax->fcap.permitted = vcaps.permitted; + ax->fcap.inheritable = vcaps.inheritable; + ax->fcap.fE = !!(vcaps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE); + ax->fcap_ver = (vcaps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT; + + ax->old_pcap.permitted = *pP; + ax->old_pcap.inheritable = current->cap_inheritable; + ax->old_pcap.effective = *pE; + + ax->new_pcap.permitted = current->cap_permitted; + ax->new_pcap.inheritable = current->cap_inheritable; + ax->new_pcap.effective = current->cap_effective; +} + /** * audit_core_dumps - record information about processes that end abnormally * @signr: signal value diff --git a/security/commoncap.c b/security/commoncap.c index d7eff5797b91..d45393380997 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -376,6 +377,9 @@ int cap_bprm_set_security (struct linux_binprm *bprm) void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) { + kernel_cap_t pP = current->cap_permitted; + kernel_cap_t pE = current->cap_effective; + if (bprm->e_uid != current->uid || bprm->e_gid != current->gid || !cap_issubset(bprm->cap_post_exec_permitted, current->cap_permitted)) { @@ -409,7 +413,24 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) cap_clear(current->cap_effective); } - /* AUD: Audit candidate if current->cap_effective is set */ + /* + * Audit candidate if current->cap_effective is set + * + * We do not bother to audit if 3 things are true: + * 1) cap_effective has all caps + * 2) we are root + * 3) root is supposed to have all caps (SECURE_NOROOT) + * Since this is just a normal root execing a process. + * + * Number 1 above might fail if you don't have a full bset, but I think + * that is interesting information to audit. + */ + if (!cap_isclear(current->cap_effective)) { + if (!cap_issubset(CAP_FULL_SET, current->cap_effective) || + (bprm->e_uid != 0) || (current->uid != 0) || + issecure(SECURE_NOROOT)) + audit_log_bprm_fcaps(bprm, &pP, &pE); + } current->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); } -- cgit v1.2.3 From e68b75a027bb94066576139ee33676264f867b87 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 11 Nov 2008 21:48:22 +1100 Subject: When the capset syscall is used it is not possible for audit to record the actual capbilities being added/removed. This patch adds a new record type which emits the target pid and the eff, inh, and perm cap sets. example output if you audit capset syscalls would be: type=SYSCALL msg=audit(1225743140.465:76): arch=c000003e syscall=126 success=yes exit=0 a0=17f2014 a1=17f201c a2=80000000 a3=7fff2ab7f060 items=0 ppid=2160 pid=2223 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=1 comm="setcap" exe="/usr/sbin/setcap" subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key=(null) type=UNKNOWN[1322] msg=audit(1225743140.465:76): pid=0 cap_pi=ffffffffffffffff cap_pp=ffffffffffffffff cap_pe=ffffffffffffffff Signed-off-by: Eric Paris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/audit.h | 10 ++++++++++ kernel/auditsc.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ kernel/capability.c | 5 +++++ 3 files changed, 63 insertions(+) (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index 8cfb9feb2a05..6fbebac7b1bf 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -100,6 +100,7 @@ #define AUDIT_TTY 1319 /* Input on an administrative TTY */ #define AUDIT_EOE 1320 /* End of multi-record event */ #define AUDIT_BPRM_FCAPS 1321 /* Information about fcaps increasing perms */ +#define AUDIT_CAPSET 1322 /* Record showing argument to sys_capset */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ @@ -454,6 +455,7 @@ extern int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __u extern int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification); extern int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat); extern void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE); +extern int __audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm); static inline int audit_ipc_obj(struct kern_ipc_perm *ipcp) { @@ -526,6 +528,13 @@ static inline void audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t __audit_log_bprm_fcaps(bprm, pP, pE); } +static inline int audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm) +{ + if (unlikely(!audit_dummy_context())) + return __audit_log_capset(pid, eff, inh, perm); + return 0; +} + extern int audit_n_rules; extern int audit_signals; #else @@ -558,6 +567,7 @@ extern int audit_signals; #define audit_mq_notify(d,n) ({ 0; }) #define audit_mq_getsetattr(d,s) ({ 0; }) #define audit_log_bprm_fcaps(b, p, e) do { ; } while (0) +#define audit_log_capset(pid, e, i, p) ({ 0; }) #define audit_ptrace(t) ((void)0) #define audit_n_rules 0 #define audit_signals 0 diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 3229cd4206f5..cef34235b362 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -204,6 +204,12 @@ struct audit_aux_data_bprm_fcaps { struct audit_cap_data new_pcap; }; +struct audit_aux_data_capset { + struct audit_aux_data d; + pid_t pid; + struct audit_cap_data cap; +}; + struct audit_tree_refs { struct audit_tree_refs *next; struct audit_chunk *c[31]; @@ -1397,6 +1403,14 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts audit_log_cap(ab, "new_pe", &axs->new_pcap.effective); break; } + case AUDIT_CAPSET: { + struct audit_aux_data_capset *axs = (void *)aux; + audit_log_format(ab, "pid=%d", axs->pid); + audit_log_cap(ab, "cap_pi", &axs->cap.inheritable); + audit_log_cap(ab, "cap_pp", &axs->cap.permitted); + audit_log_cap(ab, "cap_pe", &axs->cap.effective); + break; } + } audit_log_end(ab); } @@ -2569,6 +2583,40 @@ void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_ ax->new_pcap.effective = current->cap_effective; } +/** + * __audit_log_capset - store information about the arguments to the capset syscall + * @pid target pid of the capset call + * @eff effective cap set + * @inh inheritible cap set + * @perm permited cap set + * + * Record the aguments userspace sent to sys_capset for later printing by the + * audit system if applicable + */ +int __audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm) +{ + struct audit_aux_data_capset *ax; + struct audit_context *context = current->audit_context; + + if (likely(!audit_enabled || !context || context->dummy)) + return 0; + + ax = kmalloc(sizeof(*ax), GFP_KERNEL); + if (!ax) + return -ENOMEM; + + ax->d.type = AUDIT_CAPSET; + ax->d.next = context->aux; + context->aux = (void *)ax; + + ax->pid = pid; + ax->cap.effective = *eff; + ax->cap.inheritable = *eff; + ax->cap.permitted = *perm; + + return 0; +} + /** * audit_core_dumps - record information about processes that end abnormally * @signr: signal value diff --git a/kernel/capability.c b/kernel/capability.c index e13a68535ad5..19f9eda89975 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -7,6 +7,7 @@ * 30 May 2002: Cleanup, Robert M. Love */ +#include #include #include #include @@ -468,6 +469,10 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) i++; } + ret = audit_log_capset(pid, &effective, &inheritable, &permitted); + if (ret) + return ret; + if (pid && (pid != task_pid_vnr(current))) ret = do_sys_capset_other_tasks(pid, &effective, &inheritable, &permitted); -- cgit v1.2.3 From 06112163f5fd9e491a7f810443d81efa9d88e247 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 11 Nov 2008 22:02:50 +1100 Subject: Add a new capable interface that will be used by systems that use audit to make an A or B type decision instead of a security decision. Currently this is the case at least for filesystems when deciding if a process can use the reserved 'root' blocks and for the case of things like the oom algorithm determining if processes are root processes and should be less likely to be killed. These types of security system requests should not be audited or logged since they are not really security decisions. It would be possible to solve this problem like the vm_enough_memory security check did by creating a new LSM interface and moving all of the policy into that interface but proves the needlessly bloat the LSM and provide complex indirection. This merely allows those decisions to be made where they belong and to not flood logs or printk with denials for thing that are not security decisions. Signed-off-by: Eric Paris Acked-by: Stephen Smalley Signed-off-by: James Morris --- include/linux/capability.h | 3 +++ include/linux/security.h | 16 +++++++++++++--- security/commoncap.c | 8 ++++---- security/security.c | 7 ++++++- security/selinux/hooks.c | 20 +++++++++++++------- 5 files changed, 39 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/capability.h b/include/linux/capability.h index 0f1950181102..b313ba1dd5d1 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -521,6 +521,8 @@ extern const kernel_cap_t __cap_init_eff_set; kernel_cap_t cap_set_effective(const kernel_cap_t pE_new); +extern int security_capable(struct task_struct *t, int cap); +extern int security_capable_noaudit(struct task_struct *t, int cap); /** * has_capability - Determine if a task has a superior capability available * @t: The task in question @@ -532,6 +534,7 @@ kernel_cap_t cap_set_effective(const kernel_cap_t pE_new); * Note that this does not set PF_SUPERPRIV on the task. */ #define has_capability(t, cap) (security_capable((t), (cap)) == 0) +#define has_capability_noaudit(t, cap) (security_capable_noaudit((t), (cap)) == 0) extern int capable(int cap); diff --git a/include/linux/security.h b/include/linux/security.h index c13f1cec9abb..5fe28a671cd3 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -37,6 +37,10 @@ /* Maximum number of letters for an LSM name string */ #define SECURITY_NAME_MAX 10 +/* If capable should audit the security request */ +#define SECURITY_CAP_NOAUDIT 0 +#define SECURITY_CAP_AUDIT 1 + struct ctl_table; struct audit_krule; @@ -44,7 +48,7 @@ struct audit_krule; * These functions are in security/capability.c and are used * as the default capabilities functions */ -extern int cap_capable(struct task_struct *tsk, int cap); +extern int cap_capable(struct task_struct *tsk, int cap, int audit); extern int cap_settime(struct timespec *ts, struct timezone *tz); extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); @@ -1307,7 +1311,7 @@ struct security_operations { kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); - int (*capable) (struct task_struct *tsk, int cap); + int (*capable) (struct task_struct *tsk, int cap, int audit); int (*acct) (struct file *file); int (*sysctl) (struct ctl_table *table, int op); int (*quotactl) (int cmds, int type, int id, struct super_block *sb); @@ -1577,6 +1581,7 @@ void security_capset_set(struct task_struct *target, kernel_cap_t *inheritable, kernel_cap_t *permitted); int security_capable(struct task_struct *tsk, int cap); +int security_capable_noaudit(struct task_struct *tsk, int cap); int security_acct(struct file *file); int security_sysctl(struct ctl_table *table, int op); int security_quotactl(int cmds, int type, int id, struct super_block *sb); @@ -1782,7 +1787,12 @@ static inline void security_capset_set(struct task_struct *target, static inline int security_capable(struct task_struct *tsk, int cap) { - return cap_capable(tsk, cap); + return cap_capable(tsk, cap, SECURITY_CAP_AUDIT); +} + +static inline int security_capable_noaudit(struct task_struct *tsk, int cap) +{ + return cap_capable(tsk, cap, SECURITY_CAP_NOAUDIT); } static inline int security_acct(struct file *file) diff --git a/security/commoncap.c b/security/commoncap.c index d45393380997..dc06c0086b55 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -49,7 +49,7 @@ EXPORT_SYMBOL(cap_netlink_recv); * returns 0 when a task has a capability, but the kernel's capable() * returns 1 for this case. */ -int cap_capable (struct task_struct *tsk, int cap) +int cap_capable(struct task_struct *tsk, int cap, int audit) { /* Derived from include/linux/sched.h:capable. */ if (cap_raised(tsk->cap_effective, cap)) @@ -112,7 +112,7 @@ static inline int cap_inh_is_capped(void) * to the old permitted set. That is, if the current task * does *not* possess the CAP_SETPCAP capability. */ - return (cap_capable(current, CAP_SETPCAP) != 0); + return (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0); } static inline int cap_limit_ptraced_target(void) { return 1; } @@ -677,7 +677,7 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, || ((current->securebits & SECURE_ALL_LOCKS & ~arg2)) /*[2]*/ || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ - || (cap_capable(current, CAP_SETPCAP) != 0)) { /*[4]*/ + || (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0)) { /*[4]*/ /* * [1] no changing of bits that are locked * [2] no unlocking of locks @@ -742,7 +742,7 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages) { int cap_sys_admin = 0; - if (cap_capable(current, CAP_SYS_ADMIN) == 0) + if (cap_capable(current, CAP_SYS_ADMIN, SECURITY_CAP_NOAUDIT) == 0) cap_sys_admin = 1; return __vm_enough_memory(mm, pages, cap_sys_admin); } diff --git a/security/security.c b/security/security.c index c0acfa7177e5..346f21e0ec2c 100644 --- a/security/security.c +++ b/security/security.c @@ -163,7 +163,12 @@ void security_capset_set(struct task_struct *target, int security_capable(struct task_struct *tsk, int cap) { - return security_ops->capable(tsk, cap); + return security_ops->capable(tsk, cap, SECURITY_CAP_AUDIT); +} + +int security_capable_noaudit(struct task_struct *tsk, int cap) +{ + return security_ops->capable(tsk, cap, SECURITY_CAP_NOAUDIT); } int security_acct(struct file *file) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 7fd4de46b2a9..88a3ee33068a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1365,12 +1365,14 @@ static int task_has_perm(struct task_struct *tsk1, /* Check whether a task is allowed to use a capability. */ static int task_has_capability(struct task_struct *tsk, - int cap) + int cap, int audit) { struct task_security_struct *tsec; struct avc_audit_data ad; + struct av_decision avd; u16 sclass; u32 av = CAP_TO_MASK(cap); + int rc; tsec = tsk->security; @@ -1390,7 +1392,11 @@ static int task_has_capability(struct task_struct *tsk, "SELinux: out of range capability %d\n", cap); BUG(); } - return avc_has_perm(tsec->sid, tsec->sid, sclass, av, &ad); + + rc = avc_has_perm_noaudit(tsec->sid, tsec->sid, sclass, av, 0, &avd); + if (audit == SECURITY_CAP_AUDIT) + avc_audit(tsec->sid, tsec->sid, sclass, av, &avd, rc, &ad); + return rc; } /* Check whether a task is allowed to use a system operation. */ @@ -1802,15 +1808,15 @@ static void selinux_capset_set(struct task_struct *target, kernel_cap_t *effecti secondary_ops->capset_set(target, effective, inheritable, permitted); } -static int selinux_capable(struct task_struct *tsk, int cap) +static int selinux_capable(struct task_struct *tsk, int cap, int audit) { int rc; - rc = secondary_ops->capable(tsk, cap); + rc = secondary_ops->capable(tsk, cap, audit); if (rc) return rc; - return task_has_capability(tsk, cap); + return task_has_capability(tsk, cap, audit); } static int selinux_sysctl_get_sid(ctl_table *table, u16 tclass, u32 *sid) @@ -1975,7 +1981,7 @@ static int selinux_vm_enough_memory(struct mm_struct *mm, long pages) int rc, cap_sys_admin = 0; struct task_security_struct *tsec = current->security; - rc = secondary_ops->capable(current, CAP_SYS_ADMIN); + rc = secondary_ops->capable(current, CAP_SYS_ADMIN, SECURITY_CAP_NOAUDIT); if (rc == 0) rc = avc_has_perm_noaudit(tsec->sid, tsec->sid, SECCLASS_CAPABILITY, @@ -2829,7 +2835,7 @@ static int selinux_inode_getsecurity(const struct inode *inode, const char *name * and lack of permission just means that we fall back to the * in-core context value, not a denial. */ - error = secondary_ops->capable(current, CAP_MAC_ADMIN); + error = secondary_ops->capable(current, CAP_MAC_ADMIN, SECURITY_CAP_NOAUDIT); if (!error) error = avc_has_perm_noaudit(tsec->sid, tsec->sid, SECCLASS_CAPABILITY2, -- cgit v1.2.3 From 50ee91765e25e7967a7b69cd5cc2bcab85e2eeb8 Mon Sep 17 00:00:00 2001 From: Dhaval Giani Date: Tue, 11 Nov 2008 18:13:23 +0530 Subject: sched/rt: removed unneeded defintion Impact: cleanup This function no longer exists, so remove the defintion. Signed-off-by: Dhaval Giani Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index b483f39a7112..c6bfb34d978e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -258,8 +258,6 @@ static inline int select_nohz_load_balancer(int cpu) } #endif -extern unsigned long rt_needs_cpu(int cpu); - /* * Only dump TASK_* tasks. (0 for all tasks) */ -- cgit v1.2.3 From 9b739ba5e66c96938fbc07a4dbd9da5b81eac56f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 11 Nov 2008 16:47:44 -0800 Subject: net: remove struct neigh_table::pde ->pde isn't actually needed, since name is stashed in ->id. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/neighbour.h | 3 --- net/core/neighbour.c | 5 ++--- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index aa4b708654a4..365b5e260239 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -180,9 +180,6 @@ struct neigh_table __u32 hash_rnd; unsigned int hash_chain_gc; struct pneigh_entry **phash_buckets; -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *pde; -#endif }; /* flags for neigh_update() */ diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d9bbe010e0ee..500c2430007c 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1424,9 +1424,8 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) panic("cannot create neighbour cache statistics"); #ifdef CONFIG_PROC_FS - tbl->pde = proc_create_data(tbl->id, 0, init_net.proc_net_stat, - &neigh_stat_seq_fops, tbl); - if (!tbl->pde) + if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat, + &neigh_stat_seq_fops, tbl)) panic("cannot create neighbour proc dir entry"); #endif -- cgit v1.2.3 From 6bb3ce25d05f2990c8a19adaf427531430267c1f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 11 Nov 2008 17:25:22 -0800 Subject: net: remove struct dst_entry::entry_size Unused after kmem_cache_zalloc() conversion. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/dst.h | 1 - net/decnet/dn_route.c | 1 - net/ipv4/route.c | 2 -- net/ipv4/xfrm4_policy.c | 1 - net/ipv6/route.c | 2 -- net/ipv6/xfrm6_policy.c | 1 - 6 files changed, 8 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index f96c4ba4dd32..65a60fab2f82 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -104,7 +104,6 @@ struct dst_ops void (*link_failure)(struct sk_buff *); void (*update_pmtu)(struct dst_entry *dst, u32 mtu); int (*local_out)(struct sk_buff *skb); - int entry_size; atomic_t entries; struct kmem_cache *kmem_cachep; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 821bd1cdec04..768df000523b 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -131,7 +131,6 @@ static struct dst_ops dn_dst_ops = { .negative_advice = dn_dst_negative_advice, .link_failure = dn_dst_link_failure, .update_pmtu = dn_dst_update_pmtu, - .entry_size = sizeof(struct dn_route), .entries = ATOMIC_INIT(0), }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0dc0c3826763..4e6959c29819 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -160,7 +160,6 @@ static struct dst_ops ipv4_dst_ops = { .link_failure = ipv4_link_failure, .update_pmtu = ip_rt_update_pmtu, .local_out = __ip_local_out, - .entry_size = sizeof(struct rtable), .entries = ATOMIC_INIT(0), }; @@ -2701,7 +2700,6 @@ static struct dst_ops ipv4_dst_blackhole_ops = { .destroy = ipv4_dst_destroy, .check = ipv4_dst_check, .update_pmtu = ipv4_rt_blackhole_update_pmtu, - .entry_size = sizeof(struct rtable), .entries = ATOMIC_INIT(0), }; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index f9a775b7e796..84dbb5a03cc2 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -246,7 +246,6 @@ static struct dst_ops xfrm4_dst_ops = { .ifdown = xfrm4_dst_ifdown, .local_out = __ip_local_out, .gc_thresh = 1024, - .entry_size = sizeof(struct xfrm_dst), .entries = ATOMIC_INIT(0), }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4d40dc214b2d..9da1ece466a2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -108,7 +108,6 @@ static struct dst_ops ip6_dst_ops_template = { .link_failure = ip6_link_failure, .update_pmtu = ip6_rt_update_pmtu, .local_out = __ip6_local_out, - .entry_size = sizeof(struct rt6_info), .entries = ATOMIC_INIT(0), }; @@ -122,7 +121,6 @@ static struct dst_ops ip6_dst_blackhole_ops = { .destroy = ip6_dst_destroy, .check = ip6_dst_check, .update_pmtu = ip6_rt_blackhole_update_pmtu, - .entry_size = sizeof(struct rt6_info), .entries = ATOMIC_INIT(0), }; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 604bc0a96c05..3b67ce7786ec 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -277,7 +277,6 @@ static struct dst_ops xfrm6_dst_ops = { .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, .gc_thresh = 1024, - .entry_size = sizeof(struct xfrm_dst), .entries = ATOMIC_INIT(0), }; -- cgit v1.2.3 From d90ebcbfa7f5a8b4e20518c9f94c5c4e4cd3c2e5 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Wed, 12 Nov 2008 00:47:26 -0800 Subject: dccp: Query supported CCIDs This provides a data structure to record which CCIDs are locally supported and three accessor functions: - a test function for internal use which is used to validate CCID requests made by the user; - a copy function so that the list can be used for feature-negotiation; - documented getsockopt() support so that the user can query capabilities. The data structure is a table which is filled in at compile-time with the list of available CCIDs (which in turn depends on the Kconfig choices). Using the copy function for cloning the list of supported CCIDs is useful for feature negotiation, since the negotiation is now with the full list of available CCIDs (e.g. {2, 3}) instead of the default value {2}. This means negotiation will not fail if the peer requests to use CCID3 instead of CCID2. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- Documentation/networking/dccp.txt | 4 ++++ include/linux/dccp.h | 1 + net/dccp/ccid.c | 48 +++++++++++++++++++++++++++++++++++++++ net/dccp/ccid.h | 5 ++++ net/dccp/feat.c | 4 ++++ net/dccp/proto.c | 2 ++ 6 files changed, 64 insertions(+) (limited to 'include') diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt index 39131a3c78f8..f0aeb20fa63b 100644 --- a/Documentation/networking/dccp.txt +++ b/Documentation/networking/dccp.txt @@ -57,6 +57,10 @@ can be set before calling bind(). DCCP_SOCKOPT_GET_CUR_MPS is read-only and retrieves the current maximum packet size (application payload size) in bytes, see RFC 4340, section 14. +DCCP_SOCKOPT_AVAILABLE_CCIDS is also read-only and returns the list of CCIDs +supported by the endpoint (see include/linux/dccp.h for symbolic constants). +The caller needs to provide a sufficiently large (> 2) array of type uint8_t. + DCCP_SOCKOPT_SERVER_TIMEWAIT enables the server (listening socket) to hold timewait state when closing the connection (RFC 4340, 8.3). The usual case is that the closing server sends a CloseReq, whereupon the client holds timewait diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 484b8a1fb023..d3ac1bde60b4 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -209,6 +209,7 @@ struct dccp_so_feat { #define DCCP_SOCKOPT_SERVER_TIMEWAIT 6 #define DCCP_SOCKOPT_SEND_CSCOV 10 #define DCCP_SOCKOPT_RECV_CSCOV 11 +#define DCCP_SOCKOPT_AVAILABLE_CCIDS 12 #define DCCP_SOCKOPT_CCID_RX_INFO 128 #define DCCP_SOCKOPT_CCID_TX_INFO 192 diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index 8fe931a3d7a1..647cb0614f84 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -13,6 +13,13 @@ #include "ccid.h" +static u8 builtin_ccids[] = { + DCCPC_CCID2, /* CCID2 is supported by default */ +#if defined(CONFIG_IP_DCCP_CCID3) || defined(CONFIG_IP_DCCP_CCID3_MODULE) + DCCPC_CCID3, +#endif +}; + static struct ccid_operations *ccids[CCID_MAX]; #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) static atomic_t ccids_lockct = ATOMIC_INIT(0); @@ -86,6 +93,47 @@ static void ccid_kmem_cache_destroy(struct kmem_cache *slab) } } +/* check that up to @array_len members in @ccid_array are supported */ +bool ccid_support_check(u8 const *ccid_array, u8 array_len) +{ + u8 i, j, found; + + for (i = 0, found = 0; i < array_len; i++, found = 0) { + for (j = 0; !found && j < ARRAY_SIZE(builtin_ccids); j++) + found = (ccid_array[i] == builtin_ccids[j]); + if (!found) + return false; + } + return true; +} + +/** + * ccid_get_builtin_ccids - Provide copy of `builtin' CCID array + * @ccid_array: pointer to copy into + * @array_len: value to return length into + * This function allocates memory - caller must see that it is freed after use. + */ +int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len) +{ + *ccid_array = kmemdup(builtin_ccids, sizeof(builtin_ccids), gfp_any()); + if (*ccid_array == NULL) + return -ENOBUFS; + *array_len = ARRAY_SIZE(builtin_ccids); + return 0; +} + +int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, + char __user *optval, int __user *optlen) +{ + if (len < sizeof(builtin_ccids)) + return -EINVAL; + + if (put_user(sizeof(builtin_ccids), optlen) || + copy_to_user(optval, builtin_ccids, sizeof(builtin_ccids))) + return -EFAULT; + return 0; +} + int ccid_register(struct ccid_operations *ccid_ops) { int err = -ENOBUFS; diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index fdeae7b57319..259f5469d7d0 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -103,6 +103,11 @@ static inline void *ccid_priv(const struct ccid *ccid) return (void *)ccid->ccid_priv; } +extern bool ccid_support_check(u8 const *ccid_array, u8 array_len); +extern int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len); +extern int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, + char __user *, int __user *); + extern struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp); diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 192d494a3816..f79fb5e33f5e 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -342,6 +342,10 @@ static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local, !dccp_feat_sp_list_ok(feat, sp_val, sp_len)) return -EINVAL; + /* Avoid negotiating alien CCIDs by only advertising supported ones */ + if (feat == DCCPF_CCID && !ccid_support_check(sp_val, sp_len)) + return -EOPNOTSUPP; + if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len)) return -ENOMEM; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 01332fe7a99a..b4b10cbd8880 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -649,6 +649,8 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, case DCCP_SOCKOPT_GET_CUR_MPS: val = dp->dccps_mss_cache; break; + case DCCP_SOCKOPT_AVAILABLE_CCIDS: + return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen); case DCCP_SOCKOPT_SERVER_TIMEWAIT: val = dp->dccps_server_timewait; break; -- cgit v1.2.3 From 8f424b5f32d78b4f353b3cddca9804808ef063eb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Nov 2008 00:53:30 -0800 Subject: net: Introduce read_pnet() and write_pnet() helpers This patch introduces two helpers that deal with reading and writing struct net pointers in various network structures. Their implementation depends on CONFIG_NET_NS For symmetry, both functions work with "struct net **pnet". Their usage should reduce the number of #ifdef CONFIG_NET_NS, without adding many helpers for each network structure that hold a "struct net *pointer" Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/net_namespace.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 700c53a3c6fa..319557789a40 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -192,6 +192,24 @@ static inline void release_net(struct net *net) } #endif +#ifdef CONFIG_NET_NS + +static inline void write_pnet(struct net **pnet, struct net *net) +{ + *pnet = net; +} + +static inline struct net *read_pnet(struct net * const *pnet) +{ + return *pnet; +} + +#else + +#define write_pnet(pnet, net) do { (void)(net);} while (0) +#define read_pnet(pnet) (&init_net) + +#endif #define for_each_net(VAR) \ list_for_each_entry(VAR, &net_namespace_list, list) -- cgit v1.2.3 From 7a9546ee354ec6f23af403992b8c07baa50a23d2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Nov 2008 00:54:20 -0800 Subject: net: ib_net pointer should depends on CONFIG_NET_NS We can shrink size of "struct inet_bind_bucket" by 50%, using read_pnet() and write_pnet() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 7 +++++++ net/ipv4/inet_connection_sock.c | 4 ++-- net/ipv4/inet_hashtables.c | 6 +++--- 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 5cc182f9ecae..cb31fbf8ae2a 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -77,13 +77,20 @@ struct inet_ehash_bucket { * ports are created in O(1) time? I thought so. ;-) -DaveM */ struct inet_bind_bucket { +#ifdef CONFIG_NET_NS struct net *ib_net; +#endif unsigned short port; signed short fastreuse; struct hlist_node node; struct hlist_head owners; }; +static inline struct net *ib_net(struct inet_bind_bucket *ib) +{ + return read_pnet(&ib->ib_net); +} + #define inet_bind_bucket_for_each(tb, node, head) \ hlist_for_each_entry(tb, node, head, node) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 36f4cbc7da3a..05af807ca9b9 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -109,7 +109,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->ib_net == net && tb->port == rover) + if (ib_net(tb) == net && tb->port == rover) goto next; break; next: @@ -137,7 +137,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->ib_net == net && tb->port == snum) + if (ib_net(tb) == net && tb->port == snum) goto tb_found; } tb = NULL; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 44981906fb91..be41ebbec4eb 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -35,7 +35,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); if (tb != NULL) { - tb->ib_net = hold_net(net); + write_pnet(&tb->ib_net, hold_net(net)); tb->port = snum; tb->fastreuse = 0; INIT_HLIST_HEAD(&tb->owners); @@ -51,7 +51,7 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket { if (hlist_empty(&tb->owners)) { __hlist_del(&tb->node); - release_net(tb->ib_net); + release_net(ib_net(tb)); kmem_cache_free(cachep, tb); } } @@ -449,7 +449,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, * unique enough. */ inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->ib_net == net && tb->port == port) { + if (ib_net(tb) == net && tb->port == port) { WARN_ON(hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) goto next_port; -- cgit v1.2.3 From e42ea986e4a4cab4209d982feffcaf50f21e80e3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Nov 2008 00:54:54 -0800 Subject: net: Cleanup of neighbour code Using read_pnet() and write_pnet() in neighbour code ease the reading of code. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/neighbour.h | 12 ++---------- net/core/neighbour.c | 12 +++--------- 2 files changed, 5 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 365b5e260239..d8d790e56d3d 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -220,11 +220,7 @@ extern void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *p static inline struct net *neigh_parms_net(const struct neigh_parms *parms) { -#ifdef CONFIG_NET_NS - return parms->net; -#else - return &init_net; -#endif + return read_pnet(&parms->net); } extern unsigned long neigh_rand_reach_time(unsigned long base); @@ -241,11 +237,7 @@ extern int pneigh_delete(struct neigh_table *tbl, struct net *net, const void static inline struct net *pneigh_net(const struct pneigh_entry *pneigh) { -#ifdef CONFIG_NET_NS - return pneigh->net; -#else - return &init_net; -#endif + return read_pnet(&pneigh->net); } extern void neigh_app_ns(struct neighbour *n); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 500c2430007c..cca6a55909eb 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -531,9 +531,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, if (!n) goto out; -#ifdef CONFIG_NET_NS - n->net = hold_net(net); -#endif + write_pnet(&n->net, hold_net(net)); memcpy(n->key, pkey, key_len); n->dev = dev; if (dev) @@ -1350,9 +1348,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, dev_hold(dev); p->dev = dev; -#ifdef CONFIG_NET_NS - p->net = hold_net(net); -#endif + write_pnet(&p->net, hold_net(net)); p->sysctl_table = NULL; write_lock_bh(&tbl->lock); p->next = tbl->parms.next; @@ -1407,9 +1403,7 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) unsigned long now = jiffies; unsigned long phsize; -#ifdef CONFIG_NET_NS - tbl->parms.net = &init_net; -#endif + write_pnet(&tbl->parms.net, &init_net); atomic_set(&tbl->parms.refcnt, 1); tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time); -- cgit v1.2.3 From 3f5ec13696fd4a33bde42f385406cbb1d3cc96fd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 11 Nov 2008 23:21:31 +0100 Subject: tracing/fastboot: move boot tracer structs and funcs into their own header. Impact: Cleanups on the boot tracer and ftrace This patch bring some cleanups about the boot tracer headers. The functions and structures of this tracer have nothing related to ftrace and should have so their own header file. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 41 ----------------------------------------- include/trace/boot.h | 43 +++++++++++++++++++++++++++++++++++++++++++ init/main.c | 1 + kernel/trace/trace.h | 1 + 4 files changed, 45 insertions(+), 41 deletions(-) create mode 100644 include/trace/boot.h (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index dcbbf72a88b1..4fbc4a8b86a5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -287,45 +287,4 @@ extern trace_function_return_t ftrace_function_return; extern void unregister_ftrace_return(void); #endif -/* - * Structure which defines the trace of an initcall. - * You don't have to fill the func field since it is - * only used internally by the tracer. - */ -struct boot_trace { - pid_t caller; - char func[KSYM_NAME_LEN]; - int result; - unsigned long long duration; /* usecs */ - ktime_t calltime; - ktime_t rettime; -}; - -#ifdef CONFIG_BOOT_TRACER -/* Append the trace on the ring-buffer */ -extern void trace_boot(struct boot_trace *it, initcall_t fn); - -/* Tells the tracer that smp_pre_initcall is finished. - * So we can start the tracing - */ -extern void start_boot_trace(void); - -/* Resume the tracing of other necessary events - * such as sched switches - */ -extern void enable_boot_trace(void); - -/* Suspend this tracing. Actually, only sched_switches tracing have - * to be suspended. Initcalls doesn't need it.) - */ -extern void disable_boot_trace(void); -#else -static inline void trace_boot(struct boot_trace *it, initcall_t fn) { } -static inline void start_boot_trace(void) { } -static inline void enable_boot_trace(void) { } -static inline void disable_boot_trace(void) { } -#endif - - - #endif /* _LINUX_FTRACE_H */ diff --git a/include/trace/boot.h b/include/trace/boot.h new file mode 100644 index 000000000000..4cbe64e46cdc --- /dev/null +++ b/include/trace/boot.h @@ -0,0 +1,43 @@ +#ifndef _LINUX_TRACE_BOOT_H +#define _LINUX_TRACE_BOOT_H + +/* + * Structure which defines the trace of an initcall. + * You don't have to fill the func field since it is + * only used internally by the tracer. + */ +struct boot_trace { + pid_t caller; + char func[KSYM_NAME_LEN]; + int result; + unsigned long long duration; /* usecs */ + ktime_t calltime; + ktime_t rettime; +}; + +#ifdef CONFIG_BOOT_TRACER +/* Append the trace on the ring-buffer */ +extern void trace_boot(struct boot_trace *it, initcall_t fn); + +/* Tells the tracer that smp_pre_initcall is finished. + * So we can start the tracing + */ +extern void start_boot_trace(void); + +/* Resume the tracing of other necessary events + * such as sched switches + */ +extern void enable_boot_trace(void); + +/* Suspend this tracing. Actually, only sched_switches tracing have + * to be suspended. Initcalls doesn't need it.) + */ +extern void disable_boot_trace(void); +#else +static inline void trace_boot(struct boot_trace *it, initcall_t fn) { } +static inline void start_boot_trace(void) { } +static inline void enable_boot_trace(void) { } +static inline void disable_boot_trace(void) { } +#endif /* CONFIG_BOOT_TRACER */ + +#endif /* __LINUX_TRACE_BOOT_H */ diff --git a/init/main.c b/init/main.c index 4b03cd5656ca..16ca1ee071c4 100644 --- a/init/main.c +++ b/init/main.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index e40ce0c14690..f69a5199596b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -8,6 +8,7 @@ #include #include #include +#include enum trace_type { __TRACE_FIRST_TYPE = 0, -- cgit v1.2.3 From 74239072830ef3f1398edeb1bc1076fc330fd4a2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 11 Nov 2008 23:24:42 +0100 Subject: tracing/fastboot: Use the ring-buffer timestamp for initcall entries Impact: Split the boot tracer entries in two parts: call and return Now that we are using the sched tracer from the boot tracer, we want to use the same timestamp than the ring-buffer to have consistent time captures between sched events and initcall events. So we get rid of the old time capture by the boot tracer and split the initcall events in two parts: call and return. This way we have the ring buffer timestamp of both. An example trace: [ 27.904149584] calling net_ns_init+0x0/0x1c0 @ 1 [ 27.904429624] initcall net_ns_init+0x0/0x1c0 returned 0 after 0 msecs [ 27.904575926] calling reboot_init+0x0/0x20 @ 1 [ 27.904655399] initcall reboot_init+0x0/0x20 returned 0 after 0 msecs [ 27.904800228] calling sysctl_init+0x0/0x30 @ 1 [ 27.905142914] initcall sysctl_init+0x0/0x30 returned 0 after 0 msecs [ 27.905287211] calling ksysfs_init+0x0/0xb0 @ 1 ##### CPU 0 buffer started #### init-1 [000] 27.905395: 1:120:R + [001] 11:115:S ##### CPU 1 buffer started #### -0 [001] 27.905425: 0:140:R ==> [001] 11:115:R init-1 [000] 27.905426: 1:120:D ==> [000] 0:140:R -0 [000] 27.905431: 0:140:R + [000] 4:115:S -0 [000] 27.905451: 0:140:R ==> [000] 4:115:R ksoftirqd/0-4 [000] 27.905456: 4:115:S ==> [000] 0:140:R udevd-11 [001] 27.905458: 11:115:R + [001] 14:115:R -0 [000] 27.905459: 0:140:R + [000] 4:115:S -0 [000] 27.905462: 0:140:R ==> [000] 4:115:R udevd-11 [001] 27.905462: 11:115:R ==> [001] 14:115:R ksoftirqd/0-4 [000] 27.905467: 4:115:S ==> [000] 0:140:R -0 [000] 27.905470: 0:140:R + [000] 4:115:S -0 [000] 27.905473: 0:140:R ==> [000] 4:115:R ksoftirqd/0-4 [000] 27.905476: 4:115:S ==> [000] 0:140:R -0 [000] 27.905479: 0:140:R + [000] 4:115:S -0 [000] 27.905482: 0:140:R ==> [000] 4:115:R ksoftirqd/0-4 [000] 27.905486: 4:115:S ==> [000] 0:140:R udevd-14 [001] 27.905499: 14:120:X ==> [001] 11:115:R udevd-11 [001] 27.905506: 11:115:R + [000] 1:120:D -0 [000] 27.905515: 0:140:R ==> [000] 1:120:R udevd-11 [001] 27.905517: 11:115:S ==> [001] 0:140:R [ 27.905557107] initcall ksysfs_init+0x0/0xb0 returned 0 after 3906 msecs [ 27.905705736] calling init_jiffies_clocksource+0x0/0x10 @ 1 [ 27.905779239] initcall init_jiffies_clocksource+0x0/0x10 returned 0 after 0 msecs [ 27.906769814] calling pm_init+0x0/0x30 @ 1 [ 27.906853627] initcall pm_init+0x0/0x30 returned 0 after 0 msecs [ 27.906997803] calling pm_disk_init+0x0/0x20 @ 1 [ 27.907076946] initcall pm_disk_init+0x0/0x20 returned 0 after 0 msecs [ 27.907222556] calling swsusp_header_init+0x0/0x30 @ 1 [ 27.907294325] initcall swsusp_header_init+0x0/0x30 returned 0 after 0 msecs [ 27.907439620] calling stop_machine_init+0x0/0x50 @ 1 init-1 [000] 27.907485: 1:120:R + [000] 2:115:S init-1 [000] 27.907490: 1:120:D ==> [000] 2:115:R kthreadd-2 [000] 27.907507: 2:115:R + [001] 15:115:R -0 [001] 27.907517: 0:140:R ==> [001] 15:115:R kthreadd-2 [000] 27.907517: 2:115:D ==> [000] 0:140:R -0 [000] 27.907521: 0:140:R + [000] 4:115:S -0 [000] 27.907524: 0:140:R ==> [000] 4:115:R udevd-15 [001] 27.907527: 15:115:D + [000] 2:115:D ksoftirqd/0-4 [000] 27.907537: 4:115:S ==> [000] 2:115:R udevd-15 [001] 27.907537: 15:115:D ==> [001] 0:140:R kthreadd-2 [000] 27.907546: 2:115:R + [000] 1:120:D kthreadd-2 [000] 27.907550: 2:115:S ==> [000] 1:120:R init-1 [000] 27.907584: 1:120:R + [000] 15: 0:D init-1 [000] 27.907589: 1:120:R + [000] 2:115:S init-1 [000] 27.907593: 1:120:D ==> [000] 15: 0:R udevd-15 [000] 27.907601: 15: 0:S ==> [000] 2:115:R ##### CPU 0 buffer started #### kthreadd-2 [000] 27.907616: 2:115:R + [001] 16:115:R ##### CPU 1 buffer started #### -0 [001] 27.907620: 0:140:R ==> [001] 16:115:R kthreadd-2 [000] 27.907621: 2:115:D ==> [000] 0:140:R udevd-16 [001] 27.907625: 16:115:D + [000] 2:115:D -0 [000] 27.907628: 0:140:R + [000] 4:115:S udevd-16 [001] 27.907629: 16:115:D ==> [001] 0:140:R -0 [000] 27.907631: 0:140:R ==> [000] 4:115:R ksoftirqd/0-4 [000] 27.907636: 4:115:S ==> [000] 2:115:R kthreadd-2 [000] 27.907644: 2:115:R + [000] 1:120:D kthreadd-2 [000] 27.907647: 2:115:S ==> [000] 1:120:R init-1 [000] 27.907657: 1:120:R + [001] 16: 0:D -0 [001] 27.907666: 0:140:R ==> [001] 16: 0:R [ 27.907703862] initcall stop_machine_init+0x0/0x50 returned 0 after 0 msecs [ 27.907850704] calling filelock_init+0x0/0x30 @ 1 [ 27.907926573] initcall filelock_init+0x0/0x30 returned 0 after 0 msecs [ 27.908071327] calling init_script_binfmt+0x0/0x10 @ 1 [ 27.908165195] initcall init_script_binfmt+0x0/0x10 returned 0 after 0 msecs [ 27.908309461] calling init_elf_binfmt+0x0/0x10 @ 1 Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/trace/boot.h | 31 ++++++++---- init/main.c | 32 ++++++------ kernel/trace/trace.h | 17 +++++-- kernel/trace/trace_boot.c | 123 +++++++++++++++++++++++++++++++++++----------- 4 files changed, 144 insertions(+), 59 deletions(-) (limited to 'include') diff --git a/include/trace/boot.h b/include/trace/boot.h index 4cbe64e46cdc..6b54537eab02 100644 --- a/include/trace/boot.h +++ b/include/trace/boot.h @@ -2,22 +2,30 @@ #define _LINUX_TRACE_BOOT_H /* - * Structure which defines the trace of an initcall. + * Structure which defines the trace of an initcall + * while it is called. * You don't have to fill the func field since it is * only used internally by the tracer. */ -struct boot_trace { +struct boot_trace_call { pid_t caller; char func[KSYM_NAME_LEN]; - int result; - unsigned long long duration; /* usecs */ - ktime_t calltime; - ktime_t rettime; +}; + +/* + * Structure which defines the trace of an initcall + * while it returns. + */ +struct boot_trace_ret { + char func[KSYM_NAME_LEN]; + int result; + unsigned long long duration; /* nsecs */ }; #ifdef CONFIG_BOOT_TRACER -/* Append the trace on the ring-buffer */ -extern void trace_boot(struct boot_trace *it, initcall_t fn); +/* Append the traces on the ring-buffer */ +extern void trace_boot_call(struct boot_trace_call *bt, initcall_t fn); +extern void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn); /* Tells the tracer that smp_pre_initcall is finished. * So we can start the tracing @@ -34,7 +42,12 @@ extern void enable_boot_trace(void); */ extern void disable_boot_trace(void); #else -static inline void trace_boot(struct boot_trace *it, initcall_t fn) { } +static inline +void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) { } + +static inline +void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) { } + static inline void start_boot_trace(void) { } static inline void enable_boot_trace(void) { } static inline void disable_boot_trace(void) { } diff --git a/init/main.c b/init/main.c index 16ca1ee071c4..e810196bf2f2 100644 --- a/init/main.c +++ b/init/main.c @@ -704,33 +704,35 @@ core_param(initcall_debug, initcall_debug, bool, 0644); int do_one_initcall(initcall_t fn) { int count = preempt_count(); - ktime_t delta; + ktime_t calltime, delta, rettime; char msgbuf[64]; - struct boot_trace it; + struct boot_trace_call call; + struct boot_trace_ret ret; if (initcall_debug) { - it.caller = task_pid_nr(current); - printk("calling %pF @ %i\n", fn, it.caller); - it.calltime = ktime_get(); + call.caller = task_pid_nr(current); + printk("calling %pF @ %i\n", fn, call.caller); + calltime = ktime_get(); + trace_boot_call(&call, fn); enable_boot_trace(); } - it.result = fn(); + ret.result = fn(); if (initcall_debug) { - it.rettime = ktime_get(); - delta = ktime_sub(it.rettime, it.calltime); - it.duration = (unsigned long long) delta.tv64 >> 10; - printk("initcall %pF returned %d after %Ld usecs\n", fn, - it.result, it.duration); - trace_boot(&it, fn); disable_boot_trace(); + rettime = ktime_get(); + delta = ktime_sub(rettime, calltime); + ret.duration = (unsigned long long) delta.tv64 >> 10; + trace_boot_ret(&ret, fn); + printk("initcall %pF returned %d after %Ld usecs\n", fn, + ret.result, ret.duration); } msgbuf[0] = 0; - if (it.result && it.result != -ENODEV && initcall_debug) - sprintf(msgbuf, "error code %d ", it.result); + if (ret.result && ret.result != -ENODEV && initcall_debug) + sprintf(msgbuf, "error code %d ", ret.result); if (preempt_count() != count) { strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf)); @@ -744,7 +746,7 @@ int do_one_initcall(initcall_t fn) printk("initcall %pF returned with %s\n", fn, msgbuf); } - return it.result; + return ret.result; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f69a5199596b..b5f91f198fd4 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -22,7 +22,8 @@ enum trace_type { TRACE_SPECIAL, TRACE_MMIO_RW, TRACE_MMIO_MAP, - TRACE_BOOT, + TRACE_BOOT_CALL, + TRACE_BOOT_RET, TRACE_FN_RET, __TRACE_LAST_TYPE @@ -123,9 +124,14 @@ struct trace_mmiotrace_map { struct mmiotrace_map map; }; -struct trace_boot { +struct trace_boot_call { struct trace_entry ent; - struct boot_trace initcall; + struct boot_trace_call boot_call; +}; + +struct trace_boot_ret { + struct trace_entry ent; + struct boot_trace_ret boot_ret; }; /* @@ -228,8 +234,9 @@ extern void __ftrace_bad_type(void); TRACE_MMIO_RW); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \ TRACE_MMIO_MAP); \ - IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT); \ - IF_ASSIGN(var, ent, struct ftrace_ret_entry, TRACE_FN_RET); \ + IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\ + IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\ + IF_ASSIGN(var, ent, struct ftrace_ret_entry, TRACE_FN_RET);\ __ftrace_bad_type(); \ } while (0) diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 8f71915e8bb4..cb333b7fd113 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -58,35 +58,71 @@ static void boot_trace_init(struct trace_array *tr) tracing_sched_switch_assign_trace(tr); } -static enum print_line_t initcall_print_line(struct trace_iterator *iter) +static enum print_line_t +initcall_call_print_line(struct trace_iterator *iter) { + struct trace_entry *entry = iter->ent; + struct trace_seq *s = &iter->seq; + struct trace_boot_call *field; + struct boot_trace_call *call; + u64 ts; + unsigned long nsec_rem; int ret; + + trace_assign_type(field, entry); + call = &field->boot_call; + ts = iter->ts; + nsec_rem = do_div(ts, 1000000000); + + ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n", + (unsigned long)ts, nsec_rem, call->func, call->caller); + + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + else + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t +initcall_ret_print_line(struct trace_iterator *iter) +{ struct trace_entry *entry = iter->ent; - struct trace_boot *field = (struct trace_boot *)entry; - struct boot_trace *it = &field->initcall; struct trace_seq *s = &iter->seq; - struct timespec calltime = ktime_to_timespec(it->calltime); - struct timespec rettime = ktime_to_timespec(it->rettime); - - if (entry->type == TRACE_BOOT) { - ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n", - calltime.tv_sec, - calltime.tv_nsec, - it->func, it->caller); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s " - "returned %d after %lld msecs\n", - rettime.tv_sec, - rettime.tv_nsec, - it->func, it->result, it->duration); - - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + struct trace_boot_ret *field; + struct boot_trace_ret *init_ret; + u64 ts; + unsigned long nsec_rem; + int ret; + + trace_assign_type(field, entry); + init_ret = &field->boot_ret; + ts = iter->ts; + nsec_rem = do_div(ts, 1000000000); + + ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s " + "returned %d after %llu msecs\n", + (unsigned long) ts, + nsec_rem, + init_ret->func, init_ret->result, init_ret->duration); + + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + else return TRACE_TYPE_HANDLED; +} + +static enum print_line_t initcall_print_line(struct trace_iterator *iter) +{ + struct trace_entry *entry = iter->ent; + + switch (entry->type) { + case TRACE_BOOT_CALL: + return initcall_call_print_line(iter); + case TRACE_BOOT_RET: + return initcall_ret_print_line(iter); + default: + return TRACE_TYPE_UNHANDLED; } - return TRACE_TYPE_UNHANDLED; } struct tracer boot_tracer __read_mostly = @@ -97,11 +133,10 @@ struct tracer boot_tracer __read_mostly = .print_line = initcall_print_line, }; -void trace_boot(struct boot_trace *it, initcall_t fn) +void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) { struct ring_buffer_event *event; - struct trace_boot *entry; - struct trace_array_cpu *data; + struct trace_boot_call *entry; unsigned long irq_flags; struct trace_array *tr = boot_trace; @@ -111,9 +146,37 @@ void trace_boot(struct boot_trace *it, initcall_t fn) /* Get its name now since this function could * disappear because it is in the .init section. */ - sprint_symbol(it->func, (unsigned long)fn); + sprint_symbol(bt->func, (unsigned long)fn); + preempt_disable(); + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + goto out; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, 0, 0); + entry->ent.type = TRACE_BOOT_CALL; + entry->boot_call = *bt; + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + + trace_wake_up(); + + out: + preempt_enable(); +} + +void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) +{ + struct ring_buffer_event *event; + struct trace_boot_ret *entry; + unsigned long irq_flags; + struct trace_array *tr = boot_trace; + + if (!pre_initcalls_finished) + return; + + sprint_symbol(bt->func, (unsigned long)fn); preempt_disable(); - data = tr->data[smp_processor_id()]; event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq_flags); @@ -121,8 +184,8 @@ void trace_boot(struct boot_trace *it, initcall_t fn) goto out; entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, 0, 0); - entry->ent.type = TRACE_BOOT; - entry->initcall = *it; + entry->ent.type = TRACE_BOOT_RET; + entry->boot_ret = *bt; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); trace_wake_up(); -- cgit v1.2.3 From a7d26f84cb76b962217ce277c3e3a9748a26ac11 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Wed, 12 Nov 2008 11:02:25 +0100 Subject: ALSA: Release v1.0.18a Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai --- include/sound/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/version.h b/include/sound/version.h index eae315573563..2b48237e23bf 100644 --- a/include/sound/version.h +++ b/include/sound/version.h @@ -1,3 +1,3 @@ /* include/version.h */ -#define CONFIG_SND_VERSION "1.0.18" +#define CONFIG_SND_VERSION "1.0.18a" #define CONFIG_SND_DATE "" -- cgit v1.2.3 From 92a77aac9812d5397abbe6f1920e085e50838635 Mon Sep 17 00:00:00 2001 From: James Morris Date: Wed, 12 Nov 2008 21:20:00 +1100 Subject: security: remove broken and useless declarations Remove broken declarations for security_capable* functions, which were not needed anyway. Signed-off-by: James Morris --- include/linux/capability.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/capability.h b/include/linux/capability.h index b313ba1dd5d1..7f26580a5a4d 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -521,8 +521,6 @@ extern const kernel_cap_t __cap_init_eff_set; kernel_cap_t cap_set_effective(const kernel_cap_t pE_new); -extern int security_capable(struct task_struct *t, int cap); -extern int security_capable_noaudit(struct task_struct *t, int cap); /** * has_capability - Determine if a task has a superior capability available * @t: The task in question -- cgit v1.2.3 From 1f0d69a9fc815db82f15722bf05227190b1d714d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 12 Nov 2008 00:14:39 -0500 Subject: tracing: profile likely and unlikely annotations Impact: new unlikely/likely profiler Andrew Morton recently suggested having an in-kernel way to profile likely and unlikely macros. This patch achieves that goal. When configured, every(*) likely and unlikely macro gets a counter attached to it. When the condition is hit, the hit and misses of that condition are recorded. These numbers can later be retrieved by: /debugfs/tracing/profile_likely - All likely markers /debugfs/tracing/profile_unlikely - All unlikely markers. # cat /debug/tracing/profile_unlikely | head correct incorrect % Function File Line ------- --------- - -------- ---- ---- 2167 0 0 do_arch_prctl process_64.c 832 0 0 0 do_arch_prctl process_64.c 804 2670 0 0 IS_ERR err.h 34 71230 5693 7 __switch_to process_64.c 673 76919 0 0 __switch_to process_64.c 639 43184 33743 43 __switch_to process_64.c 624 12740 64181 83 __switch_to process_64.c 594 12740 64174 83 __switch_to process_64.c 590 # cat /debug/tracing/profile_unlikely | \ awk '{ if ($3 > 25) print $0; }' |head -20 44963 35259 43 __switch_to process_64.c 624 12762 67454 84 __switch_to process_64.c 594 12762 67447 84 __switch_to process_64.c 590 1478 595 28 syscall_get_error syscall.h 51 0 2821 100 syscall_trace_leave ptrace.c 1567 0 1 100 native_smp_prepare_cpus smpboot.c 1237 86338 265881 75 calc_delta_fair sched_fair.c 408 210410 108540 34 calc_delta_mine sched.c 1267 0 54550 100 sched_info_queued sched_stats.h 222 51899 66435 56 pick_next_task_fair sched_fair.c 1422 6 10 62 yield_task_fair sched_fair.c 982 7325 2692 26 rt_policy sched.c 144 0 1270 100 pre_schedule_rt sched_rt.c 1261 1268 48073 97 pick_next_task_rt sched_rt.c 884 0 45181 100 sched_info_dequeued sched_stats.h 177 0 15 100 sched_move_task sched.c 8700 0 15 100 sched_move_task sched.c 8690 53167 33217 38 schedule sched.c 4457 0 80208 100 sched_info_switch sched_stats.h 270 30585 49631 61 context_switch sched.c 2619 # cat /debug/tracing/profile_likely | awk '{ if ($3 > 25) print $0; }' 39900 36577 47 pick_next_task sched.c 4397 20824 15233 42 switch_mm mmu_context_64.h 18 0 7 100 __cancel_work_timer workqueue.c 560 617 66484 99 clocksource_adjust timekeeping.c 456 0 346340 100 audit_syscall_exit auditsc.c 1570 38 347350 99 audit_get_context auditsc.c 732 0 345244 100 audit_syscall_entry auditsc.c 1541 38 1017 96 audit_free auditsc.c 1446 0 1090 100 audit_alloc auditsc.c 862 2618 1090 29 audit_alloc auditsc.c 858 0 6 100 move_masked_irq migration.c 9 1 198 99 probe_sched_wakeup trace_sched_switch.c 58 2 2 50 probe_wakeup trace_sched_wakeup.c 227 0 2 100 probe_wakeup_sched_switch trace_sched_wakeup.c 144 4514 2090 31 __grab_cache_page filemap.c 2149 12882 228786 94 mapping_unevictable pagemap.h 50 4 11 73 __flush_cpu_slab slub.c 1466 627757 330451 34 slab_free slub.c 1731 2959 61245 95 dentry_lru_del_init dcache.c 153 946 1217 56 load_elf_binary binfmt_elf.c 904 102 82 44 disk_put_part genhd.h 206 1 1 50 dst_gc_task dst.c 82 0 19 100 tcp_mss_split_point tcp_output.c 1126 As you can see by the above, there's a bit of work to do in rethinking the use of some unlikelys and likelys. Note: the unlikely case had 71 hits that were more than 25%. Note: After submitting my first version of this patch, Andrew Morton showed me a version written by Daniel Walker, where I picked up the following ideas from: 1) Using __builtin_constant_p to avoid profiling fixed values. 2) Using __FILE__ instead of instruction pointers. 3) Using the preprocessor to stop all profiling of likely annotations from vsyscall_64.c. Thanks to Andrew Morton, Arjan van de Ven, Theodore Tso and Ingo Molnar for their feed back on this patch. (*) Not ever unlikely is recorded, those that are used by vsyscalls (a few of them) had to have profiling disabled. Signed-off-by: Steven Rostedt Cc: Andrew Morton Cc: Frederic Weisbecker Cc: Theodore Tso Cc: Arjan van de Ven Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/vsyscall_64.c | 8 ++ include/asm-generic/vmlinux.lds.h | 14 +++- include/linux/compiler.h | 61 +++++++++++++- kernel/trace/Kconfig | 16 ++++ kernel/trace/Makefile | 1 + kernel/trace/trace_unlikely.c | 164 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 261 insertions(+), 3 deletions(-) create mode 100644 kernel/trace/trace_unlikely.c (limited to 'include') diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 0b8b6690a86d..2f90202e59b3 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -17,6 +17,14 @@ * want per guest time just set the kernel.vsyscall64 sysctl to 0. */ +/* Protect userspace from profiling */ +#ifdef CONFIG_TRACE_UNLIKELY_PROFILE +# undef likely +# undef unlikely +# define likely(x) likely_notrace(x) +# define unlikely(x) unlikely_notrace(x) +#endif + #include #include #include diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 80744606bad1..e10beb5335c9 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -45,6 +45,17 @@ #define MCOUNT_REC() #endif +#ifdef CONFIG_TRACE_UNLIKELY_PROFILE +#define LIKELY_PROFILE() VMLINUX_SYMBOL(__start_likely_profile) = .; \ + *(_ftrace_likely) \ + VMLINUX_SYMBOL(__stop_likely_profile) = .; \ + VMLINUX_SYMBOL(__start_unlikely_profile) = .; \ + *(_ftrace_unlikely) \ + VMLINUX_SYMBOL(__stop_unlikely_profile) = .; +#else +#define LIKELY_PROFILE() +#endif + /* .data section */ #define DATA_DATA \ *(.data) \ @@ -62,7 +73,8 @@ VMLINUX_SYMBOL(__stop___markers) = .; \ VMLINUX_SYMBOL(__start___tracepoints) = .; \ *(__tracepoints) \ - VMLINUX_SYMBOL(__stop___tracepoints) = .; + VMLINUX_SYMBOL(__stop___tracepoints) = .; \ + LIKELY_PROFILE() #define RO_DATA(align) \ . = ALIGN((align)); \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 98115d9d04da..935e30cfaf3c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -59,8 +59,65 @@ extern void __chk_io_ptr(const volatile void __iomem *); * specific implementations come from the above header files */ -#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) +#ifdef CONFIG_TRACE_UNLIKELY_PROFILE +struct ftrace_likely_data { + const char *func; + const char *file; + unsigned line; + unsigned long correct; + unsigned long incorrect; +}; +void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect); + +#define likely_notrace(x) __builtin_expect(!!(x), 1) +#define unlikely_notrace(x) __builtin_expect(!!(x), 0) + +#define likely_check(x) ({ \ + int ______r; \ + static struct ftrace_likely_data \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_likely"))) \ + ______f = { \ + .func = __func__, \ + .file = __FILE__, \ + .line = __LINE__, \ + }; \ + ______f.line = __LINE__; \ + ______r = likely_notrace(x); \ + ftrace_likely_update(&______f, ______r, 1); \ + ______r; \ + }) +#define unlikely_check(x) ({ \ + int ______r; \ + static struct ftrace_likely_data \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_unlikely"))) \ + ______f = { \ + .func = __func__, \ + .file = __FILE__, \ + .line = __LINE__, \ + }; \ + ______f.line = __LINE__; \ + ______r = unlikely_notrace(x); \ + ftrace_likely_update(&______f, ______r, 0); \ + ______r; \ + }) + +/* + * Using __builtin_constant_p(x) to ignore cases where the return + * value is always the same. This idea is taken from a similar patch + * written by Daniel Walker. + */ +# ifndef likely +# define likely(x) (__builtin_constant_p(x) ? !!(x) : likely_check(x)) +# endif +# ifndef unlikely +# define unlikely(x) (__builtin_constant_p(x) ? !!(x) : unlikely_check(x)) +# endif +#else +# define likely(x) __builtin_expect(!!(x), 1) +# define unlikely(x) __builtin_expect(!!(x), 0) +#endif /* Optimization barrier */ #ifndef barrier diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d986216c8327..a604f24c755f 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -159,6 +159,22 @@ config BOOT_TRACER selected, because the self-tests are an initcall as well and that would invalidate the boot trace. ) +config TRACE_UNLIKELY_PROFILE + bool "Trace likely/unlikely profiler" + depends on DEBUG_KERNEL + select TRACING + help + This tracer profiles all the the likely and unlikely macros + in the kernel. It will display the results in: + + /debugfs/tracing/profile_likely + /debugfs/tracing/profile_unlikely + + Note: this will add a significant overhead, only turn this + on if you need to profile the system's use of these macros. + + Say N if unsure. + config STACK_TRACER bool "Trace max stack" depends on HAVE_FUNCTION_TRACER diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 3e1f361bbc17..98e70ee27986 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -25,5 +25,6 @@ obj-$(CONFIG_STACK_TRACER) += trace_stack.o obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o obj-$(CONFIG_BOOT_TRACER) += trace_boot.o obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o +obj-$(CONFIG_TRACE_UNLIKELY_PROFILE) += trace_unlikely.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace_unlikely.c b/kernel/trace/trace_unlikely.c new file mode 100644 index 000000000000..94932696069f --- /dev/null +++ b/kernel/trace/trace_unlikely.c @@ -0,0 +1,164 @@ +/* + * unlikely profiler + * + * Copyright (C) 2008 Steven Rostedt + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "trace.h" + +void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect) +{ + /* FIXME: Make this atomic! */ + if (val == expect) + f->correct++; + else + f->incorrect++; +} +EXPORT_SYMBOL(ftrace_likely_update); + +struct ftrace_pointer { + void *start; + void *stop; +}; + +static void * +t_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct ftrace_pointer *f = m->private; + struct ftrace_likely_data *p = v; + + (*pos)++; + + if (v == (void *)1) + return f->start; + + ++p; + + if ((void *)p >= (void *)f->stop) + return NULL; + + return p; +} + +static void *t_start(struct seq_file *m, loff_t *pos) +{ + void *t = (void *)1; + loff_t l = 0; + + for (; t && l < *pos; t = t_next(m, t, &l)) + ; + + return t; +} + +static void t_stop(struct seq_file *m, void *p) +{ +} + +static int t_show(struct seq_file *m, void *v) +{ + struct ftrace_likely_data *p = v; + const char *f; + unsigned long percent; + + if (v == (void *)1) { + seq_printf(m, " correct incorrect %% " + " Function " + " File Line\n" + " ------- --------- - " + " -------- " + " ---- ----\n"); + return 0; + } + + /* Only print the file, not the path */ + f = p->file + strlen(p->file); + while (f >= p->file && *f != '/') + f--; + f++; + + if (p->correct) { + percent = p->incorrect * 100; + percent /= p->correct + p->incorrect; + } else + percent = p->incorrect ? 100 : 0; + + seq_printf(m, "%8lu %8lu %3lu ", p->correct, p->incorrect, percent); + seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line); + return 0; +} + +static struct seq_operations tracing_likely_seq_ops = { + .start = t_start, + .next = t_next, + .stop = t_stop, + .show = t_show, +}; + +static int tracing_likely_open(struct inode *inode, struct file *file) +{ + int ret; + + ret = seq_open(file, &tracing_likely_seq_ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = (void *)inode->i_private; + } + + return ret; +} + +static struct file_operations tracing_likely_fops = { + .open = tracing_likely_open, + .read = seq_read, + .llseek = seq_lseek, +}; + +extern unsigned long __start_likely_profile[]; +extern unsigned long __stop_likely_profile[]; +extern unsigned long __start_unlikely_profile[]; +extern unsigned long __stop_unlikely_profile[]; + +static struct ftrace_pointer ftrace_likely_pos = { + .start = __start_likely_profile, + .stop = __stop_likely_profile, +}; + +static struct ftrace_pointer ftrace_unlikely_pos = { + .start = __start_unlikely_profile, + .stop = __stop_unlikely_profile, +}; + +static __init int ftrace_unlikely_init(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + + d_tracer = tracing_init_dentry(); + + entry = debugfs_create_file("profile_likely", 0444, d_tracer, + &ftrace_likely_pos, + &tracing_likely_fops); + if (!entry) + pr_warning("Could not create debugfs 'profile_likely' entry\n"); + + entry = debugfs_create_file("profile_unlikely", 0444, d_tracer, + &ftrace_unlikely_pos, + &tracing_likely_fops); + if (!entry) + pr_warning("Could not create debugfs" + " 'profile_unlikely' entry\n"); + + return 0; +} + +device_initcall(ftrace_unlikely_init); -- cgit v1.2.3 From e25cf3db560e803292946ef23a30c69e341ce56f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 17 Oct 2008 15:55:07 +0200 Subject: lockdep: include/linux/lockdep.h - fix warning in net/bluetooth/af_bluetooth.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix this warning: net/bluetooth/af_bluetooth.c:60: warning: ‘bt_key_strings’ defined but not used net/bluetooth/af_bluetooth.c:71: warning: ‘bt_slock_key_strings’ defined but not used this is a lockdep macro problem in the !LOCKDEP case. We cannot convert it to an inline because the macro works on multiple types, but we can mark the parameter used. [ also clean up a misaligned tab in sock_lock_init_class_and_name() ] [ also remove #ifdefs from around af_family_clock_key strings - which were certainly added to get rid of the ugly build warnings. ] Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 5 +++-- include/net/sock.h | 2 +- net/core/sock.c | 2 -- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index fc9f8e88123b..8956daf64abd 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -336,10 +336,11 @@ static inline void lockdep_on(void) # define lock_set_subclass(l, s, i) do { } while (0) # define lockdep_init() do { } while (0) # define lockdep_info() do { } while (0) -# define lockdep_init_map(lock, name, key, sub) do { (void)(key); } while (0) +# define lockdep_init_map(lock, name, key, sub) \ + do { (void)(name); (void)(key); } while (0) # define lockdep_set_class(lock, key) do { (void)(key); } while (0) # define lockdep_set_class_and_name(lock, key, name) \ - do { (void)(key); } while (0) + do { (void)(key); (void)(name); } while (0) #define lockdep_set_class_and_subclass(lock, key, sub) \ do { (void)(key); } while (0) #define lockdep_set_subclass(lock, sub) do { } while (0) diff --git a/include/net/sock.h b/include/net/sock.h index c04f9e18ea22..2f47107f6d0f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -815,7 +815,7 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) */ #define sock_lock_init_class_and_name(sk, sname, skey, name, key) \ do { \ - sk->sk_lock.owned = 0; \ + sk->sk_lock.owned = 0; \ init_waitqueue_head(&sk->sk_lock.wq); \ spin_lock_init(&(sk)->sk_lock.slock); \ debug_check_no_locks_freed((void *)&(sk)->sk_lock, \ diff --git a/net/core/sock.c b/net/core/sock.c index 5e2a3132a8c9..341e39456952 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -136,7 +136,6 @@ static struct lock_class_key af_family_keys[AF_MAX]; static struct lock_class_key af_family_slock_keys[AF_MAX]; -#ifdef CONFIG_DEBUG_LOCK_ALLOC /* * Make lock validator output more readable. (we pre-construct these * strings build-time, so that runtime initialization of socket @@ -187,7 +186,6 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , "clock-AF_MAX" }; -#endif /* * sk_callback_lock locking rules are per-address-family, -- cgit v1.2.3 From 2b7d0390a6d6d595f43ea3806639664afe5b9ebe Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 12 Nov 2008 13:17:38 +0100 Subject: tracing: branch tracer, fix vdso crash Impact: fix bootup crash the branch tracer missed arch/x86/vdso/vclock_gettime.c from disabling tracing, which caused such bootup crashes: [ 201.840097] init[1]: segfault at 7fffed3fe7c0 ip 00007fffed3fea2e sp 000077 also clean up the ugly ifdefs in arch/x86/kernel/vsyscall_64.c by creating DISABLE_UNLIKELY_PROFILE facility for code to turn off instrumentation on a per file basis. Signed-off-by: Ingo Molnar --- arch/x86/kernel/vsyscall_64.c | 9 ++------- arch/x86/vdso/vclock_gettime.c | 3 +++ include/linux/compiler.h | 6 +++++- 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 2f90202e59b3..ece02932ea57 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -17,13 +17,8 @@ * want per guest time just set the kernel.vsyscall64 sysctl to 0. */ -/* Protect userspace from profiling */ -#ifdef CONFIG_TRACE_UNLIKELY_PROFILE -# undef likely -# undef unlikely -# define likely(x) likely_notrace(x) -# define unlikely(x) unlikely_notrace(x) -#endif +/* Disable profiling for userspace code: */ +#define DISABLE_UNLIKELY_PROFILE #include #include diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 1ef0f90813d6..6e667631e7dc 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -9,6 +9,9 @@ * Also alternative() doesn't work. */ +/* Disable profiling for userspace code: */ +#define DISABLE_UNLIKELY_PROFILE + #include #include #include diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 935e30cfaf3c..63b7d9089d6e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -59,7 +59,11 @@ extern void __chk_io_ptr(const volatile void __iomem *); * specific implementations come from the above header files */ -#ifdef CONFIG_TRACE_UNLIKELY_PROFILE +/* + * Note: DISABLE_UNLIKELY_PROFILE can be used by special lowlevel code + * to disable branch tracing on a per file basis. + */ +#if defined(CONFIG_TRACE_UNLIKELY_PROFILE) && !defined(DISABLE_UNLIKELY_PROFILE) struct ftrace_likely_data { const char *func; const char *file; -- cgit v1.2.3 From c2eb9c4ea383aee154e7139395872c4da629e715 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Wed, 12 Nov 2008 16:31:37 +0100 Subject: ALSA: when card identification is changed, change also /proc/asound symlink Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai --- include/sound/info.h | 2 ++ sound/core/info.c | 17 +++++++++++++++++ sound/core/init.c | 1 + 3 files changed, 20 insertions(+) (limited to 'include') diff --git a/include/sound/info.h b/include/sound/info.h index 8ae72e74f898..46f702a76e4f 100644 --- a/include/sound/info.h +++ b/include/sound/info.h @@ -126,6 +126,7 @@ int snd_info_card_create(struct snd_card * card); int snd_info_card_register(struct snd_card * card); int snd_info_card_free(struct snd_card * card); void snd_info_card_disconnect(struct snd_card * card); +void snd_info_card_id_change(struct snd_card * card); int snd_info_register(struct snd_info_entry * entry); /* for card drivers */ @@ -160,6 +161,7 @@ static inline int snd_info_card_create(struct snd_card * card) { return 0; } static inline int snd_info_card_register(struct snd_card * card) { return 0; } static inline int snd_info_card_free(struct snd_card * card) { return 0; } static inline void snd_info_card_disconnect(struct snd_card * card) { } +static inline void snd_info_card_id_change(struct snd_card * card) { } static inline int snd_info_register(struct snd_info_entry * entry) { return 0; } static inline int snd_card_proc_new(struct snd_card *card, const char *name, diff --git a/sound/core/info.c b/sound/core/info.c index 527b207462b0..70fa87189f36 100644 --- a/sound/core/info.c +++ b/sound/core/info.c @@ -652,6 +652,23 @@ int snd_info_card_register(struct snd_card *card) return 0; } +/* + * called on card->id change + */ +void snd_info_card_id_change(struct snd_card *card) +{ + mutex_lock(&info_mutex); + if (card->proc_root_link) { + snd_remove_proc_entry(snd_proc_root, card->proc_root_link); + card->proc_root_link = NULL; + } + if (strcmp(card->id, card->proc_root->name)) + card->proc_root_link = proc_symlink(card->id, + snd_proc_root, + card->proc_root->name); + mutex_unlock(&info_mutex); +} + /* * de-register the card proc file * called from init.c diff --git a/sound/core/init.c b/sound/core/init.c index 5ff297d1d89a..af1e407ca27f 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -571,6 +571,7 @@ card_id_store_attr(struct device *dev, struct device_attribute *attr, goto __exist; } strcpy(card->id, buf1); + snd_info_card_id_change(card); mutex_unlock(&snd_card_mutex); return count; -- cgit v1.2.3 From fd64138c0eff8351b93ef99f7da929bb8a49b9ed Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Wed, 12 Nov 2008 16:53:47 +0100 Subject: ALSA: include/sound/info.h - coding style changed Change coding style to be more acceptable by checkpatch.pl. Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai --- include/sound/info.h | 108 +++++++++++++++++++++++++++------------------------ 1 file changed, 57 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/sound/info.h b/include/sound/info.h index 46f702a76e4f..7c2ee1a21b00 100644 --- a/include/sound/info.h +++ b/include/sound/info.h @@ -40,30 +40,34 @@ struct snd_info_buffer { struct snd_info_entry; struct snd_info_entry_text { - void (*read) (struct snd_info_entry *entry, struct snd_info_buffer *buffer); - void (*write) (struct snd_info_entry *entry, struct snd_info_buffer *buffer); + void (*read)(struct snd_info_entry *entry, + struct snd_info_buffer *buffer); + void (*write)(struct snd_info_entry *entry, + struct snd_info_buffer *buffer); }; struct snd_info_entry_ops { - int (*open) (struct snd_info_entry *entry, - unsigned short mode, void **file_private_data); - int (*release) (struct snd_info_entry * entry, - unsigned short mode, void *file_private_data); - long (*read) (struct snd_info_entry *entry, void *file_private_data, - struct file * file, char __user *buf, + int (*open)(struct snd_info_entry *entry, + unsigned short mode, void **file_private_data); + int (*release)(struct snd_info_entry *entry, + unsigned short mode, void *file_private_data); + long (*read)(struct snd_info_entry *entry, void *file_private_data, + struct file *file, char __user *buf, + unsigned long count, unsigned long pos); + long (*write)(struct snd_info_entry *entry, void *file_private_data, + struct file *file, const char __user *buf, unsigned long count, unsigned long pos); - long (*write) (struct snd_info_entry *entry, void *file_private_data, - struct file * file, const char __user *buf, - unsigned long count, unsigned long pos); - long long (*llseek) (struct snd_info_entry *entry, void *file_private_data, - struct file * file, long long offset, int orig); - unsigned int (*poll) (struct snd_info_entry *entry, void *file_private_data, - struct file * file, poll_table * wait); - int (*ioctl) (struct snd_info_entry *entry, void *file_private_data, - struct file * file, unsigned int cmd, unsigned long arg); - int (*mmap) (struct snd_info_entry *entry, void *file_private_data, - struct inode * inode, struct file * file, - struct vm_area_struct * vma); + long long (*llseek)(struct snd_info_entry *entry, + void *file_private_data, struct file *file, + long long offset, int orig); + unsigned int(*poll)(struct snd_info_entry *entry, + void *file_private_data, struct file *file, + poll_table *wait); + int (*ioctl)(struct snd_info_entry *entry, void *file_private_data, + struct file *file, unsigned int cmd, unsigned long arg); + int (*mmap)(struct snd_info_entry *entry, void *file_private_data, + struct inode *inode, struct file *file, + struct vm_area_struct *vma); }; struct snd_info_entry { @@ -106,35 +110,37 @@ void snd_card_info_read_oss(struct snd_info_buffer *buffer); static inline void snd_card_info_read_oss(struct snd_info_buffer *buffer) {} #endif -int snd_iprintf(struct snd_info_buffer * buffer, char *fmt,...) __attribute__ ((format (printf, 2, 3))); +int snd_iprintf(struct snd_info_buffer *buffer, char *fmt, ...) \ + __attribute__ ((format (printf, 2, 3))); int snd_info_init(void); int snd_info_done(void); -int snd_info_get_line(struct snd_info_buffer * buffer, char *line, int len); +int snd_info_get_line(struct snd_info_buffer *buffer, char *line, int len); char *snd_info_get_str(char *dest, char *src, int len); -struct snd_info_entry *snd_info_create_module_entry(struct module * module, +struct snd_info_entry *snd_info_create_module_entry(struct module *module, const char *name, - struct snd_info_entry * parent); -struct snd_info_entry *snd_info_create_card_entry(struct snd_card * card, + struct snd_info_entry *parent); +struct snd_info_entry *snd_info_create_card_entry(struct snd_card *card, const char *name, - struct snd_info_entry * parent); -void snd_info_free_entry(struct snd_info_entry * entry); -int snd_info_store_text(struct snd_info_entry * entry); -int snd_info_restore_text(struct snd_info_entry * entry); - -int snd_info_card_create(struct snd_card * card); -int snd_info_card_register(struct snd_card * card); -int snd_info_card_free(struct snd_card * card); -void snd_info_card_disconnect(struct snd_card * card); -void snd_info_card_id_change(struct snd_card * card); -int snd_info_register(struct snd_info_entry * entry); + struct snd_info_entry *parent); +void snd_info_free_entry(struct snd_info_entry *entry); +int snd_info_store_text(struct snd_info_entry *entry); +int snd_info_restore_text(struct snd_info_entry *entry); + +int snd_info_card_create(struct snd_card *card); +int snd_info_card_register(struct snd_card *card); +int snd_info_card_free(struct snd_card *card); +void snd_info_card_disconnect(struct snd_card *card); +void snd_info_card_id_change(struct snd_card *card); +int snd_info_register(struct snd_info_entry *entry); /* for card drivers */ -int snd_card_proc_new(struct snd_card *card, const char *name, struct snd_info_entry **entryp); +int snd_card_proc_new(struct snd_card *card, const char *name, + struct snd_info_entry **entryp); static inline void snd_info_set_text_ops(struct snd_info_entry *entry, - void *private_data, - void (*read)(struct snd_info_entry *, struct snd_info_buffer *)) + void *private_data, + void (*read)(struct snd_info_entry *, struct snd_info_buffer *)) { entry->private_data = private_data; entry->c.text.read = read; @@ -147,22 +153,22 @@ int snd_info_check_reserved_words(const char *str); #define snd_seq_root NULL #define snd_oss_root NULL -static inline int snd_iprintf(struct snd_info_buffer * buffer, char *fmt,...) { return 0; } +static inline int snd_iprintf(struct snd_info_buffer *buffer, char *fmt, ...) { return 0; } static inline int snd_info_init(void) { return 0; } static inline int snd_info_done(void) { return 0; } -static inline int snd_info_get_line(struct snd_info_buffer * buffer, char *line, int len) { return 0; } +static inline int snd_info_get_line(struct snd_info_buffer *buffer, char *line, int len) { return 0; } static inline char *snd_info_get_str(char *dest, char *src, int len) { return NULL; } -static inline struct snd_info_entry *snd_info_create_module_entry(struct module * module, const char *name, struct snd_info_entry * parent) { return NULL; } -static inline struct snd_info_entry *snd_info_create_card_entry(struct snd_card * card, const char *name, struct snd_info_entry * parent) { return NULL; } -static inline void snd_info_free_entry(struct snd_info_entry * entry) { ; } - -static inline int snd_info_card_create(struct snd_card * card) { return 0; } -static inline int snd_info_card_register(struct snd_card * card) { return 0; } -static inline int snd_info_card_free(struct snd_card * card) { return 0; } -static inline void snd_info_card_disconnect(struct snd_card * card) { } -static inline void snd_info_card_id_change(struct snd_card * card) { } -static inline int snd_info_register(struct snd_info_entry * entry) { return 0; } +static inline struct snd_info_entry *snd_info_create_module_entry(struct module *module, const char *name, struct snd_info_entry *parent) { return NULL; } +static inline struct snd_info_entry *snd_info_create_card_entry(struct snd_card *card, const char *name, struct snd_info_entry *parent) { return NULL; } +static inline void snd_info_free_entry(struct snd_info_entry *entry) { ; } + +static inline int snd_info_card_create(struct snd_card *card) { return 0; } +static inline int snd_info_card_register(struct snd_card *card) { return 0; } +static inline int snd_info_card_free(struct snd_card *card) { return 0; } +static inline void snd_info_card_disconnect(struct snd_card *card) { } +static inline void snd_info_card_id_change(struct snd_card *card) { } +static inline int snd_info_register(struct snd_info_entry *entry) { return 0; } static inline int snd_card_proc_new(struct snd_card *card, const char *name, struct snd_info_entry **entryp) { return -EINVAL; } -- cgit v1.2.3 From 2ed84eeb8808cf3c9f039213ca137ffd7d753f0e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 12 Nov 2008 15:24:24 -0500 Subject: trace: rename unlikely profiler to branch profiler Impact: name change of unlikely tracer and profiler Ingo Molnar suggested changing the config from UNLIKELY_PROFILE to BRANCH_PROFILING. I never did like the "unlikely" name so I went one step farther, and renamed all the unlikely configurations to a "BRANCH" variant. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/vsyscall_64.c | 2 +- arch/x86/vdso/vclock_gettime.c | 2 +- include/asm-generic/vmlinux.lds.h | 2 +- include/linux/compiler.h | 19 ++++++++++--------- kernel/trace/Kconfig | 10 +++++----- kernel/trace/Makefile | 7 +++---- kernel/trace/trace.c | 2 +- kernel/trace/trace.h | 6 +++--- kernel/trace/trace_unlikely.c | 4 ++-- 9 files changed, 27 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index ece02932ea57..6f3d3d4cd973 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -18,7 +18,7 @@ */ /* Disable profiling for userspace code: */ -#define DISABLE_UNLIKELY_PROFILE +#define DISABLE_BRANCH_PROFILING #include #include diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 6e667631e7dc..d9d35824c56f 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -10,7 +10,7 @@ */ /* Disable profiling for userspace code: */ -#define DISABLE_UNLIKELY_PROFILE +#define DISABLE_BRANCH_PROFILING #include #include diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index e10beb5335c9..a5e4ed9baec8 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -45,7 +45,7 @@ #define MCOUNT_REC() #endif -#ifdef CONFIG_TRACE_UNLIKELY_PROFILE +#ifdef CONFIG_TRACE_BRANCH_PROFILING #define LIKELY_PROFILE() VMLINUX_SYMBOL(__start_likely_profile) = .; \ *(_ftrace_likely) \ VMLINUX_SYMBOL(__stop_likely_profile) = .; \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 63b7d9089d6e..c7d804a7a4d6 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -59,26 +59,27 @@ extern void __chk_io_ptr(const volatile void __iomem *); * specific implementations come from the above header files */ -/* - * Note: DISABLE_UNLIKELY_PROFILE can be used by special lowlevel code - * to disable branch tracing on a per file basis. - */ -#if defined(CONFIG_TRACE_UNLIKELY_PROFILE) && !defined(DISABLE_UNLIKELY_PROFILE) -struct ftrace_likely_data { +struct ftrace_branch_data { const char *func; const char *file; unsigned line; unsigned long correct; unsigned long incorrect; }; -void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect); + +/* + * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code + * to disable branch tracing on a per file basis. + */ +#if defined(CONFIG_TRACE_BRANCH_PROFILING) && !defined(DISABLE_BRANCH_PROFILING) +void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #define likely_notrace(x) __builtin_expect(!!(x), 1) #define unlikely_notrace(x) __builtin_expect(!!(x), 0) #define likely_check(x) ({ \ int ______r; \ - static struct ftrace_likely_data \ + static struct ftrace_branch_data \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_likely"))) \ ______f = { \ @@ -93,7 +94,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect); }) #define unlikely_check(x) ({ \ int ______r; \ - static struct ftrace_likely_data \ + static struct ftrace_branch_data \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_unlikely"))) \ ______f = { \ diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 8abcaf821beb..9c89526b6b7c 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -159,7 +159,7 @@ config BOOT_TRACER selected, because the self-tests are an initcall as well and that would invalidate the boot trace. ) -config TRACE_UNLIKELY_PROFILE +config TRACE_BRANCH_PROFILING bool "Trace likely/unlikely profiler" depends on DEBUG_KERNEL select TRACING @@ -175,7 +175,7 @@ config TRACE_UNLIKELY_PROFILE Say N if unsure. -config TRACING_UNLIKELY +config TRACING_BRANCHES bool help Selected by tracers that will trace the likely and unlikely @@ -183,10 +183,10 @@ config TRACING_UNLIKELY profiled. Profiling the tracing infrastructure can only happen when the likelys and unlikelys are not being traced. -config UNLIKELY_TRACER +config BRANCH_TRACER bool "Trace likely/unlikely instances" - depends on TRACE_UNLIKELY_PROFILE - select TRACING_UNLIKELY + depends on TRACE_BRANCH_PROFILING + select TRACING_BRANCHES help This traces the events of likely and unlikely condition calls in the kernel. The difference between this and the diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index c938d03516c0..0087df7ba44e 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -11,9 +11,8 @@ obj-y += trace_selftest_dynamic.o endif # If unlikely tracing is enabled, do not trace these files -ifdef CONFIG_TRACING_UNLIKELY -KBUILD_CFLAGS += '-Dlikely(x)=likely_notrace(x)' -KBUILD_CFLAGS += '-Dunlikely(x)=unlikely_notrace(x)' +ifdef CONFIG_TRACING_BRANCHES +KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING endif obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o @@ -31,6 +30,6 @@ obj-$(CONFIG_STACK_TRACER) += trace_stack.o obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o obj-$(CONFIG_BOOT_TRACER) += trace_boot.o obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o -obj-$(CONFIG_TRACE_UNLIKELY_PROFILE) += trace_unlikely.o +obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_unlikely.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d842db14a59b..bad59d32a4a9 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -258,7 +258,7 @@ static const char *trace_options[] = { "sched-tree", "ftrace_printk", "ftrace_preempt", -#ifdef CONFIG_UNLIKELY_TRACER +#ifdef CONFIG_BRANCH_TRACER "unlikely", #endif NULL diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 9635aa2c4fc1..dccae6312941 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -468,7 +468,7 @@ enum trace_iterator_flags { TRACE_ITER_SCHED_TREE = 0x200, TRACE_ITER_PRINTK = 0x400, TRACE_ITER_PREEMPTONLY = 0x800, -#ifdef CONFIG_UNLIKELY_TRACER +#ifdef CONFIG_BRANCH_TRACER TRACE_ITER_UNLIKELY = 0x1000, #endif }; @@ -530,7 +530,7 @@ static inline void ftrace_preempt_enable(int resched) preempt_enable_notrace(); } -#ifdef CONFIG_UNLIKELY_TRACER +#ifdef CONFIG_BRANCH_TRACER extern int enable_unlikely_tracing(struct trace_array *tr); extern void disable_unlikely_tracing(void); static inline int trace_unlikely_enable(struct trace_array *tr) @@ -552,6 +552,6 @@ static inline int trace_unlikely_enable(struct trace_array *tr) static inline void trace_unlikely_disable(void) { } -#endif /* CONFIG_UNLIKELY_TRACER */ +#endif /* CONFIG_BRANCH_TRACER */ #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_unlikely.c b/kernel/trace/trace_unlikely.c index 7290e0e7b4e3..856eb3b7f694 100644 --- a/kernel/trace/trace_unlikely.c +++ b/kernel/trace/trace_unlikely.c @@ -15,7 +15,7 @@ #include #include "trace.h" -#ifdef CONFIG_UNLIKELY_TRACER +#ifdef CONFIG_BRANCH_TRACER static int unlikely_tracing_enabled __read_mostly; static DEFINE_MUTEX(unlikely_tracing_mutex); @@ -119,7 +119,7 @@ static inline void trace_likely_condition(struct ftrace_likely_data *f, int val, int expect) { } -#endif /* CONFIG_UNLIKELY_TRACER */ +#endif /* CONFIG_BRANCH_TRACER */ void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect) { -- cgit v1.2.3 From 2378982487c492541d17adc0a870e7e83b07ba43 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 12 Nov 2008 23:25:32 -0800 Subject: net: ifdef struct sock::sk_async_wait_queue Every user is under CONFIG_NET_DMA already, so ifdef field as well. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/sock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 08291c1be41e..8b2b82131b67 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -239,7 +239,9 @@ struct sock { int sk_sndbuf; struct sk_buff_head sk_receive_queue; struct sk_buff_head sk_write_queue; +#ifdef CONFIG_NET_DMA struct sk_buff_head sk_async_wait_queue; +#endif int sk_wmem_queued; int sk_forward_alloc; gfp_t sk_allocation; -- cgit v1.2.3 From da9592edebceeba1b9301beafe80ec8b9c2db0ce Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:05 +1100 Subject: CRED: Wrap task credential accesses in the filesystem subsystem Wrap access to task credentials so that they can be separated more easily from the task_struct during the introduction of COW creds. Change most current->(|e|s|fs)[ug]id to current_(|e|s|fs)[ug]id(). Change some task->e?[ug]id to task_e?[ug]id(). In some places it makes more sense to use RCU directly rather than a convenient wrapper; these will be addressed by later patches. Signed-off-by: David Howells Reviewed-by: James Morris Acked-by: Serge Hallyn Cc: Al Viro Signed-off-by: James Morris --- fs/anon_inodes.c | 4 ++-- fs/attr.c | 4 ++-- fs/binfmt_elf_fdpic.c | 8 ++++---- fs/dquot.c | 4 ++-- fs/exec.c | 18 +++++++++--------- fs/fcntl.c | 2 +- fs/inotify_user.c | 2 +- fs/ioprio.c | 4 ++-- fs/locks.c | 2 +- fs/namei.c | 10 ++++++---- fs/namespace.c | 2 +- fs/pipe.c | 4 ++-- fs/posix_acl.c | 4 ++-- fs/quota.c | 4 ++-- include/linux/fs.h | 2 +- 15 files changed, 38 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 3662dd44896b..c16d9be1b017 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -154,8 +154,8 @@ static struct inode *anon_inode_mkinode(void) */ inode->i_state = I_DIRTY; inode->i_mode = S_IRUSR | S_IWUSR; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; return inode; } diff --git a/fs/attr.c b/fs/attr.c index 7a83819f6ba2..f4360192a938 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -29,13 +29,13 @@ int inode_change_ok(struct inode *inode, struct iattr *attr) /* Make sure a caller can chown. */ if ((ia_valid & ATTR_UID) && - (current->fsuid != inode->i_uid || + (current_fsuid() != inode->i_uid || attr->ia_uid != inode->i_uid) && !capable(CAP_CHOWN)) goto error; /* Make sure caller can chgrp. */ if ((ia_valid & ATTR_GID) && - (current->fsuid != inode->i_uid || + (current_fsuid() != inode->i_uid || (!in_group_p(attr->ia_gid) && attr->ia_gid != inode->i_gid)) && !capable(CAP_CHOWN)) goto error; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 5b5424cb3391..488584c87512 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -623,10 +623,10 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, NEW_AUX_ENT(AT_BASE, interp_params->elfhdr_addr); NEW_AUX_ENT(AT_FLAGS, 0); NEW_AUX_ENT(AT_ENTRY, exec_params->entry_addr); - NEW_AUX_ENT(AT_UID, (elf_addr_t) current->uid); - NEW_AUX_ENT(AT_EUID, (elf_addr_t) current->euid); - NEW_AUX_ENT(AT_GID, (elf_addr_t) current->gid); - NEW_AUX_ENT(AT_EGID, (elf_addr_t) current->egid); + NEW_AUX_ENT(AT_UID, (elf_addr_t) current_uid()); + NEW_AUX_ENT(AT_EUID, (elf_addr_t) current_euid()); + NEW_AUX_ENT(AT_GID, (elf_addr_t) current_gid()); + NEW_AUX_ENT(AT_EGID, (elf_addr_t) current_egid()); NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); NEW_AUX_ENT(AT_EXECFN, bprm->exec); diff --git a/fs/dquot.c b/fs/dquot.c index 5e95261005b2..c237ccc8581c 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -874,7 +874,7 @@ static inline int need_print_warning(struct dquot *dquot) switch (dquot->dq_type) { case USRQUOTA: - return current->fsuid == dquot->dq_id; + return current_fsuid() == dquot->dq_id; case GRPQUOTA: return in_group_p(dquot->dq_id); } @@ -981,7 +981,7 @@ static void send_warning(const struct dquot *dquot, const char warntype) MINOR(dquot->dq_sb->s_dev)); if (ret) goto attr_err_out; - ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current->user->uid); + ret = nla_put_u64(skb, QUOTA_NL_A_CAUSED_ID, current_uid()); if (ret) goto attr_err_out; genlmsg_end(skb, msg_head); diff --git a/fs/exec.c b/fs/exec.c index 4e834f16d9da..604834f3b208 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -980,7 +980,7 @@ int flush_old_exec(struct linux_binprm * bprm) /* This is the point of no return */ current->sas_ss_sp = current->sas_ss_size = 0; - if (current->euid == current->uid && current->egid == current->gid) + if (current_euid() == current_uid() && current_egid() == current_gid()) set_dumpable(current->mm, 1); else set_dumpable(current->mm, suid_dumpable); @@ -1007,7 +1007,7 @@ int flush_old_exec(struct linux_binprm * bprm) */ current->mm->task_size = TASK_SIZE; - if (bprm->e_uid != current->euid || bprm->e_gid != current->egid) { + if (bprm->e_uid != current_euid() || bprm->e_gid != current_egid()) { suid_keys(current); set_dumpable(current->mm, suid_dumpable); current->pdeath_signal = 0; @@ -1047,8 +1047,8 @@ int prepare_binprm(struct linux_binprm *bprm) if (bprm->file->f_op == NULL) return -EACCES; - bprm->e_uid = current->euid; - bprm->e_gid = current->egid; + bprm->e_uid = current_euid(); + bprm->e_gid = current_egid(); if(!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) { /* Set-uid? */ @@ -1096,7 +1096,7 @@ void compute_creds(struct linux_binprm *bprm) { int unsafe; - if (bprm->e_uid != current->uid) { + if (bprm->e_uid != current_uid()) { suid_keys(current); current->pdeath_signal = 0; } @@ -1424,7 +1424,7 @@ static int format_corename(char *corename, long signr) /* uid */ case 'u': rc = snprintf(out_ptr, out_end - out_ptr, - "%d", current->uid); + "%d", current_uid()); if (rc > out_end - out_ptr) goto out; out_ptr += rc; @@ -1432,7 +1432,7 @@ static int format_corename(char *corename, long signr) /* gid */ case 'g': rc = snprintf(out_ptr, out_end - out_ptr, - "%d", current->gid); + "%d", current_gid()); if (rc > out_end - out_ptr) goto out; out_ptr += rc; @@ -1709,7 +1709,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) struct inode * inode; struct file * file; int retval = 0; - int fsuid = current->fsuid; + int fsuid = current_fsuid(); int flag = 0; int ispipe = 0; unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; @@ -1815,7 +1815,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) * Dont allow local users get cute and trick others to coredump * into their pre-created files: */ - if (inode->i_uid != current->fsuid) + if (inode->i_uid != current_fsuid()) goto close_fail; if (!file->f_op) goto close_fail; diff --git a/fs/fcntl.c b/fs/fcntl.c index ac4f7db9f134..bf049a805e59 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -211,7 +211,7 @@ int __f_setown(struct file *filp, struct pid *pid, enum pid_type type, if (err) return err; - f_modown(filp, pid, type, current->uid, current->euid, force); + f_modown(filp, pid, type, current_uid(), current_euid(), force); return 0; } EXPORT_SYMBOL(__f_setown); diff --git a/fs/inotify_user.c b/fs/inotify_user.c index d367e9b92862..e2425bbd871f 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c @@ -601,7 +601,7 @@ asmlinkage long sys_inotify_init1(int flags) goto out_put_fd; } - user = get_uid(current->user); + user = get_current_user(); if (unlikely(atomic_read(&user->inotify_devs) >= inotify_max_user_instances)) { ret = -EMFILE; diff --git a/fs/ioprio.c b/fs/ioprio.c index da3cc460d4df..68d2cd807118 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -32,8 +32,8 @@ static int set_task_ioprio(struct task_struct *task, int ioprio) int err; struct io_context *ioc; - if (task->uid != current->euid && - task->uid != current->uid && !capable(CAP_SYS_NICE)) + if (task->uid != current_euid() && + task->uid != current_uid() && !capable(CAP_SYS_NICE)) return -EPERM; err = security_task_setioprio(task, ioprio); diff --git a/fs/locks.c b/fs/locks.c index 09062e3ff104..46a2e12f7d42 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1349,7 +1349,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) struct inode *inode = dentry->d_inode; int error, rdlease_count = 0, wrlease_count = 0; - if ((current->fsuid != inode->i_uid) && !capable(CAP_LEASE)) + if ((current_fsuid() != inode->i_uid) && !capable(CAP_LEASE)) return -EACCES; if (!S_ISREG(inode->i_mode)) return -EINVAL; diff --git a/fs/namei.c b/fs/namei.c index 09ce58e49e72..42d7b7606936 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -186,7 +186,7 @@ int generic_permission(struct inode *inode, int mask, mask &= MAY_READ | MAY_WRITE | MAY_EXEC; - if (current->fsuid == inode->i_uid) + if (current_fsuid() == inode->i_uid) mode >>= 6; else { if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { @@ -441,7 +441,7 @@ static int exec_permission_lite(struct inode *inode) if (inode->i_op && inode->i_op->permission) return -EAGAIN; - if (current->fsuid == inode->i_uid) + if (current_fsuid() == inode->i_uid) mode >>= 6; else if (in_group_p(inode->i_gid)) mode >>= 3; @@ -1334,11 +1334,13 @@ static int user_path_parent(int dfd, const char __user *path, */ static inline int check_sticky(struct inode *dir, struct inode *inode) { + uid_t fsuid = current_fsuid(); + if (!(dir->i_mode & S_ISVTX)) return 0; - if (inode->i_uid == current->fsuid) + if (inode->i_uid == fsuid) return 0; - if (dir->i_uid == current->fsuid) + if (dir->i_uid == fsuid) return 0; return !capable(CAP_FOWNER); } diff --git a/fs/namespace.c b/fs/namespace.c index cce46702d33c..d8bc2c4704a5 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1176,7 +1176,7 @@ static int mount_is_safe(struct path *path) if (S_ISLNK(path->dentry->d_inode->i_mode)) return -EPERM; if (path->dentry->d_inode->i_mode & S_ISVTX) { - if (current->uid != path->dentry->d_inode->i_uid) + if (current_uid() != path->dentry->d_inode->i_uid) return -EPERM; } if (inode_permission(path->dentry->d_inode, MAY_WRITE)) diff --git a/fs/pipe.c b/fs/pipe.c index 7aea8b89baac..aaf797bd57b9 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -899,8 +899,8 @@ static struct inode * get_pipe_inode(void) */ inode->i_state = I_DIRTY; inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; return inode; diff --git a/fs/posix_acl.c b/fs/posix_acl.c index aec931e09973..39df95a0ec25 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -217,11 +217,11 @@ posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want) switch(pa->e_tag) { case ACL_USER_OBJ: /* (May have been checked already) */ - if (inode->i_uid == current->fsuid) + if (inode->i_uid == current_fsuid()) goto check_perm; break; case ACL_USER: - if (pa->e_id == current->fsuid) + if (pa->e_id == current_fsuid()) goto mask; break; case ACL_GROUP_OBJ: diff --git a/fs/quota.c b/fs/quota.c index 7f4386ebc23a..b7fe44e01618 100644 --- a/fs/quota.c +++ b/fs/quota.c @@ -79,7 +79,7 @@ static int generic_quotactl_valid(struct super_block *sb, int type, int cmd, qid /* Check privileges */ if (cmd == Q_GETQUOTA) { - if (((type == USRQUOTA && current->euid != id) || + if (((type == USRQUOTA && current_euid() != id) || (type == GRPQUOTA && !in_egroup_p(id))) && !capable(CAP_SYS_ADMIN)) return -EPERM; @@ -130,7 +130,7 @@ static int xqm_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t i /* Check privileges */ if (cmd == Q_XGETQUOTA) { - if (((type == XQM_USRQUOTA && current->euid != id) || + if (((type == XQM_USRQUOTA && current_euid() != id) || (type == XQM_GRPQUOTA && !in_egroup_p(id))) && !capable(CAP_SYS_ADMIN)) return -EPERM; diff --git a/include/linux/fs.h b/include/linux/fs.h index 0dcdd9458f4b..b3d404aaabed 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1193,7 +1193,7 @@ enum { #define has_fs_excl() atomic_read(¤t->fs_excl) #define is_owner_or_cap(inode) \ - ((current->fsuid == (inode)->i_uid) || capable(CAP_FOWNER)) + ((current_fsuid() == (inode)->i_uid) || capable(CAP_FOWNER)) /* not quite ready to be deprecated, but... */ extern void lock_super(struct super_block *); -- cgit v1.2.3 From 8192b0c482d7078fcdcb4854341b977426f6f09b Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:10 +1100 Subject: CRED: Wrap task credential accesses in the networking subsystem Wrap access to task credentials so that they can be separated more easily from the task_struct during the introduction of COW creds. Change most current->(|e|s|fs)[ug]id to current_(|e|s|fs)[ug]id(). Change some task->e?[ug]id to task_e?[ug]id(). In some places it makes more sense to use RCU directly rather than a convenient wrapper; these will be addressed by later patches. Signed-off-by: David Howells Reviewed-by: James Morris Acked-by: Serge Hallyn Cc: netdev@vger.kernel.org Signed-off-by: James Morris --- include/net/scm.h | 4 ++-- net/core/dev.c | 8 ++++++-- net/core/scm.c | 8 ++++---- net/socket.c | 4 ++-- 4 files changed, 14 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/scm.h b/include/net/scm.h index 06df126103ca..f160116db54a 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -54,8 +54,8 @@ static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm) { struct task_struct *p = current; - scm->creds.uid = p->uid; - scm->creds.gid = p->gid; + scm->creds.uid = current_uid(); + scm->creds.gid = current_gid(); scm->creds.pid = task_tgid_vnr(p); scm->fp = NULL; scm->seq = 0; diff --git a/net/core/dev.c b/net/core/dev.c index d9038e328cc1..262df226b3c9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2958,6 +2958,8 @@ static void dev_change_rx_flags(struct net_device *dev, int flags) static int __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; + uid_t uid; + gid_t gid; ASSERT_RTNL(); @@ -2982,15 +2984,17 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc) printk(KERN_INFO "device %s %s promiscuous mode\n", dev->name, (dev->flags & IFF_PROMISC) ? "entered" : "left"); - if (audit_enabled) + if (audit_enabled) { + current_uid_gid(&uid, &gid); audit_log(current->audit_context, GFP_ATOMIC, AUDIT_ANOM_PROMISCUOUS, "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u", dev->name, (dev->flags & IFF_PROMISC), (old_flags & IFF_PROMISC), audit_get_loginuid(current), - current->uid, current->gid, + uid, gid, audit_get_sessionid(current)); + } dev_change_rx_flags(dev, IFF_PROMISC); } diff --git a/net/core/scm.c b/net/core/scm.c index 10f5c65f6a47..4681d8f9b45b 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -45,10 +45,10 @@ static __inline__ int scm_check_creds(struct ucred *creds) { if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) && - ((creds->uid == current->uid || creds->uid == current->euid || - creds->uid == current->suid) || capable(CAP_SETUID)) && - ((creds->gid == current->gid || creds->gid == current->egid || - creds->gid == current->sgid) || capable(CAP_SETGID))) { + ((creds->uid == current_uid() || creds->uid == current_euid() || + creds->uid == current_suid()) || capable(CAP_SETUID)) && + ((creds->gid == current_gid() || creds->gid == current_egid() || + creds->gid == current_sgid()) || capable(CAP_SETGID))) { return 0; } return -EPERM; diff --git a/net/socket.c b/net/socket.c index 57550c3bcabe..62c7729527ff 100644 --- a/net/socket.c +++ b/net/socket.c @@ -491,8 +491,8 @@ static struct socket *sock_alloc(void) sock = SOCKET_I(inode); inode->i_mode = S_IFSOCK | S_IRWXUGO; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); get_cpu_var(sockets_in_use)++; put_cpu_var(sockets_in_use); -- cgit v1.2.3 From e9e349b051d98799b743ebf248cc2d986fedf090 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:13 +1100 Subject: KEYS: Disperse linux/key_ui.h Disperse the bits of linux/key_ui.h as the reason they were put here (keyfs) didn't get in. Signed-off-by: David Howells Reviewed-by: James Morris Signed-off-by: James Morris --- include/keys/keyring-type.h | 31 +++++++++++++++++++++ include/linux/key-ui.h | 66 --------------------------------------------- security/keys/internal.h | 31 ++++++++++++++++++++- security/keys/keyring.c | 1 + security/keys/request_key.c | 2 ++ 5 files changed, 64 insertions(+), 67 deletions(-) create mode 100644 include/keys/keyring-type.h delete mode 100644 include/linux/key-ui.h (limited to 'include') diff --git a/include/keys/keyring-type.h b/include/keys/keyring-type.h new file mode 100644 index 000000000000..843f872a4b63 --- /dev/null +++ b/include/keys/keyring-type.h @@ -0,0 +1,31 @@ +/* Keyring key type + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _KEYS_KEYRING_TYPE_H +#define _KEYS_KEYRING_TYPE_H + +#include +#include + +/* + * the keyring payload contains a list of the keys to which the keyring is + * subscribed + */ +struct keyring_list { + struct rcu_head rcu; /* RCU deletion hook */ + unsigned short maxkeys; /* max keys this list can hold */ + unsigned short nkeys; /* number of keys currently held */ + unsigned short delkey; /* key to be unlinked by RCU */ + struct key *keys[0]; +}; + + +#endif /* _KEYS_KEYRING_TYPE_H */ diff --git a/include/linux/key-ui.h b/include/linux/key-ui.h deleted file mode 100644 index e8b8a7a5c496..000000000000 --- a/include/linux/key-ui.h +++ /dev/null @@ -1,66 +0,0 @@ -/* key-ui.h: key userspace interface stuff - * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _LINUX_KEY_UI_H -#define _LINUX_KEY_UI_H - -#include - -/* the key tree */ -extern struct rb_root key_serial_tree; -extern spinlock_t key_serial_lock; - -/* required permissions */ -#define KEY_VIEW 0x01 /* require permission to view attributes */ -#define KEY_READ 0x02 /* require permission to read content */ -#define KEY_WRITE 0x04 /* require permission to update / modify */ -#define KEY_SEARCH 0x08 /* require permission to search (keyring) or find (key) */ -#define KEY_LINK 0x10 /* require permission to link */ -#define KEY_SETATTR 0x20 /* require permission to change attributes */ -#define KEY_ALL 0x3f /* all the above permissions */ - -/* - * the keyring payload contains a list of the keys to which the keyring is - * subscribed - */ -struct keyring_list { - struct rcu_head rcu; /* RCU deletion hook */ - unsigned short maxkeys; /* max keys this list can hold */ - unsigned short nkeys; /* number of keys currently held */ - unsigned short delkey; /* key to be unlinked by RCU */ - struct key *keys[0]; -}; - -/* - * check to see whether permission is granted to use a key in the desired way - */ -extern int key_task_permission(const key_ref_t key_ref, - struct task_struct *context, - key_perm_t perm); - -static inline int key_permission(const key_ref_t key_ref, key_perm_t perm) -{ - return key_task_permission(key_ref, current, perm); -} - -extern key_ref_t lookup_user_key(struct task_struct *context, - key_serial_t id, int create, int partial, - key_perm_t perm); - -extern long join_session_keyring(const char *name); - -extern struct key_type *key_type_lookup(const char *type); -extern void key_type_put(struct key_type *ktype); - -#define key_negative_timeout 60 /* default timeout on a negative key's existence */ - - -#endif /* _LINUX_KEY_UI_H */ diff --git a/security/keys/internal.h b/security/keys/internal.h index b39f5c2e2c4b..a60c68138b4d 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -13,7 +13,6 @@ #define _INTERNAL_H #include -#include static inline __attribute__((format(printf, 1, 2))) void no_printk(const char *fmt, ...) @@ -82,6 +81,9 @@ extern struct mutex key_construction_mutex; extern wait_queue_head_t request_key_conswq; +extern struct key_type *key_type_lookup(const char *type); +extern void key_type_put(struct key_type *ktype); + extern int __key_link(struct key *keyring, struct key *key); extern key_ref_t __keyring_search_one(key_ref_t keyring_ref, @@ -118,6 +120,33 @@ extern struct key *request_key_and_link(struct key_type *type, struct key *dest_keyring, unsigned long flags); +extern key_ref_t lookup_user_key(struct task_struct *context, + key_serial_t id, int create, int partial, + key_perm_t perm); + +extern long join_session_keyring(const char *name); + +/* + * check to see whether permission is granted to use a key in the desired way + */ +extern int key_task_permission(const key_ref_t key_ref, + struct task_struct *context, + key_perm_t perm); + +static inline int key_permission(const key_ref_t key_ref, key_perm_t perm) +{ + return key_task_permission(key_ref, current, perm); +} + +/* required permissions */ +#define KEY_VIEW 0x01 /* require permission to view attributes */ +#define KEY_READ 0x02 /* require permission to read content */ +#define KEY_WRITE 0x04 /* require permission to update / modify */ +#define KEY_SEARCH 0x08 /* require permission to search (keyring) or find (key) */ +#define KEY_LINK 0x10 /* require permission to link */ +#define KEY_SETATTR 0x20 /* require permission to change attributes */ +#define KEY_ALL 0x3f /* all the above permissions */ + /* * request_key authorisation */ diff --git a/security/keys/keyring.c b/security/keys/keyring.c index a9ab8affc092..fdf75f901991 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "internal.h" diff --git a/security/keys/request_key.c b/security/keys/request_key.c index a8ebc9520cac..91953c814497 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -19,6 +19,8 @@ #include #include "internal.h" +#define key_negative_timeout 60 /* default timeout on a negative key's existence */ + /* * wait_on_bit() sleep function for uninterruptible waiting */ -- cgit v1.2.3 From 8bbf4976b59fc9fc2861e79cab7beb3f6d647640 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:14 +1100 Subject: KEYS: Alter use of key instantiation link-to-keyring argument Alter the use of the key instantiation and negation functions' link-to-keyring arguments. Currently this specifies a keyring in the target process to link the key into, creating the keyring if it doesn't exist. This, however, can be a problem for copy-on-write credentials as it means that the instantiating process can alter the credentials of the requesting process. This patch alters the behaviour such that: (1) If keyctl_instantiate_key() or keyctl_negate_key() are given a specific keyring by ID (ringid >= 0), then that keyring will be used. (2) If keyctl_instantiate_key() or keyctl_negate_key() are given one of the special constants that refer to the requesting process's keyrings (KEY_SPEC_*_KEYRING, all <= 0), then: (a) If sys_request_key() was given a keyring to use (destringid) then the key will be attached to that keyring. (b) If sys_request_key() was given a NULL keyring, then the key being instantiated will be attached to the default keyring as set by keyctl_set_reqkey_keyring(). (3) No extra link will be made. Decision point (1) follows current behaviour, and allows those instantiators who've searched for a specifically named keyring in the requestor's keyring so as to partition the keys by type to still have their named keyrings. Decision point (2) allows the requestor to make sure that the key or keys that get produced by request_key() go where they want, whilst allowing the instantiator to request that the key is retained. This is mainly useful for situations where the instantiator makes a secondary request, the key for which should be retained by the initial requestor: +-----------+ +--------------+ +--------------+ | | | | | | | Requestor |------->| Instantiator |------->| Instantiator | | | | | | | +-----------+ +--------------+ +--------------+ request_key() request_key() This might be useful, for example, in Kerberos, where the requestor requests a ticket, and then the ticket instantiator requests the TGT, which someone else then has to go and fetch. The TGT, however, should be retained in the keyrings of the requestor, not the first instantiator. To make this explict an extra special keyring constant is also added. Signed-off-by: David Howells Reviewed-by: James Morris Signed-off-by: James Morris --- include/linux/key.h | 16 +++--- include/linux/keyctl.h | 4 +- kernel/kmod.c | 2 +- security/keys/internal.h | 12 ++-- security/keys/keyctl.c | 118 +++++++++++++++++++++++---------------- security/keys/process_keys.c | 88 ++++++++++++++++++----------- security/keys/request_key.c | 75 +++++++++++++++++-------- security/keys/request_key_auth.c | 5 +- 8 files changed, 199 insertions(+), 121 deletions(-) (limited to 'include') diff --git a/include/linux/key.h b/include/linux/key.h index 1b70e35a71e3..df709e1af3cd 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -287,11 +287,11 @@ extern void key_fsuid_changed(struct task_struct *tsk); extern void key_fsgid_changed(struct task_struct *tsk); extern void key_init(void); -#define __install_session_keyring(tsk, keyring) \ -({ \ - struct key *old_session = tsk->signal->session_keyring; \ - tsk->signal->session_keyring = keyring; \ - old_session; \ +#define __install_session_keyring(keyring) \ +({ \ + struct key *old_session = current->signal->session_keyring; \ + current->signal->session_keyring = keyring; \ + old_session; \ }) #else /* CONFIG_KEYS */ @@ -302,11 +302,11 @@ extern void key_init(void); #define key_revoke(k) do { } while(0) #define key_put(k) do { } while(0) #define key_ref_put(k) do { } while(0) -#define make_key_ref(k, p) ({ NULL; }) -#define key_ref_to_ptr(k) ({ NULL; }) +#define make_key_ref(k, p) NULL +#define key_ref_to_ptr(k) NULL #define is_key_possessed(k) 0 #define switch_uid_keyring(u) do { } while(0) -#define __install_session_keyring(t, k) ({ NULL; }) +#define __install_session_keyring(k) ({ NULL; }) #define copy_keys(f,t) 0 #define copy_thread_group_keys(t) 0 #define exit_keys(t) do { } while(0) diff --git a/include/linux/keyctl.h b/include/linux/keyctl.h index 656ee6b77a4a..c0688eb72093 100644 --- a/include/linux/keyctl.h +++ b/include/linux/keyctl.h @@ -1,6 +1,6 @@ /* keyctl.h: keyctl command IDs * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2004, 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -20,6 +20,7 @@ #define KEY_SPEC_USER_SESSION_KEYRING -5 /* - key ID for UID-session keyring */ #define KEY_SPEC_GROUP_KEYRING -6 /* - key ID for GID-specific keyring */ #define KEY_SPEC_REQKEY_AUTH_KEY -7 /* - key ID for assumed request_key auth key */ +#define KEY_SPEC_REQUESTOR_KEYRING -8 /* - key ID for request_key() dest keyring */ /* request-key default keyrings */ #define KEY_REQKEY_DEFL_NO_CHANGE -1 @@ -30,6 +31,7 @@ #define KEY_REQKEY_DEFL_USER_KEYRING 4 #define KEY_REQKEY_DEFL_USER_SESSION_KEYRING 5 #define KEY_REQKEY_DEFL_GROUP_KEYRING 6 +#define KEY_REQKEY_DEFL_REQUESTOR_KEYRING 7 /* keyctl commands */ #define KEYCTL_GET_KEYRING_ID 0 /* ask for a keyring's ID */ diff --git a/kernel/kmod.c b/kernel/kmod.c index 3d3c3ea3a023..f044f8f57703 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -140,7 +140,7 @@ static int ____call_usermodehelper(void *data) /* Unblock all signals and set the session keyring. */ new_session = key_get(sub_info->ring); spin_lock_irq(¤t->sighand->siglock); - old_session = __install_session_keyring(current, new_session); + old_session = __install_session_keyring(new_session); flush_signal_handlers(current, 1); sigemptyset(¤t->blocked); recalc_sigpending(); diff --git a/security/keys/internal.h b/security/keys/internal.h index a60c68138b4d..d1586c629788 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -109,8 +109,9 @@ extern key_ref_t search_process_keyrings(struct key_type *type, extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check); -extern int install_thread_keyring(struct task_struct *tsk); -extern int install_process_keyring(struct task_struct *tsk); +extern int install_user_keyrings(void); +extern int install_thread_keyring(void); +extern int install_process_keyring(void); extern struct key *request_key_and_link(struct key_type *type, const char *description, @@ -120,8 +121,7 @@ extern struct key *request_key_and_link(struct key_type *type, struct key *dest_keyring, unsigned long flags); -extern key_ref_t lookup_user_key(struct task_struct *context, - key_serial_t id, int create, int partial, +extern key_ref_t lookup_user_key(key_serial_t id, int create, int partial, key_perm_t perm); extern long join_session_keyring(const char *name); @@ -152,6 +152,7 @@ static inline int key_permission(const key_ref_t key_ref, key_perm_t perm) */ struct request_key_auth { struct key *target_key; + struct key *dest_keyring; struct task_struct *context; void *callout_info; size_t callout_len; @@ -161,7 +162,8 @@ struct request_key_auth { extern struct key_type key_type_request_key_auth; extern struct key *request_key_auth_new(struct key *target, const void *callout_info, - size_t callout_len); + size_t callout_len, + struct key *dest_keyring); extern struct key *key_get_instantiation_authkey(key_serial_t target_id); diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 3f09e5b2a784..fcce331eca72 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -103,7 +103,7 @@ asmlinkage long sys_add_key(const char __user *_type, } /* find the target keyring (which must be writable) */ - keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); + keyring_ref = lookup_user_key(ringid, 1, 0, KEY_WRITE); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); goto error3; @@ -185,7 +185,7 @@ asmlinkage long sys_request_key(const char __user *_type, /* get the destination keyring if specified */ dest_ref = NULL; if (destringid) { - dest_ref = lookup_user_key(NULL, destringid, 1, 0, KEY_WRITE); + dest_ref = lookup_user_key(destringid, 1, 0, KEY_WRITE); if (IS_ERR(dest_ref)) { ret = PTR_ERR(dest_ref); goto error3; @@ -235,7 +235,7 @@ long keyctl_get_keyring_ID(key_serial_t id, int create) key_ref_t key_ref; long ret; - key_ref = lookup_user_key(NULL, id, create, 0, KEY_SEARCH); + key_ref = lookup_user_key(id, create, 0, KEY_SEARCH); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error; @@ -308,7 +308,7 @@ long keyctl_update_key(key_serial_t id, } /* find the target key (which must be writable) */ - key_ref = lookup_user_key(NULL, id, 0, 0, KEY_WRITE); + key_ref = lookup_user_key(id, 0, 0, KEY_WRITE); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error2; @@ -336,7 +336,7 @@ long keyctl_revoke_key(key_serial_t id) key_ref_t key_ref; long ret; - key_ref = lookup_user_key(NULL, id, 0, 0, KEY_WRITE); + key_ref = lookup_user_key(id, 0, 0, KEY_WRITE); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error; @@ -362,7 +362,7 @@ long keyctl_keyring_clear(key_serial_t ringid) key_ref_t keyring_ref; long ret; - keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); + keyring_ref = lookup_user_key(ringid, 1, 0, KEY_WRITE); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); goto error; @@ -388,13 +388,13 @@ long keyctl_keyring_link(key_serial_t id, key_serial_t ringid) key_ref_t keyring_ref, key_ref; long ret; - keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); + keyring_ref = lookup_user_key(ringid, 1, 0, KEY_WRITE); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); goto error; } - key_ref = lookup_user_key(NULL, id, 1, 0, KEY_LINK); + key_ref = lookup_user_key(id, 1, 0, KEY_LINK); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error2; @@ -422,13 +422,13 @@ long keyctl_keyring_unlink(key_serial_t id, key_serial_t ringid) key_ref_t keyring_ref, key_ref; long ret; - keyring_ref = lookup_user_key(NULL, ringid, 0, 0, KEY_WRITE); + keyring_ref = lookup_user_key(ringid, 0, 0, KEY_WRITE); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); goto error; } - key_ref = lookup_user_key(NULL, id, 0, 0, 0); + key_ref = lookup_user_key(id, 0, 0, 0); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error2; @@ -464,7 +464,7 @@ long keyctl_describe_key(key_serial_t keyid, char *tmpbuf; long ret; - key_ref = lookup_user_key(NULL, keyid, 0, 1, KEY_VIEW); + key_ref = lookup_user_key(keyid, 0, 1, KEY_VIEW); if (IS_ERR(key_ref)) { /* viewing a key under construction is permitted if we have the * authorisation token handy */ @@ -472,7 +472,7 @@ long keyctl_describe_key(key_serial_t keyid, instkey = key_get_instantiation_authkey(keyid); if (!IS_ERR(instkey)) { key_put(instkey); - key_ref = lookup_user_key(NULL, keyid, + key_ref = lookup_user_key(keyid, 0, 1, 0); if (!IS_ERR(key_ref)) goto okay; @@ -557,7 +557,7 @@ long keyctl_keyring_search(key_serial_t ringid, } /* get the keyring at which to begin the search */ - keyring_ref = lookup_user_key(NULL, ringid, 0, 0, KEY_SEARCH); + keyring_ref = lookup_user_key(ringid, 0, 0, KEY_SEARCH); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); goto error2; @@ -566,7 +566,7 @@ long keyctl_keyring_search(key_serial_t ringid, /* get the destination keyring if specified */ dest_ref = NULL; if (destringid) { - dest_ref = lookup_user_key(NULL, destringid, 1, 0, KEY_WRITE); + dest_ref = lookup_user_key(destringid, 1, 0, KEY_WRITE); if (IS_ERR(dest_ref)) { ret = PTR_ERR(dest_ref); goto error3; @@ -636,7 +636,7 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen) long ret; /* find the key first */ - key_ref = lookup_user_key(NULL, keyid, 0, 0, 0); + key_ref = lookup_user_key(keyid, 0, 0, 0); if (IS_ERR(key_ref)) { ret = -ENOKEY; goto error; @@ -699,7 +699,7 @@ long keyctl_chown_key(key_serial_t id, uid_t uid, gid_t gid) if (uid == (uid_t) -1 && gid == (gid_t) -1) goto error; - key_ref = lookup_user_key(NULL, id, 1, 1, KEY_SETATTR); + key_ref = lookup_user_key(id, 1, 1, KEY_SETATTR); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error; @@ -804,7 +804,7 @@ long keyctl_setperm_key(key_serial_t id, key_perm_t perm) if (perm & ~(KEY_POS_ALL | KEY_USR_ALL | KEY_GRP_ALL | KEY_OTH_ALL)) goto error; - key_ref = lookup_user_key(NULL, id, 1, 1, KEY_SETATTR); + key_ref = lookup_user_key(id, 1, 1, KEY_SETATTR); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error; @@ -829,6 +829,43 @@ error: } /* end keyctl_setperm_key() */ +/* + * get the destination keyring for instantiation + */ +static long get_instantiation_keyring(key_serial_t ringid, + struct request_key_auth *rka, + struct key **_dest_keyring) +{ + key_ref_t dkref; + + /* just return a NULL pointer if we weren't asked to make a link */ + if (ringid == 0) { + *_dest_keyring = NULL; + return 0; + } + + /* if a specific keyring is nominated by ID, then use that */ + if (ringid > 0) { + dkref = lookup_user_key(ringid, 1, 0, KEY_WRITE); + if (IS_ERR(dkref)) + return PTR_ERR(dkref); + *_dest_keyring = key_ref_to_ptr(dkref); + return 0; + } + + if (ringid == KEY_SPEC_REQKEY_AUTH_KEY) + return -EINVAL; + + /* otherwise specify the destination keyring recorded in the + * authorisation key (any KEY_SPEC_*_KEYRING) */ + if (ringid >= KEY_SPEC_REQUESTOR_KEYRING) { + *_dest_keyring = rka->dest_keyring; + return 0; + } + + return -ENOKEY; +} + /*****************************************************************************/ /* * instantiate the key with the specified payload, and, if one is given, link @@ -840,8 +877,7 @@ long keyctl_instantiate_key(key_serial_t id, key_serial_t ringid) { struct request_key_auth *rka; - struct key *instkey; - key_ref_t keyring_ref; + struct key *instkey, *dest_keyring; void *payload; long ret; bool vm = false; @@ -883,21 +919,15 @@ long keyctl_instantiate_key(key_serial_t id, /* find the destination keyring amongst those belonging to the * requesting task */ - keyring_ref = NULL; - if (ringid) { - keyring_ref = lookup_user_key(rka->context, ringid, 1, 0, - KEY_WRITE); - if (IS_ERR(keyring_ref)) { - ret = PTR_ERR(keyring_ref); - goto error2; - } - } + ret = get_instantiation_keyring(ringid, rka, &dest_keyring); + if (ret < 0) + goto error2; /* instantiate the key and link it into a keyring */ ret = key_instantiate_and_link(rka->target_key, payload, plen, - key_ref_to_ptr(keyring_ref), instkey); + dest_keyring, instkey); - key_ref_put(keyring_ref); + key_put(dest_keyring); /* discard the assumed authority if it's just been disabled by * instantiation of the key */ @@ -924,8 +954,7 @@ error: long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) { struct request_key_auth *rka; - struct key *instkey; - key_ref_t keyring_ref; + struct key *instkey, *dest_keyring; long ret; /* the appropriate instantiation authorisation key must have been @@ -941,20 +970,15 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) /* find the destination keyring if present (which must also be * writable) */ - keyring_ref = NULL; - if (ringid) { - keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE); - if (IS_ERR(keyring_ref)) { - ret = PTR_ERR(keyring_ref); - goto error; - } - } + ret = get_instantiation_keyring(ringid, rka, &dest_keyring); + if (ret < 0) + goto error; /* instantiate the key and link it into a keyring */ ret = key_negate_and_link(rka->target_key, timeout, - key_ref_to_ptr(keyring_ref), instkey); + dest_keyring, instkey); - key_ref_put(keyring_ref); + key_put(dest_keyring); /* discard the assumed authority if it's just been disabled by * instantiation of the key */ @@ -979,13 +1003,13 @@ long keyctl_set_reqkey_keyring(int reqkey_defl) switch (reqkey_defl) { case KEY_REQKEY_DEFL_THREAD_KEYRING: - ret = install_thread_keyring(current); + ret = install_thread_keyring(); if (ret < 0) return ret; goto set; case KEY_REQKEY_DEFL_PROCESS_KEYRING: - ret = install_process_keyring(current); + ret = install_process_keyring(); if (ret < 0) return ret; @@ -1018,7 +1042,7 @@ long keyctl_set_timeout(key_serial_t id, unsigned timeout) time_t expiry; long ret; - key_ref = lookup_user_key(NULL, id, 1, 1, KEY_SETATTR); + key_ref = lookup_user_key(id, 1, 1, KEY_SETATTR); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error; @@ -1105,7 +1129,7 @@ long keyctl_get_security(key_serial_t keyid, char *context; long ret; - key_ref = lookup_user_key(NULL, keyid, 0, 1, KEY_VIEW); + key_ref = lookup_user_key(keyid, 0, 1, KEY_VIEW); if (IS_ERR(key_ref)) { if (PTR_ERR(key_ref) != -EACCES) return PTR_ERR(key_ref); @@ -1117,7 +1141,7 @@ long keyctl_get_security(key_serial_t keyid, return PTR_ERR(key_ref); key_put(instkey); - key_ref = lookup_user_key(NULL, keyid, 0, 1, 0); + key_ref = lookup_user_key(keyid, 0, 1, 0); if (IS_ERR(key_ref)) return PTR_ERR(key_ref); } diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 5be6d018759a..1c793b7090a7 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -40,9 +40,9 @@ struct key_user root_key_user = { /* * install user and user session keyrings for a particular UID */ -static int install_user_keyrings(struct task_struct *tsk) +int install_user_keyrings(void) { - struct user_struct *user = tsk->user; + struct user_struct *user = current->user; struct key *uid_keyring, *session_keyring; char buf[20]; int ret; @@ -67,7 +67,7 @@ static int install_user_keyrings(struct task_struct *tsk) uid_keyring = find_keyring_by_name(buf, true); if (IS_ERR(uid_keyring)) { uid_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, - tsk, KEY_ALLOC_IN_QUOTA, + current, KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(uid_keyring)) { ret = PTR_ERR(uid_keyring); @@ -83,7 +83,8 @@ static int install_user_keyrings(struct task_struct *tsk) if (IS_ERR(session_keyring)) { session_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, - tsk, KEY_ALLOC_IN_QUOTA, NULL); + current, KEY_ALLOC_IN_QUOTA, + NULL); if (IS_ERR(session_keyring)) { ret = PTR_ERR(session_keyring); goto error_release; @@ -146,8 +147,9 @@ void switch_uid_keyring(struct user_struct *new_user) /* * install a fresh thread keyring, discarding the old one */ -int install_thread_keyring(struct task_struct *tsk) +int install_thread_keyring(void) { + struct task_struct *tsk = current; struct key *keyring, *old; char buf[20]; int ret; @@ -178,8 +180,9 @@ error: /* * make sure a process keyring is installed */ -int install_process_keyring(struct task_struct *tsk) +int install_process_keyring(void) { + struct task_struct *tsk = current; struct key *keyring; char buf[20]; int ret; @@ -218,9 +221,9 @@ error: * install a session keyring, discarding the old one * - if a keyring is not supplied, an empty one is invented */ -static int install_session_keyring(struct task_struct *tsk, - struct key *keyring) +static int install_session_keyring(struct key *keyring) { + struct task_struct *tsk = current; unsigned long flags; struct key *old; char buf[20]; @@ -572,93 +575,91 @@ static int lookup_user_key_possessed(const struct key *key, const void *target) * - don't create special keyrings unless so requested * - partially constructed keys aren't found unless requested */ -key_ref_t lookup_user_key(struct task_struct *context, key_serial_t id, - int create, int partial, key_perm_t perm) +key_ref_t lookup_user_key(key_serial_t id, int create, int partial, + key_perm_t perm) { + struct request_key_auth *rka; + struct task_struct *t = current; key_ref_t key_ref, skey_ref; struct key *key; int ret; - if (!context) - context = current; - key_ref = ERR_PTR(-ENOKEY); switch (id) { case KEY_SPEC_THREAD_KEYRING: - if (!context->thread_keyring) { + if (!t->thread_keyring) { if (!create) goto error; - ret = install_thread_keyring(context); + ret = install_thread_keyring(); if (ret < 0) { key = ERR_PTR(ret); goto error; } } - key = context->thread_keyring; + key = t->thread_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_PROCESS_KEYRING: - if (!context->signal->process_keyring) { + if (!t->signal->process_keyring) { if (!create) goto error; - ret = install_process_keyring(context); + ret = install_process_keyring(); if (ret < 0) { key = ERR_PTR(ret); goto error; } } - key = context->signal->process_keyring; + key = t->signal->process_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_SESSION_KEYRING: - if (!context->signal->session_keyring) { + if (!t->signal->session_keyring) { /* always install a session keyring upon access if one * doesn't exist yet */ - ret = install_user_keyrings(context); + ret = install_user_keyrings(); if (ret < 0) goto error; - ret = install_session_keyring( - context, context->user->session_keyring); + ret = install_session_keyring(t->user->session_keyring); if (ret < 0) goto error; } rcu_read_lock(); - key = rcu_dereference(context->signal->session_keyring); + key = rcu_dereference(t->signal->session_keyring); atomic_inc(&key->usage); rcu_read_unlock(); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_USER_KEYRING: - if (!context->user->uid_keyring) { - ret = install_user_keyrings(context); + if (!t->user->uid_keyring) { + ret = install_user_keyrings(); if (ret < 0) goto error; } - key = context->user->uid_keyring; + key = t->user->uid_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_USER_SESSION_KEYRING: - if (!context->user->session_keyring) { - ret = install_user_keyrings(context); + if (!t->user->session_keyring) { + ret = install_user_keyrings(); if (ret < 0) goto error; } - key = context->user->session_keyring; + key = t->user->session_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; @@ -669,7 +670,7 @@ key_ref_t lookup_user_key(struct task_struct *context, key_serial_t id, goto error; case KEY_SPEC_REQKEY_AUTH_KEY: - key = context->request_key_auth; + key = t->request_key_auth; if (!key) goto error; @@ -677,6 +678,25 @@ key_ref_t lookup_user_key(struct task_struct *context, key_serial_t id, key_ref = make_key_ref(key, 1); break; + case KEY_SPEC_REQUESTOR_KEYRING: + if (!t->request_key_auth) + goto error; + + down_read(&t->request_key_auth->sem); + if (t->request_key_auth->flags & KEY_FLAG_REVOKED) { + key_ref = ERR_PTR(-EKEYREVOKED); + key = NULL; + } else { + rka = t->request_key_auth->payload.data; + key = rka->dest_keyring; + atomic_inc(&key->usage); + } + up_read(&t->request_key_auth->sem); + if (!key) + goto error; + key_ref = make_key_ref(key, 1); + break; + default: key_ref = ERR_PTR(-EINVAL); if (id < 1) @@ -725,7 +745,7 @@ key_ref_t lookup_user_key(struct task_struct *context, key_serial_t id, goto invalid_key; /* check the permissions */ - ret = key_task_permission(key_ref, context, perm); + ret = key_task_permission(key_ref, t, perm); if (ret < 0) goto invalid_key; @@ -754,7 +774,7 @@ long join_session_keyring(const char *name) /* if no name is provided, install an anonymous keyring */ if (!name) { - ret = install_session_keyring(tsk, NULL); + ret = install_session_keyring(NULL); if (ret < 0) goto error; @@ -784,7 +804,7 @@ long join_session_keyring(const char *name) } /* we've got a keyring - now to install it */ - ret = install_session_keyring(tsk, keyring); + ret = install_session_keyring(keyring); if (ret < 0) goto error2; diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 91953c814497..8e9d93b4a402 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -76,6 +76,10 @@ static int call_sbin_request_key(struct key_construction *cons, kenter("{%d},{%d},%s", key->serial, authkey->serial, op); + ret = install_user_keyrings(); + if (ret < 0) + goto error_alloc; + /* allocate a new session keyring */ sprintf(desc, "_req.%u", key->serial); @@ -165,7 +169,8 @@ error_alloc: * - we ignore program failure and go on key status instead */ static int construct_key(struct key *key, const void *callout_info, - size_t callout_len, void *aux) + size_t callout_len, void *aux, + struct key *dest_keyring) { struct key_construction *cons; request_key_actor_t actor; @@ -179,7 +184,8 @@ static int construct_key(struct key *key, const void *callout_info, return -ENOMEM; /* allocate an authorisation key */ - authkey = request_key_auth_new(key, callout_info, callout_len); + authkey = request_key_auth_new(key, callout_info, callout_len, + dest_keyring); if (IS_ERR(authkey)) { kfree(cons); ret = PTR_ERR(authkey); @@ -207,27 +213,48 @@ static int construct_key(struct key *key, const void *callout_info, } /* - * link a key to the appropriate destination keyring - * - the caller must hold a write lock on the destination keyring + * get the appropriate destination keyring for the request + * - we return whatever keyring we select with an extra reference upon it which + * the caller must release */ -static void construct_key_make_link(struct key *key, struct key *dest_keyring) +static void construct_get_dest_keyring(struct key **_dest_keyring) { + struct request_key_auth *rka; struct task_struct *tsk = current; - struct key *drop = NULL; + struct key *dest_keyring = *_dest_keyring, *authkey; - kenter("{%d},%p", key->serial, dest_keyring); + kenter("%p", dest_keyring); /* find the appropriate keyring */ - if (!dest_keyring) { + if (dest_keyring) { + /* the caller supplied one */ + key_get(dest_keyring); + } else { + /* use a default keyring; falling through the cases until we + * find one that we actually have */ switch (tsk->jit_keyring) { case KEY_REQKEY_DEFL_DEFAULT: + case KEY_REQKEY_DEFL_REQUESTOR_KEYRING: + if (tsk->request_key_auth) { + authkey = tsk->request_key_auth; + down_read(&authkey->sem); + rka = authkey->payload.data; + if (!test_bit(KEY_FLAG_REVOKED, + &authkey->flags)) + dest_keyring = + key_get(rka->dest_keyring); + up_read(&authkey->sem); + if (dest_keyring) + break; + } + case KEY_REQKEY_DEFL_THREAD_KEYRING: - dest_keyring = tsk->thread_keyring; + dest_keyring = key_get(tsk->thread_keyring); if (dest_keyring) break; case KEY_REQKEY_DEFL_PROCESS_KEYRING: - dest_keyring = tsk->signal->process_keyring; + dest_keyring = key_get(tsk->signal->process_keyring); if (dest_keyring) break; @@ -236,17 +263,16 @@ static void construct_key_make_link(struct key *key, struct key *dest_keyring) dest_keyring = key_get( rcu_dereference(tsk->signal->session_keyring)); rcu_read_unlock(); - drop = dest_keyring; if (dest_keyring) break; case KEY_REQKEY_DEFL_USER_SESSION_KEYRING: - dest_keyring = tsk->user->session_keyring; + dest_keyring = key_get(tsk->user->session_keyring); break; case KEY_REQKEY_DEFL_USER_KEYRING: - dest_keyring = tsk->user->uid_keyring; + dest_keyring = key_get(tsk->user->uid_keyring); break; case KEY_REQKEY_DEFL_GROUP_KEYRING: @@ -255,10 +281,9 @@ static void construct_key_make_link(struct key *key, struct key *dest_keyring) } } - /* and attach the key to it */ - __key_link(dest_keyring, key); - key_put(drop); - kleave(""); + *_dest_keyring = dest_keyring; + kleave(" [dk %d]", key_serial(dest_keyring)); + return; } /* @@ -288,8 +313,7 @@ static int construct_alloc_key(struct key_type *type, set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags); - if (dest_keyring) - down_write(&dest_keyring->sem); + down_write(&dest_keyring->sem); /* attach the key to the destination keyring under lock, but we do need * to do another check just in case someone beat us to it whilst we @@ -301,12 +325,10 @@ static int construct_alloc_key(struct key_type *type, if (!IS_ERR(key_ref)) goto key_already_present; - if (dest_keyring) - construct_key_make_link(key, dest_keyring); + __key_link(dest_keyring, key); mutex_unlock(&key_construction_mutex); - if (dest_keyring) - up_write(&dest_keyring->sem); + up_write(&dest_keyring->sem); mutex_unlock(&user->cons_lock); *_key = key; kleave(" = 0 [%d]", key_serial(key)); @@ -348,21 +370,26 @@ static struct key *construct_key_and_link(struct key_type *type, if (!user) return ERR_PTR(-ENOMEM); + construct_get_dest_keyring(&dest_keyring); + ret = construct_alloc_key(type, description, dest_keyring, flags, user, &key); key_user_put(user); if (ret == 0) { - ret = construct_key(key, callout_info, callout_len, aux); + ret = construct_key(key, callout_info, callout_len, aux, + dest_keyring); if (ret < 0) goto construction_failed; } + key_put(dest_keyring); return key; construction_failed: key_negate_and_link(key, key_negative_timeout, NULL, NULL); key_put(key); + key_put(dest_keyring); return ERR_PTR(ret); } diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 729156b3485e..1762d44711d5 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -128,6 +128,7 @@ static void request_key_auth_destroy(struct key *key) } key_put(rka->target_key); + key_put(rka->dest_keyring); kfree(rka->callout_info); kfree(rka); @@ -139,7 +140,7 @@ static void request_key_auth_destroy(struct key *key) * access to the caller's security data */ struct key *request_key_auth_new(struct key *target, const void *callout_info, - size_t callout_len) + size_t callout_len, struct key *dest_keyring) { struct request_key_auth *rka, *irka; struct key *authkey = NULL; @@ -188,6 +189,7 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, } rka->target_key = key_get(target); + rka->dest_keyring = key_get(dest_keyring); memcpy(rka->callout_info, callout_info, callout_len); rka->callout_len = callout_len; @@ -223,6 +225,7 @@ error_inst: key_put(authkey); error_alloc: key_put(rka->target_key); + key_put(rka->dest_keyring); kfree(rka->callout_info); kfree(rka); kleave("= %d", ret); -- cgit v1.2.3 From 1cdcbec1a3372c0c49c59d292e708fd07b509f18 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:14 +1100 Subject: CRED: Neuter sys_capset() Take away the ability for sys_capset() to affect processes other than current. This means that current will not need to lock its own credentials when reading them against interference by other processes. This has effectively been the case for a while anyway, since: (1) Without LSM enabled, sys_capset() is disallowed. (2) With file-based capabilities, sys_capset() is neutered. Signed-off-by: David Howells Acked-by: Serge Hallyn Acked-by: Andrew G. Morgan Acked-by: James Morris Signed-off-by: James Morris --- fs/open.c | 12 +-- include/linux/security.h | 48 ++++------ kernel/capability.c | 227 +++++------------------------------------------ security/commoncap.c | 29 ++---- security/security.c | 18 ++-- security/selinux/hooks.c | 10 +-- 6 files changed, 61 insertions(+), 283 deletions(-) (limited to 'include') diff --git a/fs/open.c b/fs/open.c index 83cdb9dee0c1..500cc0c54762 100644 --- a/fs/open.c +++ b/fs/open.c @@ -441,17 +441,7 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) current->fsgid = current->gid; if (!issecure(SECURE_NO_SETUID_FIXUP)) { - /* - * Clear the capabilities if we switch to a non-root user - */ -#ifndef CONFIG_SECURITY_FILE_CAPABILITIES - /* - * FIXME: There is a race here against sys_capset. The - * capabilities can change yet we will restore the old - * value below. We should hold task_capabilities_lock, - * but we cannot because user_path_at can sleep. - */ -#endif /* ndef CONFIG_SECURITY_FILE_CAPABILITIES */ + /* Clear the capabilities if we switch to a non-root user */ if (current->uid) old_cap = cap_set_effective(__cap_empty_set); else diff --git a/include/linux/security.h b/include/linux/security.h index 5fe28a671cd3..d1ce8beddbd7 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -53,8 +53,8 @@ extern int cap_settime(struct timespec *ts, struct timezone *tz); extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -extern int cap_capset_check(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -extern void cap_capset_set(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); +extern int cap_capset_check(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); +extern void cap_capset_set(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); extern int cap_bprm_set_security(struct linux_binprm *bprm); extern void cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); extern int cap_bprm_secureexec(struct linux_binprm *bprm); @@ -1191,24 +1191,14 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Return 0 if the capability sets were successfully obtained. * @capset_check: * Check permission before setting the @effective, @inheritable, and - * @permitted capability sets for the @target process. - * Caveat: @target is also set to current if a set of processes is - * specified (i.e. all processes other than current and init or a - * particular process group). Hence, the capset_set hook may need to - * revalidate permission to the actual target process. - * @target contains the task_struct structure for target process. + * @permitted capability sets for the current process. * @effective contains the effective capability set. * @inheritable contains the inheritable capability set. * @permitted contains the permitted capability set. * Return 0 if permission is granted. * @capset_set: * Set the @effective, @inheritable, and @permitted capability sets for - * the @target process. Since capset_check cannot always check permission - * to the real @target process, this hook may also perform permission - * checking to determine if the current process is allowed to set the - * capability sets of the @target process. However, this hook has no way - * of returning an error due to the structure of the sys_capset code. - * @target contains the task_struct structure for target process. + * the current process. * @effective contains the effective capability set. * @inheritable contains the inheritable capability set. * @permitted contains the permitted capability set. @@ -1303,12 +1293,10 @@ struct security_operations { int (*capget) (struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); - int (*capset_check) (struct task_struct *target, - kernel_cap_t *effective, + int (*capset_check) (kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); - void (*capset_set) (struct task_struct *target, - kernel_cap_t *effective, + void (*capset_set) (kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); int (*capable) (struct task_struct *tsk, int cap, int audit); @@ -1572,12 +1560,10 @@ int security_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -int security_capset_check(struct task_struct *target, - kernel_cap_t *effective, +int security_capset_check(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -void security_capset_set(struct task_struct *target, - kernel_cap_t *effective, +void security_capset_set(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); int security_capable(struct task_struct *tsk, int cap); @@ -1769,20 +1755,18 @@ static inline int security_capget(struct task_struct *target, return cap_capget(target, effective, inheritable, permitted); } -static inline int security_capset_check(struct task_struct *target, - kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) +static inline int security_capset_check(kernel_cap_t *effective, + kernel_cap_t *inheritable, + kernel_cap_t *permitted) { - return cap_capset_check(target, effective, inheritable, permitted); + return cap_capset_check(effective, inheritable, permitted); } -static inline void security_capset_set(struct task_struct *target, - kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) +static inline void security_capset_set(kernel_cap_t *effective, + kernel_cap_t *inheritable, + kernel_cap_t *permitted) { - cap_capset_set(target, effective, inheritable, permitted); + cap_capset_set(effective, inheritable, permitted); } static inline int security_capable(struct task_struct *tsk, int cap) diff --git a/kernel/capability.c b/kernel/capability.c index adb262f83de1..58b00519624a 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -127,160 +127,6 @@ static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy) return 0; } -#ifndef CONFIG_SECURITY_FILE_CAPABILITIES - -/* - * Without filesystem capability support, we nominally support one process - * setting the capabilities of another - */ -static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp, - kernel_cap_t *pIp, kernel_cap_t *pPp) -{ - struct task_struct *target; - int ret; - - spin_lock(&task_capability_lock); - read_lock(&tasklist_lock); - - if (pid && pid != task_pid_vnr(current)) { - target = find_task_by_vpid(pid); - if (!target) { - ret = -ESRCH; - goto out; - } - } else - target = current; - - ret = security_capget(target, pEp, pIp, pPp); - -out: - read_unlock(&tasklist_lock); - spin_unlock(&task_capability_lock); - - return ret; -} - -/* - * cap_set_pg - set capabilities for all processes in a given process - * group. We call this holding task_capability_lock and tasklist_lock. - */ -static inline int cap_set_pg(int pgrp_nr, kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) -{ - struct task_struct *g, *target; - int ret = -EPERM; - int found = 0; - struct pid *pgrp; - - spin_lock(&task_capability_lock); - read_lock(&tasklist_lock); - - pgrp = find_vpid(pgrp_nr); - do_each_pid_task(pgrp, PIDTYPE_PGID, g) { - target = g; - while_each_thread(g, target) { - if (!security_capset_check(target, effective, - inheritable, permitted)) { - security_capset_set(target, effective, - inheritable, permitted); - ret = 0; - } - found = 1; - } - } while_each_pid_task(pgrp, PIDTYPE_PGID, g); - - read_unlock(&tasklist_lock); - spin_unlock(&task_capability_lock); - - if (!found) - ret = 0; - return ret; -} - -/* - * cap_set_all - set capabilities for all processes other than init - * and self. We call this holding task_capability_lock and tasklist_lock. - */ -static inline int cap_set_all(kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) -{ - struct task_struct *g, *target; - int ret = -EPERM; - int found = 0; - - spin_lock(&task_capability_lock); - read_lock(&tasklist_lock); - - do_each_thread(g, target) { - if (target == current - || is_container_init(target->group_leader)) - continue; - found = 1; - if (security_capset_check(target, effective, inheritable, - permitted)) - continue; - ret = 0; - security_capset_set(target, effective, inheritable, permitted); - } while_each_thread(g, target); - - read_unlock(&tasklist_lock); - spin_unlock(&task_capability_lock); - - if (!found) - ret = 0; - - return ret; -} - -/* - * Given the target pid does not refer to the current process we - * need more elaborate support... (This support is not present when - * filesystem capabilities are configured.) - */ -static inline int do_sys_capset_other_tasks(pid_t pid, kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) -{ - struct task_struct *target; - int ret; - - if (!capable(CAP_SETPCAP)) - return -EPERM; - - if (pid == -1) /* all procs other than current and init */ - return cap_set_all(effective, inheritable, permitted); - - else if (pid < 0) /* all procs in process group */ - return cap_set_pg(-pid, effective, inheritable, permitted); - - /* target != current */ - spin_lock(&task_capability_lock); - read_lock(&tasklist_lock); - - target = find_task_by_vpid(pid); - if (!target) - ret = -ESRCH; - else { - ret = security_capset_check(target, effective, inheritable, - permitted); - - /* having verified that the proposed changes are legal, - we now put them into effect. */ - if (!ret) - security_capset_set(target, effective, inheritable, - permitted); - } - - read_unlock(&tasklist_lock); - spin_unlock(&task_capability_lock); - - return ret; -} - -#else /* ie., def CONFIG_SECURITY_FILE_CAPABILITIES */ - /* * If we have configured with filesystem capability support, then the * only thing that can change the capabilities of the current process @@ -314,22 +160,6 @@ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp, return ret; } -/* - * With filesystem capability support configured, the kernel does not - * permit the changing of capabilities in one process by another - * process. (CAP_SETPCAP has much less broad semantics when configured - * this way.) - */ -static inline int do_sys_capset_other_tasks(pid_t pid, - kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) -{ - return -EPERM; -} - -#endif /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */ - /* * Atomically modify the effective capabilities returning the original * value. No permission check is performed here - it is assumed that the @@ -424,16 +254,14 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) * @data: pointer to struct that contains the effective, permitted, * and inheritable capabilities * - * Set capabilities for a given process, all processes, or all - * processes in a given process group. + * Set capabilities for the current process only. The ability to any other + * process(es) has been deprecated and removed. * * The restrictions on setting capabilities are specified as: * - * [pid is for the 'target' task. 'current' is the calling task.] - * - * I: any raised capabilities must be a subset of the (old current) permitted - * P: any raised capabilities must be a subset of the (old current) permitted - * E: must be set to a subset of (new target) permitted + * I: any raised capabilities must be a subset of the old permitted + * P: any raised capabilities must be a subset of the old permitted + * E: must be set to a subset of new permitted * * Returns 0 on success and < 0 on error. */ @@ -452,10 +280,13 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) if (get_user(pid, &header->pid)) return -EFAULT; + /* may only affect current now */ + if (pid != 0 && pid != task_pid_vnr(current)) + return -EPERM; + if (copy_from_user(&kdata, data, tocopy - * sizeof(struct __user_cap_data_struct))) { + * sizeof(struct __user_cap_data_struct))) return -EFAULT; - } for (i = 0; i < tocopy; i++) { effective.cap[i] = kdata[i].effective; @@ -473,32 +304,20 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) if (ret) return ret; - if (pid && (pid != task_pid_vnr(current))) - ret = do_sys_capset_other_tasks(pid, &effective, &inheritable, - &permitted); - else { - /* - * This lock is required even when filesystem - * capability support is configured - it protects the - * sys_capget() call from returning incorrect data in - * the case that the targeted process is not the - * current one. - */ - spin_lock(&task_capability_lock); - - ret = security_capset_check(current, &effective, &inheritable, - &permitted); - /* - * Having verified that the proposed changes are - * legal, we now put them into effect. - */ - if (!ret) - security_capset_set(current, &effective, &inheritable, - &permitted); - spin_unlock(&task_capability_lock); - } - + /* This lock is required even when filesystem capability support is + * configured - it protects the sys_capget() call from returning + * incorrect data in the case that the targeted process is not the + * current one. + */ + spin_lock(&task_capability_lock); + ret = security_capset_check(&effective, &inheritable, &permitted); + /* Having verified that the proposed changes are legal, we now put them + * into effect. + */ + if (!ret) + security_capset_set(&effective, &inheritable, &permitted); + spin_unlock(&task_capability_lock); return ret; } diff --git a/security/commoncap.c b/security/commoncap.c index 8283271f0768..e3f36ef629fa 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -96,15 +96,6 @@ int cap_capget (struct task_struct *target, kernel_cap_t *effective, #ifdef CONFIG_SECURITY_FILE_CAPABILITIES -static inline int cap_block_setpcap(struct task_struct *target) -{ - /* - * No support for remote process capability manipulation with - * filesystem capability support. - */ - return (target != current); -} - static inline int cap_inh_is_capped(void) { /* @@ -119,7 +110,6 @@ static inline int cap_limit_ptraced_target(void) { return 1; } #else /* ie., ndef CONFIG_SECURITY_FILE_CAPABILITIES */ -static inline int cap_block_setpcap(struct task_struct *t) { return 0; } static inline int cap_inh_is_capped(void) { return 1; } static inline int cap_limit_ptraced_target(void) { @@ -128,21 +118,18 @@ static inline int cap_limit_ptraced_target(void) #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ -int cap_capset_check (struct task_struct *target, kernel_cap_t *effective, +int cap_capset_check (kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { - if (cap_block_setpcap(target)) { - return -EPERM; - } if (cap_inh_is_capped() && !cap_issubset(*inheritable, - cap_combine(target->cap_inheritable, + cap_combine(current->cap_inheritable, current->cap_permitted))) { /* incapable of using this inheritable set */ return -EPERM; } if (!cap_issubset(*inheritable, - cap_combine(target->cap_inheritable, + cap_combine(current->cap_inheritable, current->cap_bset))) { /* no new pI capabilities outside bounding set */ return -EPERM; @@ -150,7 +137,7 @@ int cap_capset_check (struct task_struct *target, kernel_cap_t *effective, /* verify restrictions on target's new Permitted set */ if (!cap_issubset (*permitted, - cap_combine (target->cap_permitted, + cap_combine (current->cap_permitted, current->cap_permitted))) { return -EPERM; } @@ -163,12 +150,12 @@ int cap_capset_check (struct task_struct *target, kernel_cap_t *effective, return 0; } -void cap_capset_set (struct task_struct *target, kernel_cap_t *effective, +void cap_capset_set (kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { - target->cap_effective = *effective; - target->cap_inheritable = *inheritable; - target->cap_permitted = *permitted; + current->cap_effective = *effective; + current->cap_inheritable = *inheritable; + current->cap_permitted = *permitted; } static inline void bprm_clear_caps(struct linux_binprm *bprm) diff --git a/security/security.c b/security/security.c index 346f21e0ec2c..dca37381e2a7 100644 --- a/security/security.c +++ b/security/security.c @@ -145,20 +145,18 @@ int security_capget(struct task_struct *target, return security_ops->capget(target, effective, inheritable, permitted); } -int security_capset_check(struct task_struct *target, - kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) +int security_capset_check(kernel_cap_t *effective, + kernel_cap_t *inheritable, + kernel_cap_t *permitted) { - return security_ops->capset_check(target, effective, inheritable, permitted); + return security_ops->capset_check(effective, inheritable, permitted); } -void security_capset_set(struct task_struct *target, - kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) +void security_capset_set(kernel_cap_t *effective, + kernel_cap_t *inheritable, + kernel_cap_t *permitted) { - security_ops->capset_set(target, effective, inheritable, permitted); + security_ops->capset_set(effective, inheritable, permitted); } int security_capable(struct task_struct *tsk, int cap) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 378dc53c08e8..df9986940e9c 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1790,22 +1790,22 @@ static int selinux_capget(struct task_struct *target, kernel_cap_t *effective, return secondary_ops->capget(target, effective, inheritable, permitted); } -static int selinux_capset_check(struct task_struct *target, kernel_cap_t *effective, +static int selinux_capset_check(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { int error; - error = secondary_ops->capset_check(target, effective, inheritable, permitted); + error = secondary_ops->capset_check(effective, inheritable, permitted); if (error) return error; - return task_has_perm(current, target, PROCESS__SETCAP); + return task_has_perm(current, current, PROCESS__SETCAP); } -static void selinux_capset_set(struct task_struct *target, kernel_cap_t *effective, +static void selinux_capset_set(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { - secondary_ops->capset_set(target, effective, inheritable, permitted); + secondary_ops->capset_set(effective, inheritable, permitted); } static int selinux_capable(struct task_struct *tsk, int cap, int audit) -- cgit v1.2.3 From 15a2460ed0af7538ca8e6c610fe607a2cd9da142 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:15 +1100 Subject: CRED: Constify the kernel_cap_t arguments to the capset LSM hooks Constify the kernel_cap_t arguments to the capset LSM hooks. Signed-off-by: David Howells Acked-by: Serge Hallyn Acked-by: James Morris Signed-off-by: James Morris --- include/linux/security.h | 44 ++++++++++++++++++++++++-------------------- security/commoncap.c | 10 ++++++---- security/security.c | 12 ++++++------ security/selinux/hooks.c | 10 ++++++---- 4 files changed, 42 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index d1ce8beddbd7..9f305d4a31a7 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -53,8 +53,12 @@ extern int cap_settime(struct timespec *ts, struct timezone *tz); extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -extern int cap_capset_check(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -extern void cap_capset_set(kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); +extern int cap_capset_check(const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); +extern void cap_capset_set(const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); extern int cap_bprm_set_security(struct linux_binprm *bprm); extern void cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); extern int cap_bprm_secureexec(struct linux_binprm *bprm); @@ -1293,12 +1297,12 @@ struct security_operations { int (*capget) (struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); - int (*capset_check) (kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted); - void (*capset_set) (kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted); + int (*capset_check) (const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); + void (*capset_set) (const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); int (*capable) (struct task_struct *tsk, int cap, int audit); int (*acct) (struct file *file); int (*sysctl) (struct ctl_table *table, int op); @@ -1560,12 +1564,12 @@ int security_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -int security_capset_check(kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted); -void security_capset_set(kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted); +int security_capset_check(const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); +void security_capset_set(const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); int security_capable(struct task_struct *tsk, int cap); int security_capable_noaudit(struct task_struct *tsk, int cap); int security_acct(struct file *file); @@ -1755,16 +1759,16 @@ static inline int security_capget(struct task_struct *target, return cap_capget(target, effective, inheritable, permitted); } -static inline int security_capset_check(kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) +static inline int security_capset_check(const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) { return cap_capset_check(effective, inheritable, permitted); } -static inline void security_capset_set(kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) +static inline void security_capset_set(const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) { cap_capset_set(effective, inheritable, permitted); } diff --git a/security/commoncap.c b/security/commoncap.c index e3f36ef629fa..fb4e240720d8 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -118,8 +118,9 @@ static inline int cap_limit_ptraced_target(void) #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ -int cap_capset_check (kernel_cap_t *effective, - kernel_cap_t *inheritable, kernel_cap_t *permitted) +int cap_capset_check(const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) { if (cap_inh_is_capped() && !cap_issubset(*inheritable, @@ -150,8 +151,9 @@ int cap_capset_check (kernel_cap_t *effective, return 0; } -void cap_capset_set (kernel_cap_t *effective, - kernel_cap_t *inheritable, kernel_cap_t *permitted) +void cap_capset_set(const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) { current->cap_effective = *effective; current->cap_inheritable = *inheritable; diff --git a/security/security.c b/security/security.c index dca37381e2a7..81c956a12300 100644 --- a/security/security.c +++ b/security/security.c @@ -145,16 +145,16 @@ int security_capget(struct task_struct *target, return security_ops->capget(target, effective, inheritable, permitted); } -int security_capset_check(kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) +int security_capset_check(const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) { return security_ops->capset_check(effective, inheritable, permitted); } -void security_capset_set(kernel_cap_t *effective, - kernel_cap_t *inheritable, - kernel_cap_t *permitted) +void security_capset_set(const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) { security_ops->capset_set(effective, inheritable, permitted); } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index df9986940e9c..9f6da154cc82 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1790,8 +1790,9 @@ static int selinux_capget(struct task_struct *target, kernel_cap_t *effective, return secondary_ops->capget(target, effective, inheritable, permitted); } -static int selinux_capset_check(kernel_cap_t *effective, - kernel_cap_t *inheritable, kernel_cap_t *permitted) +static int selinux_capset_check(const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) { int error; @@ -1802,8 +1803,9 @@ static int selinux_capset_check(kernel_cap_t *effective, return task_has_perm(current, current, PROCESS__SETCAP); } -static void selinux_capset_set(kernel_cap_t *effective, - kernel_cap_t *inheritable, kernel_cap_t *permitted) +static void selinux_capset_set(const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) { secondary_ops->capset_set(effective, inheritable, permitted); } -- cgit v1.2.3 From b6dff3ec5e116e3af6f537d4caedcad6b9e5082a Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:16 +1100 Subject: CRED: Separate task security context from task_struct Separate the task security context from task_struct. At this point, the security data is temporarily embedded in the task_struct with two pointers pointing to it. Note that the Alpha arch is altered as it refers to (E)UID and (E)GID in entry.S via asm-offsets. With comment fixes Signed-off-by: Marc Dionne Signed-off-by: David Howells Acked-by: James Morris Acked-by: Serge Hallyn Signed-off-by: James Morris --- arch/alpha/kernel/asm-offsets.c | 11 +- arch/alpha/kernel/entry.S | 10 +- arch/ia64/ia32/sys_ia32.c | 8 +- arch/mips/kernel/kspd.c | 4 +- arch/s390/kernel/compat_linux.c | 28 ++--- drivers/connector/cn_proc.c | 8 +- fs/binfmt_elf.c | 12 +- fs/binfmt_elf_fdpic.c | 12 +- fs/exec.c | 4 +- fs/fcntl.c | 4 +- fs/file_table.c | 4 +- fs/fuse/dir.c | 12 +- fs/hugetlbfs/inode.c | 4 +- fs/ioprio.c | 12 +- fs/nfsd/auth.c | 22 ++-- fs/nfsd/nfs4recover.c | 12 +- fs/nfsd/nfsfh.c | 6 +- fs/open.c | 17 +-- fs/proc/array.c | 18 +-- fs/proc/base.c | 16 +-- fs/xfs/linux-2.6/xfs_cred.h | 6 +- fs/xfs/linux-2.6/xfs_globals.h | 2 +- fs/xfs/xfs_inode.h | 2 +- fs/xfs/xfs_vnodeops.h | 10 +- include/linux/cred.h | 155 +++++++++++++++++++---- include/linux/init_task.h | 24 ++-- include/linux/sched.h | 52 +------- include/linux/securebits.h | 2 +- ipc/mqueue.c | 2 +- ipc/shm.c | 4 +- kernel/auditsc.c | 52 ++++---- kernel/capability.c | 4 +- kernel/cgroup.c | 4 +- kernel/exit.c | 10 +- kernel/fork.c | 24 ++-- kernel/futex.c | 6 +- kernel/futex_compat.c | 5 +- kernel/ptrace.c | 19 +-- kernel/sched.c | 10 +- kernel/signal.c | 16 +-- kernel/sys.c | 266 ++++++++++++++++++++++----------------- kernel/trace/trace.c | 2 +- kernel/tsacct.c | 4 +- kernel/uid16.c | 28 ++--- kernel/user.c | 4 +- mm/mempolicy.c | 10 +- mm/migrate.c | 10 +- mm/oom_kill.c | 2 +- net/core/scm.c | 10 +- net/sunrpc/auth.c | 2 +- security/commoncap.c | 161 +++++++++++++----------- security/keys/keyctl.c | 25 ++-- security/keys/permission.c | 11 +- security/keys/process_keys.c | 98 ++++++++------- security/keys/request_key.c | 18 +-- security/keys/request_key_auth.c | 12 +- security/selinux/exports.c | 2 +- security/selinux/hooks.c | 116 ++++++++--------- security/selinux/selinuxfs.c | 2 +- security/selinux/xfrm.c | 6 +- security/smack/smack_access.c | 4 +- security/smack/smack_lsm.c | 77 ++++++------ security/smack/smackfs.c | 6 +- 63 files changed, 832 insertions(+), 677 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/asm-offsets.c b/arch/alpha/kernel/asm-offsets.c index 4b18cd94d59d..6ff8886e7e22 100644 --- a/arch/alpha/kernel/asm-offsets.c +++ b/arch/alpha/kernel/asm-offsets.c @@ -19,15 +19,18 @@ void foo(void) BLANK(); DEFINE(TASK_BLOCKED, offsetof(struct task_struct, blocked)); - DEFINE(TASK_UID, offsetof(struct task_struct, uid)); - DEFINE(TASK_EUID, offsetof(struct task_struct, euid)); - DEFINE(TASK_GID, offsetof(struct task_struct, gid)); - DEFINE(TASK_EGID, offsetof(struct task_struct, egid)); + DEFINE(TASK_CRED, offsetof(struct task_struct, cred)); DEFINE(TASK_REAL_PARENT, offsetof(struct task_struct, real_parent)); DEFINE(TASK_GROUP_LEADER, offsetof(struct task_struct, group_leader)); DEFINE(TASK_TGID, offsetof(struct task_struct, tgid)); BLANK(); + DEFINE(CRED_UID, offsetof(struct cred, uid)); + DEFINE(CRED_EUID, offsetof(struct cred, euid)); + DEFINE(CRED_GID, offsetof(struct cred, gid)); + DEFINE(CRED_EGID, offsetof(struct cred, egid)); + BLANK(); + DEFINE(SIZEOF_PT_REGS, sizeof(struct pt_regs)); DEFINE(PT_PTRACED, PT_PTRACED); DEFINE(CLONE_VM, CLONE_VM); diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index 5fc61e281ac7..f77345bc66a9 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -850,8 +850,9 @@ osf_getpriority: sys_getxuid: .prologue 0 ldq $2, TI_TASK($8) - ldl $0, TASK_UID($2) - ldl $1, TASK_EUID($2) + ldq $3, TASK_CRED($2) + ldl $0, CRED_UID($3) + ldl $1, CRED_EUID($3) stq $1, 80($sp) ret .end sys_getxuid @@ -862,8 +863,9 @@ sys_getxuid: sys_getxgid: .prologue 0 ldq $2, TI_TASK($8) - ldl $0, TASK_GID($2) - ldl $1, TASK_EGID($2) + ldq $3, TASK_CRED($2) + ldl $0, CRED_GID($3) + ldl $1, CRED_EGID($3) stq $1, 80($sp) ret .end sys_getxgid diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 5e92ae00bdbb..2445a9d3488e 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -1772,20 +1772,20 @@ sys32_getgroups16 (int gidsetsize, short __user *grouplist) if (gidsetsize < 0) return -EINVAL; - get_group_info(current->group_info); - i = current->group_info->ngroups; + get_group_info(current->cred->group_info); + i = current->cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups16_to_user(grouplist, current->group_info)) { + if (groups16_to_user(grouplist, current->cred->group_info)) { i = -EFAULT; goto out; } } out: - put_group_info(current->group_info); + put_group_info(current->cred->group_info); return i; } diff --git a/arch/mips/kernel/kspd.c b/arch/mips/kernel/kspd.c index b0591ae0ce56..fd6e51224034 100644 --- a/arch/mips/kernel/kspd.c +++ b/arch/mips/kernel/kspd.c @@ -174,8 +174,8 @@ static unsigned int translate_open_flags(int flags) static void sp_setfsuidgid( uid_t uid, gid_t gid) { - current->fsuid = uid; - current->fsgid = gid; + current->cred->fsuid = uid; + current->cred->fsgid = gid; key_fsuid_changed(current); key_fsgid_changed(current); diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 4646382af34f..6cc87d8c8682 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -148,9 +148,9 @@ asmlinkage long sys32_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user { int retval; - if (!(retval = put_user(high2lowuid(current->uid), ruid)) && - !(retval = put_user(high2lowuid(current->euid), euid))) - retval = put_user(high2lowuid(current->suid), suid); + if (!(retval = put_user(high2lowuid(current->cred->uid), ruid)) && + !(retval = put_user(high2lowuid(current->cred->euid), euid))) + retval = put_user(high2lowuid(current->cred->suid), suid); return retval; } @@ -165,9 +165,9 @@ asmlinkage long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user { int retval; - if (!(retval = put_user(high2lowgid(current->gid), rgid)) && - !(retval = put_user(high2lowgid(current->egid), egid))) - retval = put_user(high2lowgid(current->sgid), sgid); + if (!(retval = put_user(high2lowgid(current->cred->gid), rgid)) && + !(retval = put_user(high2lowgid(current->cred->egid), egid))) + retval = put_user(high2lowgid(current->cred->sgid), sgid); return retval; } @@ -217,20 +217,20 @@ asmlinkage long sys32_getgroups16(int gidsetsize, u16 __user *grouplist) if (gidsetsize < 0) return -EINVAL; - get_group_info(current->group_info); - i = current->group_info->ngroups; + get_group_info(current->cred->group_info); + i = current->cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups16_to_user(grouplist, current->group_info)) { + if (groups16_to_user(grouplist, current->cred->group_info)) { i = -EFAULT; goto out; } } out: - put_group_info(current->group_info); + put_group_info(current->cred->group_info); return i; } @@ -261,22 +261,22 @@ asmlinkage long sys32_setgroups16(int gidsetsize, u16 __user *grouplist) asmlinkage long sys32_getuid16(void) { - return high2lowuid(current->uid); + return high2lowuid(current->cred->uid); } asmlinkage long sys32_geteuid16(void) { - return high2lowuid(current->euid); + return high2lowuid(current->cred->euid); } asmlinkage long sys32_getgid16(void) { - return high2lowgid(current->gid); + return high2lowgid(current->cred->gid); } asmlinkage long sys32_getegid16(void) { - return high2lowgid(current->egid); + return high2lowgid(current->cred->egid); } /* diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 5c9f67f98d10..354c1ff17159 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -116,11 +116,11 @@ void proc_id_connector(struct task_struct *task, int which_id) ev->event_data.id.process_pid = task->pid; ev->event_data.id.process_tgid = task->tgid; if (which_id == PROC_EVENT_UID) { - ev->event_data.id.r.ruid = task->uid; - ev->event_data.id.e.euid = task->euid; + ev->event_data.id.r.ruid = task->cred->uid; + ev->event_data.id.e.euid = task->cred->euid; } else if (which_id == PROC_EVENT_GID) { - ev->event_data.id.r.rgid = task->gid; - ev->event_data.id.e.egid = task->egid; + ev->event_data.id.r.rgid = task->cred->gid; + ev->event_data.id.e.egid = task->cred->egid; } else return; get_seq(&msg->seq, &ev->cpu); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 8fcfa398d350..7a52477ce493 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -223,10 +223,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, NEW_AUX_ENT(AT_BASE, interp_load_addr); NEW_AUX_ENT(AT_FLAGS, 0); NEW_AUX_ENT(AT_ENTRY, exec->e_entry); - NEW_AUX_ENT(AT_UID, tsk->uid); - NEW_AUX_ENT(AT_EUID, tsk->euid); - NEW_AUX_ENT(AT_GID, tsk->gid); - NEW_AUX_ENT(AT_EGID, tsk->egid); + NEW_AUX_ENT(AT_UID, tsk->cred->uid); + NEW_AUX_ENT(AT_EUID, tsk->cred->euid); + NEW_AUX_ENT(AT_GID, tsk->cred->gid); + NEW_AUX_ENT(AT_EGID, tsk->cred->egid); NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); NEW_AUX_ENT(AT_EXECFN, bprm->exec); if (k_platform) { @@ -1388,8 +1388,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_zomb = psinfo->pr_sname == 'Z'; psinfo->pr_nice = task_nice(p); psinfo->pr_flag = p->flags; - SET_UID(psinfo->pr_uid, p->uid); - SET_GID(psinfo->pr_gid, p->gid); + SET_UID(psinfo->pr_uid, p->cred->uid); + SET_GID(psinfo->pr_gid, p->cred->gid); strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); return 0; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 488584c87512..9f67054c2c4e 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -623,10 +623,10 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, NEW_AUX_ENT(AT_BASE, interp_params->elfhdr_addr); NEW_AUX_ENT(AT_FLAGS, 0); NEW_AUX_ENT(AT_ENTRY, exec_params->entry_addr); - NEW_AUX_ENT(AT_UID, (elf_addr_t) current_uid()); - NEW_AUX_ENT(AT_EUID, (elf_addr_t) current_euid()); - NEW_AUX_ENT(AT_GID, (elf_addr_t) current_gid()); - NEW_AUX_ENT(AT_EGID, (elf_addr_t) current_egid()); + NEW_AUX_ENT(AT_UID, (elf_addr_t) current->cred->uid); + NEW_AUX_ENT(AT_EUID, (elf_addr_t) current->cred->euid); + NEW_AUX_ENT(AT_GID, (elf_addr_t) current->cred->gid); + NEW_AUX_ENT(AT_EGID, (elf_addr_t) current->cred->egid); NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); NEW_AUX_ENT(AT_EXECFN, bprm->exec); @@ -1440,8 +1440,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_zomb = psinfo->pr_sname == 'Z'; psinfo->pr_nice = task_nice(p); psinfo->pr_flag = p->flags; - SET_UID(psinfo->pr_uid, p->uid); - SET_GID(psinfo->pr_gid, p->gid); + SET_UID(psinfo->pr_uid, p->cred->uid); + SET_GID(psinfo->pr_gid, p->cred->gid); strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); return 0; diff --git a/fs/exec.c b/fs/exec.c index 604834f3b208..31149e430a89 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1738,7 +1738,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) */ if (get_dumpable(mm) == 2) { /* Setuid core dump mode */ flag = O_EXCL; /* Stop rewrite attacks */ - current->fsuid = 0; /* Dump root private */ + current->cred->fsuid = 0; /* Dump root private */ } retval = coredump_wait(exit_code, &core_state); @@ -1834,7 +1834,7 @@ fail_unlock: if (helper_argv) argv_free(helper_argv); - current->fsuid = fsuid; + current->cred->fsuid = fsuid; coredump_finish(mm); fail: return retval; diff --git a/fs/fcntl.c b/fs/fcntl.c index bf049a805e59..63964d863ad6 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -401,8 +401,8 @@ static inline int sigio_perm(struct task_struct *p, struct fown_struct *fown, int sig) { return (((fown->euid == 0) || - (fown->euid == p->suid) || (fown->euid == p->uid) || - (fown->uid == p->suid) || (fown->uid == p->uid)) && + (fown->euid == p->cred->suid) || (fown->euid == p->cred->uid) || + (fown->uid == p->cred->suid) || (fown->uid == p->cred->uid)) && !security_file_send_sigiotask(p, fown, sig)); } diff --git a/fs/file_table.c b/fs/file_table.c index 5ad0eca6eea2..3152b53cfab0 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -122,8 +122,8 @@ struct file *get_empty_filp(void) INIT_LIST_HEAD(&f->f_u.fu_list); atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); - f->f_uid = tsk->fsuid; - f->f_gid = tsk->fsgid; + f->f_uid = tsk->cred->fsuid; + f->f_gid = tsk->cred->fsgid; eventpoll_init_file(f); /* f->f_version: 0 */ return f; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index fd03330cadeb..e97a98981862 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -872,12 +872,12 @@ int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) if (fc->flags & FUSE_ALLOW_OTHER) return 1; - if (task->euid == fc->user_id && - task->suid == fc->user_id && - task->uid == fc->user_id && - task->egid == fc->group_id && - task->sgid == fc->group_id && - task->gid == fc->group_id) + if (task->cred->euid == fc->user_id && + task->cred->suid == fc->user_id && + task->cred->uid == fc->user_id && + task->cred->egid == fc->group_id && + task->cred->sgid == fc->group_id && + task->cred->gid == fc->group_id) return 1; return 0; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 08ad76c79b49..870a721b8bd2 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -958,7 +958,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size) if (!can_do_hugetlb_shm()) return ERR_PTR(-EPERM); - if (!user_shm_lock(size, current->user)) + if (!user_shm_lock(size, current->cred->user)) return ERR_PTR(-ENOMEM); root = hugetlbfs_vfsmount->mnt_root; @@ -998,7 +998,7 @@ out_inode: out_dentry: dput(dentry); out_shm_unlock: - user_shm_unlock(size, current->user); + user_shm_unlock(size, current->cred->user); return ERR_PTR(error); } diff --git a/fs/ioprio.c b/fs/ioprio.c index 68d2cd807118..bb5210af77c2 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -32,8 +32,8 @@ static int set_task_ioprio(struct task_struct *task, int ioprio) int err; struct io_context *ioc; - if (task->uid != current_euid() && - task->uid != current_uid() && !capable(CAP_SYS_NICE)) + if (task->cred->uid != current_euid() && + task->cred->uid != current_uid() && !capable(CAP_SYS_NICE)) return -EPERM; err = security_task_setioprio(task, ioprio); @@ -123,7 +123,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) break; case IOPRIO_WHO_USER: if (!who) - user = current->user; + user = current->cred->user; else user = find_user(who); @@ -131,7 +131,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) break; do_each_thread(g, p) { - if (p->uid != who) + if (p->cred->uid != who) continue; ret = set_task_ioprio(p, ioprio); if (ret) @@ -216,7 +216,7 @@ asmlinkage long sys_ioprio_get(int which, int who) break; case IOPRIO_WHO_USER: if (!who) - user = current->user; + user = current->cred->user; else user = find_user(who); @@ -224,7 +224,7 @@ asmlinkage long sys_ioprio_get(int which, int who) break; do_each_thread(g, p) { - if (p->uid != user->uid) + if (p->cred->uid != user->uid) continue; tmpio = get_task_ioprio(p); if (tmpio < 0) diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 294992e9bf69..808fc03a6fbd 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -27,6 +27,7 @@ int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) { + struct cred *act_as = current->cred ; struct svc_cred cred = rqstp->rq_cred; int i; int flags = nfsexp_flags(rqstp, exp); @@ -55,25 +56,26 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) get_group_info(cred.cr_group_info); if (cred.cr_uid != (uid_t) -1) - current->fsuid = cred.cr_uid; + act_as->fsuid = cred.cr_uid; else - current->fsuid = exp->ex_anon_uid; + act_as->fsuid = exp->ex_anon_uid; if (cred.cr_gid != (gid_t) -1) - current->fsgid = cred.cr_gid; + act_as->fsgid = cred.cr_gid; else - current->fsgid = exp->ex_anon_gid; + act_as->fsgid = exp->ex_anon_gid; if (!cred.cr_group_info) return -ENOMEM; - ret = set_current_groups(cred.cr_group_info); + ret = set_groups(act_as, cred.cr_group_info); put_group_info(cred.cr_group_info); if ((cred.cr_uid)) { - current->cap_effective = - cap_drop_nfsd_set(current->cap_effective); + act_as->cap_effective = + cap_drop_nfsd_set(act_as->cap_effective); } else { - current->cap_effective = - cap_raise_nfsd_set(current->cap_effective, - current->cap_permitted); + act_as->cap_effective = + cap_raise_nfsd_set(act_as->cap_effective, + act_as->cap_permitted); } return ret; } + diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index bb93946ace22..a5e14e8695ea 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -57,17 +57,17 @@ static int rec_dir_init = 0; static void nfs4_save_user(uid_t *saveuid, gid_t *savegid) { - *saveuid = current->fsuid; - *savegid = current->fsgid; - current->fsuid = 0; - current->fsgid = 0; + *saveuid = current->cred->fsuid; + *savegid = current->cred->fsgid; + current->cred->fsuid = 0; + current->cred->fsgid = 0; } static void nfs4_reset_user(uid_t saveuid, gid_t savegid) { - current->fsuid = saveuid; - current->fsgid = savegid; + current->cred->fsuid = saveuid; + current->cred->fsgid = savegid; } static void diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index cd25d91895a1..e67cfaea0865 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -186,9 +186,9 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) * access control settings being in effect, we cannot * fix that case easily. */ - current->cap_effective = - cap_raise_nfsd_set(current->cap_effective, - current->cap_permitted); + current->cred->cap_effective = + cap_raise_nfsd_set(current->cred->cap_effective, + current->cred->cap_permitted); } else { error = nfsd_setuser_and_check_port(rqstp, exp); if (error) diff --git a/fs/open.c b/fs/open.c index 500cc0c54762..b1238e195e7e 100644 --- a/fs/open.c +++ b/fs/open.c @@ -425,6 +425,7 @@ out: */ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) { + struct cred *cred = current->cred; struct path path; struct inode *inode; int old_fsuid, old_fsgid; @@ -434,18 +435,18 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ return -EINVAL; - old_fsuid = current->fsuid; - old_fsgid = current->fsgid; + old_fsuid = cred->fsuid; + old_fsgid = cred->fsgid; - current->fsuid = current->uid; - current->fsgid = current->gid; + cred->fsuid = cred->uid; + cred->fsgid = cred->gid; if (!issecure(SECURE_NO_SETUID_FIXUP)) { /* Clear the capabilities if we switch to a non-root user */ - if (current->uid) + if (current->cred->uid) old_cap = cap_set_effective(__cap_empty_set); else - old_cap = cap_set_effective(current->cap_permitted); + old_cap = cap_set_effective(cred->cap_permitted); } res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path); @@ -484,8 +485,8 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) out_path_release: path_put(&path); out: - current->fsuid = old_fsuid; - current->fsgid = old_fsgid; + cred->fsuid = old_fsuid; + cred->fsgid = old_fsgid; if (!issecure(SECURE_NO_SETUID_FIXUP)) cap_set_effective(old_cap); diff --git a/fs/proc/array.c b/fs/proc/array.c index 6af7fba7abb1..62fe9b2009b6 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -182,8 +182,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, task_tgid_nr_ns(p, ns), pid_nr_ns(pid, ns), ppid, tpid, - p->uid, p->euid, p->suid, p->fsuid, - p->gid, p->egid, p->sgid, p->fsgid); + p->cred->uid, p->cred->euid, p->cred->suid, p->cred->fsuid, + p->cred->gid, p->cred->egid, p->cred->sgid, p->cred->fsgid); task_lock(p); if (p->files) @@ -194,7 +194,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, fdt ? fdt->max_fds : 0); rcu_read_unlock(); - group_info = p->group_info; + group_info = p->cred->group_info; get_group_info(group_info); task_unlock(p); @@ -262,7 +262,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) blocked = p->blocked; collect_sigign_sigcatch(p, &ignored, &caught); num_threads = atomic_read(&p->signal->count); - qsize = atomic_read(&p->user->sigpending); + qsize = atomic_read(&p->cred->user->sigpending); qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; unlock_task_sighand(p, &flags); } @@ -293,10 +293,12 @@ static void render_cap_t(struct seq_file *m, const char *header, static inline void task_cap(struct seq_file *m, struct task_struct *p) { - render_cap_t(m, "CapInh:\t", &p->cap_inheritable); - render_cap_t(m, "CapPrm:\t", &p->cap_permitted); - render_cap_t(m, "CapEff:\t", &p->cap_effective); - render_cap_t(m, "CapBnd:\t", &p->cap_bset); + struct cred *cred = p->cred; + + render_cap_t(m, "CapInh:\t", &cred->cap_inheritable); + render_cap_t(m, "CapPrm:\t", &cred->cap_permitted); + render_cap_t(m, "CapEff:\t", &cred->cap_effective); + render_cap_t(m, "CapBnd:\t", &cred->cap_bset); } static inline void task_context_switch_counts(struct seq_file *m, diff --git a/fs/proc/base.c b/fs/proc/base.c index 486cf3fe7139..6862b360c36c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1428,8 +1428,8 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st inode->i_uid = 0; inode->i_gid = 0; if (task_dumpable(task)) { - inode->i_uid = task->euid; - inode->i_gid = task->egid; + inode->i_uid = task->cred->euid; + inode->i_gid = task->cred->egid; } security_task_to_inode(task, inode); @@ -1454,8 +1454,8 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat if (task) { if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { - stat->uid = task->euid; - stat->gid = task->egid; + stat->uid = task->cred->euid; + stat->gid = task->cred->egid; } } rcu_read_unlock(); @@ -1486,8 +1486,8 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) if (task) { if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { - inode->i_uid = task->euid; - inode->i_gid = task->egid; + inode->i_uid = task->cred->euid; + inode->i_gid = task->cred->egid; } else { inode->i_uid = 0; inode->i_gid = 0; @@ -1658,8 +1658,8 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) rcu_read_unlock(); put_files_struct(files); if (task_dumpable(task)) { - inode->i_uid = task->euid; - inode->i_gid = task->egid; + inode->i_uid = task->cred->euid; + inode->i_gid = task->cred->egid; } else { inode->i_uid = 0; inode->i_gid = 0; diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h index 293043a5573a..8c022cd0ad67 100644 --- a/fs/xfs/linux-2.6/xfs_cred.h +++ b/fs/xfs/linux-2.6/xfs_cred.h @@ -23,11 +23,9 @@ /* * Credentials */ -typedef struct cred { - /* EMPTY */ -} cred_t; +typedef const struct cred cred_t; -extern struct cred *sys_cred; +extern cred_t *sys_cred; /* this is a hack.. (assumes sys_cred is the only cred_t in the system) */ static inline int capable_cred(cred_t *cr, int cid) diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h index 2770b0085ee8..6eda8a3eb6f1 100644 --- a/fs/xfs/linux-2.6/xfs_globals.h +++ b/fs/xfs/linux-2.6/xfs_globals.h @@ -19,6 +19,6 @@ #define __XFS_GLOBALS_H__ extern uint64_t xfs_panic_mask; /* set to cause more panics */ -extern struct cred *sys_cred; +extern cred_t *sys_cred; #endif /* __XFS_GLOBALS_H__ */ diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 1420c49674d7..6be310d41daf 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -497,7 +497,7 @@ int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, xfs_inode_t **, xfs_daddr_t, uint); int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int); int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, - xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t, + xfs_nlink_t, xfs_dev_t, cred_t *, xfs_prid_t, int, struct xfs_buf **, boolean_t *, xfs_inode_t **); void xfs_dinode_from_disk(struct xfs_icdinode *, struct xfs_dinode_core *); diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index e932a96bec54..7b0c2ab88333 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -16,7 +16,7 @@ struct xfs_iomap; int xfs_open(struct xfs_inode *ip); int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags, - struct cred *credp); + cred_t *credp); #define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */ #define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ #define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ @@ -28,24 +28,24 @@ int xfs_inactive(struct xfs_inode *ip); int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode **ipp, struct xfs_name *ci_name); int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode, - xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp); + xfs_dev_t rdev, struct xfs_inode **ipp, cred_t *credp); int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode *ip); int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, struct xfs_name *target_name); int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name, - mode_t mode, struct xfs_inode **ipp, struct cred *credp); + mode_t mode, struct xfs_inode **ipp, cred_t *credp); int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, xfs_off_t *offset, filldir_t filldir); int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, const char *target_path, mode_t mode, struct xfs_inode **ipp, - struct cred *credp); + cred_t *credp); int xfs_inode_flush(struct xfs_inode *ip, int flags); int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); int xfs_reclaim(struct xfs_inode *ip); int xfs_change_file_space(struct xfs_inode *ip, int cmd, xfs_flock64_t *bf, xfs_off_t offset, - struct cred *credp, int attr_flags); + cred_t *credp, int attr_flags); int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, struct xfs_inode *src_ip, struct xfs_inode *target_dp, struct xfs_name *target_name, struct xfs_inode *target_ip); diff --git a/include/linux/cred.h b/include/linux/cred.h index b69222cc1fd2..3e65587a72e5 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -12,39 +12,150 @@ #ifndef _LINUX_CRED_H #define _LINUX_CRED_H -#define get_current_user() (get_uid(current->user)) - -#define task_uid(task) ((task)->uid) -#define task_gid(task) ((task)->gid) -#define task_euid(task) ((task)->euid) -#define task_egid(task) ((task)->egid) - -#define current_uid() (current->uid) -#define current_gid() (current->gid) -#define current_euid() (current->euid) -#define current_egid() (current->egid) -#define current_suid() (current->suid) -#define current_sgid() (current->sgid) -#define current_fsuid() (current->fsuid) -#define current_fsgid() (current->fsgid) -#define current_cap() (current->cap_effective) +#include +#include +#include + +struct user_struct; +struct cred; + +/* + * COW Supplementary groups list + */ +#define NGROUPS_SMALL 32 +#define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t))) + +struct group_info { + atomic_t usage; + int ngroups; + int nblocks; + gid_t small_block[NGROUPS_SMALL]; + gid_t *blocks[0]; +}; + +/** + * get_group_info - Get a reference to a group info structure + * @group_info: The group info to reference + * + * This must be called with the owning task locked (via task_lock()) when task + * != current. The reason being that the vast majority of callers are looking + * at current->group_info, which can not be changed except by the current task. + * Changing current->group_info requires the task lock, too. + */ +#define get_group_info(group_info) \ +do { \ + atomic_inc(&(group_info)->usage); \ +} while (0) + +/** + * put_group_info - Release a reference to a group info structure + * @group_info: The group info to release + */ +#define put_group_info(group_info) \ +do { \ + if (atomic_dec_and_test(&(group_info)->usage)) \ + groups_free(group_info); \ +} while (0) + +extern struct group_info *groups_alloc(int); +extern void groups_free(struct group_info *); +extern int set_current_groups(struct group_info *); +extern int set_groups(struct cred *, struct group_info *); +extern int groups_search(struct group_info *, gid_t); + +/* access the groups "array" with this macro */ +#define GROUP_AT(gi, i) \ + ((gi)->blocks[(i) / NGROUPS_PER_BLOCK][(i) % NGROUPS_PER_BLOCK]) + +extern int in_group_p(gid_t); +extern int in_egroup_p(gid_t); + +/* + * The security context of a task + * + * The parts of the context break down into two categories: + * + * (1) The objective context of a task. These parts are used when some other + * task is attempting to affect this one. + * + * (2) The subjective context. These details are used when the task is acting + * upon another object, be that a file, a task, a key or whatever. + * + * Note that some members of this structure belong to both categories - the + * LSM security pointer for instance. + * + * A task has two security pointers. task->real_cred points to the objective + * context that defines that task's actual details. The objective part of this + * context is used whenever that task is acted upon. + * + * task->cred points to the subjective context that defines the details of how + * that task is going to act upon another object. This may be overridden + * temporarily to point to another security context, but normally points to the + * same context as task->real_cred. + */ +struct cred { + atomic_t usage; + uid_t uid; /* real UID of the task */ + gid_t gid; /* real GID of the task */ + uid_t suid; /* saved UID of the task */ + gid_t sgid; /* saved GID of the task */ + uid_t euid; /* effective UID of the task */ + gid_t egid; /* effective GID of the task */ + uid_t fsuid; /* UID for VFS ops */ + gid_t fsgid; /* GID for VFS ops */ + unsigned securebits; /* SUID-less security management */ + kernel_cap_t cap_inheritable; /* caps our children can inherit */ + kernel_cap_t cap_permitted; /* caps we're permitted */ + kernel_cap_t cap_effective; /* caps we can actually use */ + kernel_cap_t cap_bset; /* capability bounding set */ +#ifdef CONFIG_KEYS + unsigned char jit_keyring; /* default keyring to attach requested + * keys to */ + struct key *thread_keyring; /* keyring private to this thread */ + struct key *request_key_auth; /* assumed request_key authority */ +#endif +#ifdef CONFIG_SECURITY + void *security; /* subjective LSM security */ +#endif + struct user_struct *user; /* real user ID subscription */ + struct group_info *group_info; /* supplementary groups for euid/fsgid */ + struct rcu_head rcu; /* RCU deletion hook */ + spinlock_t lock; /* lock for pointer changes */ +}; + +#define get_current_user() (get_uid(current->cred->user)) + +#define task_uid(task) ((task)->cred->uid) +#define task_gid(task) ((task)->cred->gid) +#define task_euid(task) ((task)->cred->euid) +#define task_egid(task) ((task)->cred->egid) + +#define current_uid() (current->cred->uid) +#define current_gid() (current->cred->gid) +#define current_euid() (current->cred->euid) +#define current_egid() (current->cred->egid) +#define current_suid() (current->cred->suid) +#define current_sgid() (current->cred->sgid) +#define current_fsuid() (current->cred->fsuid) +#define current_fsgid() (current->cred->fsgid) +#define current_cap() (current->cred->cap_effective) #define current_uid_gid(_uid, _gid) \ do { \ - *(_uid) = current->uid; \ - *(_gid) = current->gid; \ + *(_uid) = current->cred->uid; \ + *(_gid) = current->cred->gid; \ } while(0) #define current_euid_egid(_uid, _gid) \ do { \ - *(_uid) = current->euid; \ - *(_gid) = current->egid; \ + *(_uid) = current->cred->euid; \ + *(_gid) = current->cred->egid; \ } while(0) #define current_fsuid_fsgid(_uid, _gid) \ do { \ - *(_uid) = current->fsuid; \ - *(_gid) = current->fsgid; \ + *(_uid) = current->cred->fsuid; \ + *(_gid) = current->cred->fsgid; \ } while(0) #endif /* _LINUX_CRED_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 23fd8909b9e5..9de41ccd67b5 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -113,6 +113,21 @@ extern struct group_info init_groups; # define CAP_INIT_BSET CAP_INIT_EFF_SET #endif +extern struct cred init_cred; + +#define INIT_CRED(p) \ +{ \ + .usage = ATOMIC_INIT(3), \ + .securebits = SECUREBITS_DEFAULT, \ + .cap_inheritable = CAP_INIT_INH_SET, \ + .cap_permitted = CAP_FULL_SET, \ + .cap_effective = CAP_INIT_EFF_SET, \ + .cap_bset = CAP_INIT_BSET, \ + .user = INIT_USER, \ + .group_info = &init_groups, \ + .lock = __SPIN_LOCK_UNLOCKED(p.lock), \ +} + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -147,13 +162,8 @@ extern struct group_info init_groups; .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ - .group_info = &init_groups, \ - .cap_effective = CAP_INIT_EFF_SET, \ - .cap_inheritable = CAP_INIT_INH_SET, \ - .cap_permitted = CAP_FULL_SET, \ - .cap_bset = CAP_INIT_BSET, \ - .securebits = SECUREBITS_DEFAULT, \ - .user = INIT_USER, \ + .__temp_cred = INIT_CRED(tsk.__temp_cred), \ + .cred = &tsk.__temp_cred, \ .comm = "swapper", \ .thread = INIT_THREAD, \ .fs = &init_fs, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index b483f39a7112..c8b92502354d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -660,6 +660,7 @@ extern struct user_struct *find_user(uid_t); extern struct user_struct root_user; #define INIT_USER (&root_user) + struct backing_dev_info; struct reclaim_state; @@ -883,38 +884,7 @@ partition_sched_domains(int ndoms_new, cpumask_t *doms_new, #endif /* !CONFIG_SMP */ struct io_context; /* See blkdev.h */ -#define NGROUPS_SMALL 32 -#define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t))) -struct group_info { - int ngroups; - atomic_t usage; - gid_t small_block[NGROUPS_SMALL]; - int nblocks; - gid_t *blocks[0]; -}; - -/* - * get_group_info() must be called with the owning task locked (via task_lock()) - * when task != current. The reason being that the vast majority of callers are - * looking at current->group_info, which can not be changed except by the - * current task. Changing current->group_info requires the task lock, too. - */ -#define get_group_info(group_info) do { \ - atomic_inc(&(group_info)->usage); \ -} while (0) -#define put_group_info(group_info) do { \ - if (atomic_dec_and_test(&(group_info)->usage)) \ - groups_free(group_info); \ -} while (0) - -extern struct group_info *groups_alloc(int gidsetsize); -extern void groups_free(struct group_info *group_info); -extern int set_current_groups(struct group_info *group_info); -extern int groups_search(struct group_info *group_info, gid_t grp); -/* access the groups "array" with this macro */ -#define GROUP_AT(gi, i) \ - ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK]) #ifdef ARCH_HAS_PREFETCH_SWITCH_STACK extern void prefetch_stack(struct task_struct *t); @@ -1181,17 +1151,9 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - uid_t uid,euid,suid,fsuid; - gid_t gid,egid,sgid,fsgid; - struct group_info *group_info; - kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset; - struct user_struct *user; - unsigned securebits; -#ifdef CONFIG_KEYS - unsigned char jit_keyring; /* default keyring to attach requested keys to */ - struct key *request_key_auth; /* assumed request_key authority */ - struct key *thread_keyring; /* keyring private to this thread */ -#endif + struct cred __temp_cred __deprecated; /* temporary credentials to be removed */ + struct cred *cred; /* actual/objective task credentials */ + char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock it with task_lock()) @@ -1228,9 +1190,6 @@ struct task_struct { int (*notifier)(void *priv); void *notifier_data; sigset_t *notifier_mask; -#ifdef CONFIG_SECURITY - void *security; -#endif struct audit_context *audit_context; #ifdef CONFIG_AUDITSYSCALL uid_t loginuid; @@ -1787,9 +1746,6 @@ extern void wake_up_new_task(struct task_struct *tsk, extern void sched_fork(struct task_struct *p, int clone_flags); extern void sched_dead(struct task_struct *p); -extern int in_group_p(gid_t); -extern int in_egroup_p(gid_t); - extern void proc_caches_init(void); extern void flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); diff --git a/include/linux/securebits.h b/include/linux/securebits.h index 92f09bdf1175..6d389491bfa2 100644 --- a/include/linux/securebits.h +++ b/include/linux/securebits.h @@ -32,7 +32,7 @@ setting is locked or not. A setting which is locked cannot be changed from user-level. */ #define issecure_mask(X) (1 << (X)) -#define issecure(X) (issecure_mask(X) & current->securebits) +#define issecure(X) (issecure_mask(X) & current->cred->securebits) #define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ issecure_mask(SECURE_NO_SETUID_FIXUP) | \ diff --git a/ipc/mqueue.c b/ipc/mqueue.c index abda5991d7e3..e1885b494bac 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -126,7 +126,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode, if (S_ISREG(mode)) { struct mqueue_inode_info *info; struct task_struct *p = current; - struct user_struct *u = p->user; + struct user_struct *u = p->cred->user; unsigned long mq_bytes, mq_msg_tblsz; inode->i_fop = &mqueue_file_operations; diff --git a/ipc/shm.c b/ipc/shm.c index 0c3debbe32d5..264a9d33c5dd 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -366,7 +366,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) if (shmflg & SHM_HUGETLB) { /* hugetlb_file_setup takes care of mlock user accounting */ file = hugetlb_file_setup(name, size); - shp->mlock_user = current->user; + shp->mlock_user = current->cred->user; } else { int acctflag = VM_ACCOUNT; /* @@ -767,7 +767,7 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) goto out_unlock; if(cmd==SHM_LOCK) { - struct user_struct * user = current->user; + struct user_struct *user = current->cred->user; if (!is_file_hugepages(shp->shm_file)) { err = shmem_lock(shp->shm_file, 1, user); if (!err && !(shp->shm_perm.mode & SHM_LOCKED)){ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 9c7e47ae4576..2febf5165fad 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -447,6 +447,7 @@ static int audit_filter_rules(struct task_struct *tsk, struct audit_names *name, enum audit_state *state) { + struct cred *cred = tsk->cred; int i, j, need_sid = 1; u32 sid; @@ -466,28 +467,28 @@ static int audit_filter_rules(struct task_struct *tsk, } break; case AUDIT_UID: - result = audit_comparator(tsk->uid, f->op, f->val); + result = audit_comparator(cred->uid, f->op, f->val); break; case AUDIT_EUID: - result = audit_comparator(tsk->euid, f->op, f->val); + result = audit_comparator(cred->euid, f->op, f->val); break; case AUDIT_SUID: - result = audit_comparator(tsk->suid, f->op, f->val); + result = audit_comparator(cred->suid, f->op, f->val); break; case AUDIT_FSUID: - result = audit_comparator(tsk->fsuid, f->op, f->val); + result = audit_comparator(cred->fsuid, f->op, f->val); break; case AUDIT_GID: - result = audit_comparator(tsk->gid, f->op, f->val); + result = audit_comparator(cred->gid, f->op, f->val); break; case AUDIT_EGID: - result = audit_comparator(tsk->egid, f->op, f->val); + result = audit_comparator(cred->egid, f->op, f->val); break; case AUDIT_SGID: - result = audit_comparator(tsk->sgid, f->op, f->val); + result = audit_comparator(cred->sgid, f->op, f->val); break; case AUDIT_FSGID: - result = audit_comparator(tsk->fsgid, f->op, f->val); + result = audit_comparator(cred->fsgid, f->op, f->val); break; case AUDIT_PERS: result = audit_comparator(tsk->personality, f->op, f->val); @@ -1228,6 +1229,7 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) { + struct cred *cred = tsk->cred; int i, call_panic = 0; struct audit_buffer *ab; struct audit_aux_data *aux; @@ -1237,14 +1239,14 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts context->pid = tsk->pid; if (!context->ppid) context->ppid = sys_getppid(); - context->uid = tsk->uid; - context->gid = tsk->gid; - context->euid = tsk->euid; - context->suid = tsk->suid; - context->fsuid = tsk->fsuid; - context->egid = tsk->egid; - context->sgid = tsk->sgid; - context->fsgid = tsk->fsgid; + context->uid = cred->uid; + context->gid = cred->gid; + context->euid = cred->euid; + context->suid = cred->suid; + context->fsuid = cred->fsuid; + context->egid = cred->egid; + context->sgid = cred->sgid; + context->fsgid = cred->fsgid; context->personality = tsk->personality; ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL); @@ -2086,7 +2088,7 @@ int audit_set_loginuid(struct task_struct *task, uid_t loginuid) audit_log_format(ab, "login pid=%d uid=%u " "old auid=%u new auid=%u" " old ses=%u new ses=%u", - task->pid, task->uid, + task->pid, task->cred->uid, task->loginuid, loginuid, task->sessionid, sessionid); audit_log_end(ab); @@ -2469,7 +2471,7 @@ void __audit_ptrace(struct task_struct *t) context->target_pid = t->pid; context->target_auid = audit_get_loginuid(t); - context->target_uid = t->uid; + context->target_uid = t->cred->uid; context->target_sessionid = audit_get_sessionid(t); security_task_getsecid(t, &context->target_sid); memcpy(context->target_comm, t->comm, TASK_COMM_LEN); @@ -2495,7 +2497,7 @@ int __audit_signal_info(int sig, struct task_struct *t) if (tsk->loginuid != -1) audit_sig_uid = tsk->loginuid; else - audit_sig_uid = tsk->uid; + audit_sig_uid = tsk->cred->uid; security_task_getsecid(tsk, &audit_sig_sid); } if (!audit_signals || audit_dummy_context()) @@ -2507,7 +2509,7 @@ int __audit_signal_info(int sig, struct task_struct *t) if (!ctx->target_pid) { ctx->target_pid = t->tgid; ctx->target_auid = audit_get_loginuid(t); - ctx->target_uid = t->uid; + ctx->target_uid = t->cred->uid; ctx->target_sessionid = audit_get_sessionid(t); security_task_getsecid(t, &ctx->target_sid); memcpy(ctx->target_comm, t->comm, TASK_COMM_LEN); @@ -2528,7 +2530,7 @@ int __audit_signal_info(int sig, struct task_struct *t) axp->target_pid[axp->pid_count] = t->tgid; axp->target_auid[axp->pid_count] = audit_get_loginuid(t); - axp->target_uid[axp->pid_count] = t->uid; + axp->target_uid[axp->pid_count] = t->cred->uid; axp->target_sessionid[axp->pid_count] = audit_get_sessionid(t); security_task_getsecid(t, &axp->target_sid[axp->pid_count]); memcpy(axp->target_comm[axp->pid_count], t->comm, TASK_COMM_LEN); @@ -2575,12 +2577,12 @@ void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_ ax->fcap_ver = (vcaps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT; ax->old_pcap.permitted = *pP; - ax->old_pcap.inheritable = current->cap_inheritable; + ax->old_pcap.inheritable = current->cred->cap_inheritable; ax->old_pcap.effective = *pE; - ax->new_pcap.permitted = current->cap_permitted; - ax->new_pcap.inheritable = current->cap_inheritable; - ax->new_pcap.effective = current->cap_effective; + ax->new_pcap.permitted = current->cred->cap_permitted; + ax->new_pcap.inheritable = current->cred->cap_inheritable; + ax->new_pcap.effective = current->cred->cap_effective; } /** diff --git a/kernel/capability.c b/kernel/capability.c index 58b00519624a..a404b980b1bd 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -171,8 +171,8 @@ kernel_cap_t cap_set_effective(const kernel_cap_t pE_new) spin_lock(&task_capability_lock); - pE_old = current->cap_effective; - current->cap_effective = pE_new; + pE_old = current->cred->cap_effective; + current->cred->cap_effective = pE_new; spin_unlock(&task_capability_lock); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 78f9b310c4f3..e210526e6401 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1293,7 +1293,9 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid) rcu_read_unlock(); euid = current_euid(); - if (euid && euid != tsk->uid && euid != tsk->suid) { + if (euid && + euid != tsk->cred->uid && + euid != tsk->cred->suid) { put_task_struct(tsk); return -EACCES; } diff --git a/kernel/exit.c b/kernel/exit.c index 80137a5d9467..e0f6e1892fb9 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -160,7 +160,7 @@ void release_task(struct task_struct * p) int zap_leader; repeat: tracehook_prepare_release_task(p); - atomic_dec(&p->user->processes); + atomic_dec(&p->cred->user->processes); proc_flush_task(p); write_lock_irq(&tasklist_lock); tracehook_finish_release_task(p); @@ -1272,7 +1272,7 @@ static int wait_task_zombie(struct task_struct *p, int options, return 0; if (unlikely(options & WNOWAIT)) { - uid_t uid = p->uid; + uid_t uid = p->cred->uid; int exit_code = p->exit_code; int why, status; @@ -1393,7 +1393,7 @@ static int wait_task_zombie(struct task_struct *p, int options, if (!retval && infop) retval = put_user(pid, &infop->si_pid); if (!retval && infop) - retval = put_user(p->uid, &infop->si_uid); + retval = put_user(p->cred->uid, &infop->si_uid); if (!retval) retval = pid; @@ -1458,7 +1458,7 @@ static int wait_task_stopped(int ptrace, struct task_struct *p, if (!unlikely(options & WNOWAIT)) p->exit_code = 0; - uid = p->uid; + uid = p->cred->uid; unlock_sig: spin_unlock_irq(&p->sighand->siglock); if (!exit_code) @@ -1535,7 +1535,7 @@ static int wait_task_continued(struct task_struct *p, int options, spin_unlock_irq(&p->sighand->siglock); pid = task_pid_vnr(p); - uid = p->uid; + uid = p->cred->uid; get_task_struct(p); read_unlock(&tasklist_lock); diff --git a/kernel/fork.c b/kernel/fork.c index f6083561dfe0..81fdc7733908 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -147,8 +147,8 @@ void __put_task_struct(struct task_struct *tsk) WARN_ON(tsk == current); security_task_free(tsk); - free_uid(tsk->user); - put_group_info(tsk->group_info); + free_uid(tsk->__temp_cred.user); + put_group_info(tsk->__temp_cred.group_info); delayacct_tsk_free(tsk); if (!profile_handoff_task(tsk)) @@ -969,17 +969,18 @@ static struct task_struct *copy_process(unsigned long clone_flags, DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif + p->cred = &p->__temp_cred; retval = -EAGAIN; - if (atomic_read(&p->user->processes) >= + if (atomic_read(&p->cred->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && - p->user != current->nsproxy->user_ns->root_user) + p->cred->user != current->nsproxy->user_ns->root_user) goto bad_fork_free; } - atomic_inc(&p->user->__count); - atomic_inc(&p->user->processes); - get_group_info(p->group_info); + atomic_inc(&p->cred->user->__count); + atomic_inc(&p->cred->user->processes); + get_group_info(p->cred->group_info); /* * If multiple threads are within copy_process(), then this check @@ -1035,9 +1036,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); #ifdef CONFIG_SECURITY - p->security = NULL; + p->cred->security = NULL; #endif - p->cap_bset = current->cap_bset; p->io_context = NULL; p->audit_context = NULL; cgroup_fork(p); @@ -1298,9 +1298,9 @@ bad_fork_cleanup_cgroup: bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: - put_group_info(p->group_info); - atomic_dec(&p->user->processes); - free_uid(p->user); + put_group_info(p->cred->group_info); + atomic_dec(&p->cred->user->processes); + free_uid(p->cred->user); bad_fork_free: free_task(p); fork_out: diff --git a/kernel/futex.c b/kernel/futex.c index e06962132aaf..28421d8210b8 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -443,7 +443,8 @@ static struct task_struct * futex_find_get_task(pid_t pid) rcu_read_lock(); p = find_task_by_vpid(pid); - if (!p || (euid != p->euid && euid != p->uid)) + if (!p || (euid != p->cred->euid && + euid != p->cred->uid)) p = ERR_PTR(-ESRCH); else get_task_struct(p); @@ -1846,7 +1847,8 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, if (!p) goto err_unlock; ret = -EPERM; - if (euid != p->euid && euid != p->uid && + if (euid != p->cred->euid && + euid != p->cred->uid && !capable(CAP_SYS_PTRACE)) goto err_unlock; head = p->robust_list; diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 3254d4e41e88..2c3fd5ed34f5 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -151,8 +151,9 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, if (!p) goto err_unlock; ret = -EPERM; - if (euid != p->euid && euid != p->uid && - !capable(CAP_SYS_PTRACE)) + if (euid != p->cred->euid && + euid != p->cred->uid && + !capable(CAP_SYS_PTRACE)) goto err_unlock; head = p->compat_robust_list; read_unlock(&tasklist_lock); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 937f6b5b2008..49849d12dd12 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -115,6 +115,8 @@ int ptrace_check_attach(struct task_struct *child, int kill) int __ptrace_may_access(struct task_struct *task, unsigned int mode) { + struct cred *cred = current->cred, *tcred = task->cred; + /* May we inspect the given task? * This check is used both for attaching with ptrace * and for allowing access to sensitive information in /proc. @@ -123,19 +125,18 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode) * because setting up the necessary parent/child relationship * or halting the specified task is impossible. */ - uid_t uid; - gid_t gid; + uid_t uid = cred->uid; + gid_t gid = cred->gid; int dumpable = 0; /* Don't let security modules deny introspection */ if (task == current) return 0; - current_uid_gid(&uid, &gid); - if ((uid != task->euid || - uid != task->suid || - uid != task->uid || - gid != task->egid || - gid != task->sgid || - gid != task->gid) && !capable(CAP_SYS_PTRACE)) + if ((uid != tcred->euid || + uid != tcred->suid || + uid != tcred->uid || + gid != tcred->egid || + gid != tcred->sgid || + gid != tcred->gid) && !capable(CAP_SYS_PTRACE)) return -EPERM; smp_rmb(); if (task->mm) diff --git a/kernel/sched.c b/kernel/sched.c index c3b8b1fcde0d..733c59e645aa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -345,7 +345,7 @@ static inline struct task_group *task_group(struct task_struct *p) struct task_group *tg; #ifdef CONFIG_USER_SCHED - tg = p->user->tg; + tg = p->cred->user->tg; #elif defined(CONFIG_CGROUP_SCHED) tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), struct task_group, css); @@ -5182,8 +5182,8 @@ recheck: /* can't change other user's priorities */ euid = current_euid(); - if (euid != p->euid && - euid != p->uid) + if (euid != p->cred->euid && + euid != p->cred->uid) return -EPERM; } @@ -5417,7 +5417,9 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) euid = current_euid(); retval = -EPERM; - if (euid != p->euid && euid != p->uid && !capable(CAP_SYS_NICE)) + if (euid != p->cred->euid && + euid != p->cred->uid && + !capable(CAP_SYS_NICE)) goto out_unlock; retval = security_task_setscheduler(p, 0, NULL); diff --git a/kernel/signal.c b/kernel/signal.c index 167b535fe1a9..80e8a6489f97 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -187,7 +187,7 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, * In order to avoid problems with "switch_user()", we want to make * sure that the compiler doesn't re-load "t->user" */ - user = t->user; + user = t->cred->user; barrier(); atomic_inc(&user->sigpending); if (override_rlimit || @@ -582,8 +582,8 @@ static int check_kill_permission(int sig, struct siginfo *info, uid = current_uid(); euid = current_euid(); - if ((euid ^ t->suid) && (euid ^ t->uid) && - (uid ^ t->suid) && (uid ^ t->uid) && + if ((euid ^ t->cred->suid) && (euid ^ t->cred->uid) && + (uid ^ t->cred->suid) && (uid ^ t->cred->uid) && !capable(CAP_KILL)) { switch (sig) { case SIGCONT: @@ -1100,8 +1100,8 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid, goto out_unlock; } if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info))) - && (euid != p->suid) && (euid != p->uid) - && (uid != p->suid) && (uid != p->uid)) { + && (euid != p->cred->suid) && (euid != p->cred->uid) + && (uid != p->cred->suid) && (uid != p->cred->uid)) { ret = -EPERM; goto out_unlock; } @@ -1374,7 +1374,7 @@ int do_notify_parent(struct task_struct *tsk, int sig) info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns); rcu_read_unlock(); - info.si_uid = tsk->uid; + info.si_uid = tsk->cred->uid; thread_group_cputime(tsk, &cputime); info.si_utime = cputime_to_jiffies(cputime.utime); @@ -1445,7 +1445,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why) info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns); rcu_read_unlock(); - info.si_uid = tsk->uid; + info.si_uid = tsk->cred->uid; info.si_utime = cputime_to_clock_t(tsk->utime); info.si_stime = cputime_to_clock_t(tsk->stime); @@ -1713,7 +1713,7 @@ static int ptrace_signal(int signr, siginfo_t *info, info->si_errno = 0; info->si_code = SI_USER; info->si_pid = task_pid_vnr(current->parent); - info->si_uid = current->parent->uid; + info->si_uid = current->parent->cred->uid; } /* If the (new) signal is now blocked, requeue it. */ diff --git a/kernel/sys.c b/kernel/sys.c index ed5c29c748ac..5d81f07c0150 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -117,7 +117,9 @@ static int set_one_prio(struct task_struct *p, int niceval, int error) uid_t euid = current_euid(); int no_nice; - if (p->uid != euid && p->euid != euid && !capable(CAP_SYS_NICE)) { + if (p->cred->uid != euid && + p->cred->euid != euid && + !capable(CAP_SYS_NICE)) { error = -EPERM; goto out; } @@ -174,7 +176,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case PRIO_USER: - user = current->user; + user = current->cred->user; if (!who) who = current_uid(); else @@ -182,7 +184,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) goto out_unlock; /* No processes for this user */ do_each_thread(g, p) - if (p->uid == who) + if (p->cred->uid == who) error = set_one_prio(p, niceval, error); while_each_thread(g, p); if (who != current_uid()) @@ -236,7 +238,7 @@ asmlinkage long sys_getpriority(int which, int who) } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case PRIO_USER: - user = current->user; + user = current->cred->user; if (!who) who = current_uid(); else @@ -244,7 +246,7 @@ asmlinkage long sys_getpriority(int which, int who) goto out_unlock; /* No processes for this user */ do_each_thread(g, p) - if (p->uid == who) { + if (p->cred->uid == who) { niceval = 20 - task_nice(p); if (niceval > retval) retval = niceval; @@ -472,8 +474,9 @@ void ctrl_alt_del(void) */ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) { - int old_rgid = current->gid; - int old_egid = current->egid; + struct cred *cred = current->cred; + int old_rgid = cred->gid; + int old_egid = cred->egid; int new_rgid = old_rgid; int new_egid = old_egid; int retval; @@ -484,7 +487,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) if (rgid != (gid_t) -1) { if ((old_rgid == rgid) || - (current->egid==rgid) || + (cred->egid == rgid) || capable(CAP_SETGID)) new_rgid = rgid; else @@ -492,8 +495,8 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) } if (egid != (gid_t) -1) { if ((old_rgid == egid) || - (current->egid == egid) || - (current->sgid == egid) || + (cred->egid == egid) || + (cred->sgid == egid) || capable(CAP_SETGID)) new_egid = egid; else @@ -505,10 +508,10 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) } if (rgid != (gid_t) -1 || (egid != (gid_t) -1 && egid != old_rgid)) - current->sgid = new_egid; - current->fsgid = new_egid; - current->egid = new_egid; - current->gid = new_rgid; + cred->sgid = new_egid; + cred->fsgid = new_egid; + cred->egid = new_egid; + cred->gid = new_rgid; key_fsgid_changed(current); proc_id_connector(current, PROC_EVENT_GID); return 0; @@ -521,7 +524,8 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) */ asmlinkage long sys_setgid(gid_t gid) { - int old_egid = current->egid; + struct cred *cred = current->cred; + int old_egid = cred->egid; int retval; retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID); @@ -533,13 +537,13 @@ asmlinkage long sys_setgid(gid_t gid) set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->gid = current->egid = current->sgid = current->fsgid = gid; - } else if ((gid == current->gid) || (gid == current->sgid)) { + cred->gid = cred->egid = cred->sgid = cred->fsgid = gid; + } else if ((gid == cred->gid) || (gid == cred->sgid)) { if (old_egid != gid) { set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->egid = current->fsgid = gid; + cred->egid = cred->fsgid = gid; } else return -EPERM; @@ -570,7 +574,7 @@ static int set_user(uid_t new_ruid, int dumpclear) set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->uid = new_ruid; + current->cred->uid = new_ruid; return 0; } @@ -591,6 +595,7 @@ static int set_user(uid_t new_ruid, int dumpclear) */ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) { + struct cred *cred = current->cred; int old_ruid, old_euid, old_suid, new_ruid, new_euid; int retval; @@ -598,14 +603,14 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) if (retval) return retval; - new_ruid = old_ruid = current->uid; - new_euid = old_euid = current->euid; - old_suid = current->suid; + new_ruid = old_ruid = cred->uid; + new_euid = old_euid = cred->euid; + old_suid = cred->suid; if (ruid != (uid_t) -1) { new_ruid = ruid; if ((old_ruid != ruid) && - (current->euid != ruid) && + (cred->euid != ruid) && !capable(CAP_SETUID)) return -EPERM; } @@ -613,8 +618,8 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) if (euid != (uid_t) -1) { new_euid = euid; if ((old_ruid != euid) && - (current->euid != euid) && - (current->suid != euid) && + (cred->euid != euid) && + (cred->suid != euid) && !capable(CAP_SETUID)) return -EPERM; } @@ -626,11 +631,11 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->fsuid = current->euid = new_euid; + cred->fsuid = cred->euid = new_euid; if (ruid != (uid_t) -1 || (euid != (uid_t) -1 && euid != old_ruid)) - current->suid = current->euid; - current->fsuid = current->euid; + cred->suid = cred->euid; + cred->fsuid = cred->euid; key_fsuid_changed(current); proc_id_connector(current, PROC_EVENT_UID); @@ -653,7 +658,8 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) */ asmlinkage long sys_setuid(uid_t uid) { - int old_euid = current->euid; + struct cred *cred = current->cred; + int old_euid = cred->euid; int old_ruid, old_suid, new_suid; int retval; @@ -661,23 +667,23 @@ asmlinkage long sys_setuid(uid_t uid) if (retval) return retval; - old_ruid = current->uid; - old_suid = current->suid; + old_ruid = cred->uid; + old_suid = cred->suid; new_suid = old_suid; if (capable(CAP_SETUID)) { if (uid != old_ruid && set_user(uid, old_euid != uid) < 0) return -EAGAIN; new_suid = uid; - } else if ((uid != current->uid) && (uid != new_suid)) + } else if ((uid != cred->uid) && (uid != new_suid)) return -EPERM; if (old_euid != uid) { set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->fsuid = current->euid = uid; - current->suid = new_suid; + cred->fsuid = cred->euid = uid; + cred->suid = new_suid; key_fsuid_changed(current); proc_id_connector(current, PROC_EVENT_UID); @@ -692,9 +698,10 @@ asmlinkage long sys_setuid(uid_t uid) */ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) { - int old_ruid = current->uid; - int old_euid = current->euid; - int old_suid = current->suid; + struct cred *cred = current->cred; + int old_ruid = cred->uid; + int old_euid = cred->euid; + int old_suid = cred->suid; int retval; retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES); @@ -702,30 +709,31 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) return retval; if (!capable(CAP_SETUID)) { - if ((ruid != (uid_t) -1) && (ruid != current->uid) && - (ruid != current->euid) && (ruid != current->suid)) + if ((ruid != (uid_t) -1) && (ruid != cred->uid) && + (ruid != cred->euid) && (ruid != cred->suid)) return -EPERM; - if ((euid != (uid_t) -1) && (euid != current->uid) && - (euid != current->euid) && (euid != current->suid)) + if ((euid != (uid_t) -1) && (euid != cred->uid) && + (euid != cred->euid) && (euid != cred->suid)) return -EPERM; - if ((suid != (uid_t) -1) && (suid != current->uid) && - (suid != current->euid) && (suid != current->suid)) + if ((suid != (uid_t) -1) && (suid != cred->uid) && + (suid != cred->euid) && (suid != cred->suid)) return -EPERM; } if (ruid != (uid_t) -1) { - if (ruid != current->uid && set_user(ruid, euid != current->euid) < 0) + if (ruid != cred->uid && + set_user(ruid, euid != cred->euid) < 0) return -EAGAIN; } if (euid != (uid_t) -1) { - if (euid != current->euid) { + if (euid != cred->euid) { set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->euid = euid; + cred->euid = euid; } - current->fsuid = current->euid; + cred->fsuid = cred->euid; if (suid != (uid_t) -1) - current->suid = suid; + cred->suid = suid; key_fsuid_changed(current); proc_id_connector(current, PROC_EVENT_UID); @@ -735,11 +743,12 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid) { + struct cred *cred = current->cred; int retval; - if (!(retval = put_user(current->uid, ruid)) && - !(retval = put_user(current->euid, euid))) - retval = put_user(current->suid, suid); + if (!(retval = put_user(cred->uid, ruid)) && + !(retval = put_user(cred->euid, euid))) + retval = put_user(cred->suid, suid); return retval; } @@ -749,6 +758,7 @@ asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __us */ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) { + struct cred *cred = current->cred; int retval; retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES); @@ -756,28 +766,28 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) return retval; if (!capable(CAP_SETGID)) { - if ((rgid != (gid_t) -1) && (rgid != current->gid) && - (rgid != current->egid) && (rgid != current->sgid)) + if ((rgid != (gid_t) -1) && (rgid != cred->gid) && + (rgid != cred->egid) && (rgid != cred->sgid)) return -EPERM; - if ((egid != (gid_t) -1) && (egid != current->gid) && - (egid != current->egid) && (egid != current->sgid)) + if ((egid != (gid_t) -1) && (egid != cred->gid) && + (egid != cred->egid) && (egid != cred->sgid)) return -EPERM; - if ((sgid != (gid_t) -1) && (sgid != current->gid) && - (sgid != current->egid) && (sgid != current->sgid)) + if ((sgid != (gid_t) -1) && (sgid != cred->gid) && + (sgid != cred->egid) && (sgid != cred->sgid)) return -EPERM; } if (egid != (gid_t) -1) { - if (egid != current->egid) { + if (egid != cred->egid) { set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->egid = egid; + cred->egid = egid; } - current->fsgid = current->egid; + cred->fsgid = cred->egid; if (rgid != (gid_t) -1) - current->gid = rgid; + cred->gid = rgid; if (sgid != (gid_t) -1) - current->sgid = sgid; + cred->sgid = sgid; key_fsgid_changed(current); proc_id_connector(current, PROC_EVENT_GID); @@ -786,11 +796,12 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid) { + struct cred *cred = current->cred; int retval; - if (!(retval = put_user(current->gid, rgid)) && - !(retval = put_user(current->egid, egid))) - retval = put_user(current->sgid, sgid); + if (!(retval = put_user(cred->gid, rgid)) && + !(retval = put_user(cred->egid, egid))) + retval = put_user(cred->sgid, sgid); return retval; } @@ -804,20 +815,21 @@ asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __us */ asmlinkage long sys_setfsuid(uid_t uid) { + struct cred *cred = current->cred; int old_fsuid; - old_fsuid = current->fsuid; + old_fsuid = cred->fsuid; if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS)) return old_fsuid; - if (uid == current->uid || uid == current->euid || - uid == current->suid || uid == current->fsuid || + if (uid == cred->uid || uid == cred->euid || + uid == cred->suid || uid == cred->fsuid || capable(CAP_SETUID)) { if (uid != old_fsuid) { set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->fsuid = uid; + cred->fsuid = uid; } key_fsuid_changed(current); @@ -833,20 +845,21 @@ asmlinkage long sys_setfsuid(uid_t uid) */ asmlinkage long sys_setfsgid(gid_t gid) { + struct cred *cred = current->cred; int old_fsgid; - old_fsgid = current->fsgid; + old_fsgid = cred->fsgid; if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS)) return old_fsgid; - if (gid == current->gid || gid == current->egid || - gid == current->sgid || gid == current->fsgid || + if (gid == cred->gid || gid == cred->egid || + gid == cred->sgid || gid == cred->fsgid || capable(CAP_SETGID)) { if (gid != old_fsgid) { set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->fsgid = gid; + cred->fsgid = gid; key_fsgid_changed(current); proc_id_connector(current, PROC_EVENT_GID); } @@ -1208,8 +1221,15 @@ int groups_search(struct group_info *group_info, gid_t grp) return 0; } -/* validate and set current->group_info */ -int set_current_groups(struct group_info *group_info) +/** + * set_groups - Change a group subscription in a security record + * @sec: The security record to alter + * @group_info: The group list to impose + * + * Validate a group subscription and, if valid, impose it upon a task security + * record. + */ +int set_groups(struct cred *cred, struct group_info *group_info) { int retval; struct group_info *old_info; @@ -1221,20 +1241,34 @@ int set_current_groups(struct group_info *group_info) groups_sort(group_info); get_group_info(group_info); - task_lock(current); - old_info = current->group_info; - current->group_info = group_info; - task_unlock(current); + spin_lock(&cred->lock); + old_info = cred->group_info; + cred->group_info = group_info; + spin_unlock(&cred->lock); put_group_info(old_info); - return 0; } +EXPORT_SYMBOL(set_groups); + +/** + * set_current_groups - Change current's group subscription + * @group_info: The group list to impose + * + * Validate a group subscription and, if valid, impose it upon current's task + * security record. + */ +int set_current_groups(struct group_info *group_info) +{ + return set_groups(current->cred, group_info); +} + EXPORT_SYMBOL(set_current_groups); asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist) { + struct cred *cred = current->cred; int i = 0; /* @@ -1246,13 +1280,13 @@ asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist) return -EINVAL; /* no need to grab task_lock here; it cannot change */ - i = current->group_info->ngroups; + i = cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups_to_user(grouplist, current->group_info)) { + if (groups_to_user(grouplist, cred->group_info)) { i = -EFAULT; goto out; } @@ -1296,9 +1330,10 @@ asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist) */ int in_group_p(gid_t grp) { + struct cred *cred = current->cred; int retval = 1; - if (grp != current->fsgid) - retval = groups_search(current->group_info, grp); + if (grp != cred->fsgid) + retval = groups_search(cred->group_info, grp); return retval; } @@ -1306,9 +1341,10 @@ EXPORT_SYMBOL(in_group_p); int in_egroup_p(gid_t grp) { + struct cred *cred = current->cred; int retval = 1; - if (grp != current->egid) - retval = groups_search(current->group_info, grp); + if (grp != cred->egid) + retval = groups_search(cred->group_info, grp); return retval; } @@ -1624,7 +1660,9 @@ asmlinkage long sys_umask(int mask) asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { - long error = 0; + struct task_struct *me = current; + unsigned char comm[sizeof(me->comm)]; + long error; if (security_task_prctl(option, arg2, arg3, arg4, arg5, &error)) return error; @@ -1635,39 +1673,41 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, error = -EINVAL; break; } - current->pdeath_signal = arg2; + me->pdeath_signal = arg2; + error = 0; break; case PR_GET_PDEATHSIG: - error = put_user(current->pdeath_signal, (int __user *)arg2); + error = put_user(me->pdeath_signal, (int __user *)arg2); break; case PR_GET_DUMPABLE: - error = get_dumpable(current->mm); + error = get_dumpable(me->mm); break; case PR_SET_DUMPABLE: if (arg2 < 0 || arg2 > 1) { error = -EINVAL; break; } - set_dumpable(current->mm, arg2); + set_dumpable(me->mm, arg2); + error = 0; break; case PR_SET_UNALIGN: - error = SET_UNALIGN_CTL(current, arg2); + error = SET_UNALIGN_CTL(me, arg2); break; case PR_GET_UNALIGN: - error = GET_UNALIGN_CTL(current, arg2); + error = GET_UNALIGN_CTL(me, arg2); break; case PR_SET_FPEMU: - error = SET_FPEMU_CTL(current, arg2); + error = SET_FPEMU_CTL(me, arg2); break; case PR_GET_FPEMU: - error = GET_FPEMU_CTL(current, arg2); + error = GET_FPEMU_CTL(me, arg2); break; case PR_SET_FPEXC: - error = SET_FPEXC_CTL(current, arg2); + error = SET_FPEXC_CTL(me, arg2); break; case PR_GET_FPEXC: - error = GET_FPEXC_CTL(current, arg2); + error = GET_FPEXC_CTL(me, arg2); break; case PR_GET_TIMING: error = PR_TIMING_STATISTICAL; @@ -1675,33 +1715,28 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, case PR_SET_TIMING: if (arg2 != PR_TIMING_STATISTICAL) error = -EINVAL; + else + error = 0; break; - case PR_SET_NAME: { - struct task_struct *me = current; - unsigned char ncomm[sizeof(me->comm)]; - - ncomm[sizeof(me->comm)-1] = 0; - if (strncpy_from_user(ncomm, (char __user *)arg2, - sizeof(me->comm)-1) < 0) + case PR_SET_NAME: + comm[sizeof(me->comm)-1] = 0; + if (strncpy_from_user(comm, (char __user *)arg2, + sizeof(me->comm) - 1) < 0) return -EFAULT; - set_task_comm(me, ncomm); + set_task_comm(me, comm); return 0; - } - case PR_GET_NAME: { - struct task_struct *me = current; - unsigned char tcomm[sizeof(me->comm)]; - - get_task_comm(tcomm, me); - if (copy_to_user((char __user *)arg2, tcomm, sizeof(tcomm))) + case PR_GET_NAME: + get_task_comm(comm, me); + if (copy_to_user((char __user *)arg2, comm, + sizeof(comm))) return -EFAULT; return 0; - } case PR_GET_ENDIAN: - error = GET_ENDIAN(current, arg2); + error = GET_ENDIAN(me, arg2); break; case PR_SET_ENDIAN: - error = SET_ENDIAN(current, arg2); + error = SET_ENDIAN(me, arg2); break; case PR_GET_SECCOMP: @@ -1725,6 +1760,7 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, current->default_timer_slack_ns; else current->timer_slack_ns = arg2; + error = 0; break; default: error = -EINVAL; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9f3b478f9171..5c97c5b4ea8f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -246,7 +246,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) memcpy(data->comm, tsk->comm, TASK_COMM_LEN); data->pid = tsk->pid; - data->uid = tsk->uid; + data->uid = task_uid(tsk); data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; data->policy = tsk->policy; data->rt_priority = tsk->rt_priority; diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 8ebcd8532dfb..6d1ed07bf312 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -53,8 +53,8 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) stats->ac_flag |= AXSIG; stats->ac_nice = task_nice(tsk); stats->ac_sched = tsk->policy; - stats->ac_uid = tsk->uid; - stats->ac_gid = tsk->gid; + stats->ac_uid = tsk->cred->uid; + stats->ac_gid = tsk->cred->gid; stats->ac_pid = tsk->pid; rcu_read_lock(); stats->ac_ppid = pid_alive(tsk) ? diff --git a/kernel/uid16.c b/kernel/uid16.c index 3e41c1673e2f..71f07fc39fea 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -86,9 +86,9 @@ asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, { int retval; - if (!(retval = put_user(high2lowuid(current->uid), ruid)) && - !(retval = put_user(high2lowuid(current->euid), euid))) - retval = put_user(high2lowuid(current->suid), suid); + if (!(retval = put_user(high2lowuid(current->cred->uid), ruid)) && + !(retval = put_user(high2lowuid(current->cred->euid), euid))) + retval = put_user(high2lowuid(current->cred->suid), suid); return retval; } @@ -106,9 +106,9 @@ asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, { int retval; - if (!(retval = put_user(high2lowgid(current->gid), rgid)) && - !(retval = put_user(high2lowgid(current->egid), egid))) - retval = put_user(high2lowgid(current->sgid), sgid); + if (!(retval = put_user(high2lowgid(current->cred->gid), rgid)) && + !(retval = put_user(high2lowgid(current->cred->egid), egid))) + retval = put_user(high2lowgid(current->cred->sgid), sgid); return retval; } @@ -166,20 +166,20 @@ asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t __user *grouplist) if (gidsetsize < 0) return -EINVAL; - get_group_info(current->group_info); - i = current->group_info->ngroups; + get_group_info(current->cred->group_info); + i = current->cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups16_to_user(grouplist, current->group_info)) { + if (groups16_to_user(grouplist, current->cred->group_info)) { i = -EFAULT; goto out; } } out: - put_group_info(current->group_info); + put_group_info(current->cred->group_info); return i; } @@ -210,20 +210,20 @@ asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist) asmlinkage long sys_getuid16(void) { - return high2lowuid(current->uid); + return high2lowuid(current->cred->uid); } asmlinkage long sys_geteuid16(void) { - return high2lowuid(current->euid); + return high2lowuid(current->cred->euid); } asmlinkage long sys_getgid16(void) { - return high2lowgid(current->gid); + return high2lowgid(current->cred->gid); } asmlinkage long sys_getegid16(void) { - return high2lowgid(current->egid); + return high2lowgid(current->cred->egid); } diff --git a/kernel/user.c b/kernel/user.c index 39d6159fae43..104d22ac84d5 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -457,11 +457,11 @@ void switch_uid(struct user_struct *new_user) * cheaply with the new uid cache, so if it matters * we should be checking for it. -DaveM */ - old_user = current->user; + old_user = current->cred->user; atomic_inc(&new_user->processes); atomic_dec(&old_user->processes); switch_uid_keyring(new_user); - current->user = new_user; + current->cred->user = new_user; sched_switch_user(current); /* diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 07a96474077d..b23492ee3e50 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1110,12 +1110,12 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, const unsigned long __user *old_nodes, const unsigned long __user *new_nodes) { + struct cred *cred, *tcred; struct mm_struct *mm; struct task_struct *task; nodemask_t old; nodemask_t new; nodemask_t task_nodes; - uid_t uid, euid; int err; err = get_nodes(&old, old_nodes, maxnode); @@ -1145,10 +1145,10 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, * capabilities, superuser privileges or the same * userid as the target process. */ - uid = current_uid(); - euid = current_euid(); - if (euid != task->suid && euid != task->uid && - uid != task->suid && uid != task->uid && + cred = current->cred; + tcred = task->cred; + if (cred->euid != tcred->suid && cred->euid != tcred->uid && + cred->uid != tcred->suid && cred->uid != tcred->uid && !capable(CAP_SYS_NICE)) { err = -EPERM; goto out; diff --git a/mm/migrate.c b/mm/migrate.c index 6263c24c4afe..794443da1b4f 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1045,10 +1045,10 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, const int __user *nodes, int __user *status, int flags) { + struct cred *cred, *tcred; struct task_struct *task; struct mm_struct *mm; int err; - uid_t uid, euid; /* Check flags */ if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL)) @@ -1076,10 +1076,10 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, * capabilities, superuser privileges or the same * userid as the target process. */ - uid = current_uid(); - euid = current_euid(); - if (euid != task->suid && euid != task->uid && - uid != task->suid && uid != task->uid && + cred = current->cred; + tcred = task->cred; + if (cred->euid != tcred->suid && cred->euid != tcred->uid && + cred->uid != tcred->suid && cred->uid != tcred->uid && !capable(CAP_SYS_NICE)) { err = -EPERM; goto out; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 34a458aa7997..3af787ba2077 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -298,7 +298,7 @@ static void dump_tasks(const struct mem_cgroup *mem) task_lock(p); printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n", - p->pid, p->uid, p->tgid, p->mm->total_vm, + p->pid, p->cred->uid, p->tgid, p->mm->total_vm, get_mm_rss(p->mm), (int)task_cpu(p), p->oomkilladj, p->comm); task_unlock(p); diff --git a/net/core/scm.c b/net/core/scm.c index 4681d8f9b45b..c28ca32a7d93 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -44,11 +44,13 @@ static __inline__ int scm_check_creds(struct ucred *creds) { + struct cred *cred = current->cred; + if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) && - ((creds->uid == current_uid() || creds->uid == current_euid() || - creds->uid == current_suid()) || capable(CAP_SETUID)) && - ((creds->gid == current_gid() || creds->gid == current_egid() || - creds->gid == current_sgid()) || capable(CAP_SETGID))) { + ((creds->uid == cred->uid || creds->uid == cred->euid || + creds->uid == cred->suid) || capable(CAP_SETUID)) && + ((creds->gid == cred->gid || creds->gid == cred->egid || + creds->gid == cred->sgid) || capable(CAP_SETGID))) { return 0; } return -EPERM; diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 8fc380578807..c79543212602 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -353,7 +353,7 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags) struct auth_cred acred = { .uid = current_fsuid(), .gid = current_fsgid(), - .group_info = current->group_info, + .group_info = current->cred->group_info, }; struct rpc_cred *ret; diff --git a/security/commoncap.c b/security/commoncap.c index fb4e240720d8..fa61679f8c73 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -30,7 +30,7 @@ int cap_netlink_send(struct sock *sk, struct sk_buff *skb) { - NETLINK_CB(skb).eff_cap = current->cap_effective; + NETLINK_CB(skb).eff_cap = current_cap(); return 0; } @@ -52,7 +52,7 @@ EXPORT_SYMBOL(cap_netlink_recv); int cap_capable(struct task_struct *tsk, int cap, int audit) { /* Derived from include/linux/sched.h:capable. */ - if (cap_raised(tsk->cap_effective, cap)) + if (cap_raised(tsk->cred->cap_effective, cap)) return 0; return -EPERM; } @@ -67,7 +67,8 @@ int cap_settime(struct timespec *ts, struct timezone *tz) int cap_ptrace_may_access(struct task_struct *child, unsigned int mode) { /* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. */ - if (cap_issubset(child->cap_permitted, current->cap_permitted)) + if (cap_issubset(child->cred->cap_permitted, + current->cred->cap_permitted)) return 0; if (capable(CAP_SYS_PTRACE)) return 0; @@ -76,8 +77,8 @@ int cap_ptrace_may_access(struct task_struct *child, unsigned int mode) int cap_ptrace_traceme(struct task_struct *parent) { - /* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. */ - if (cap_issubset(current->cap_permitted, parent->cap_permitted)) + if (cap_issubset(current->cred->cap_permitted, + parent->cred->cap_permitted)) return 0; if (has_capability(parent, CAP_SYS_PTRACE)) return 0; @@ -87,10 +88,12 @@ int cap_ptrace_traceme(struct task_struct *parent) int cap_capget (struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { + struct cred *cred = target->cred; + /* Derived from kernel/capability.c:sys_capget. */ - *effective = target->cap_effective; - *inheritable = target->cap_inheritable; - *permitted = target->cap_permitted; + *effective = cred->cap_effective; + *inheritable = cred->cap_inheritable; + *permitted = cred->cap_permitted; return 0; } @@ -122,24 +125,26 @@ int cap_capset_check(const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted) { + const struct cred *cred = current->cred; + if (cap_inh_is_capped() && !cap_issubset(*inheritable, - cap_combine(current->cap_inheritable, - current->cap_permitted))) { + cap_combine(cred->cap_inheritable, + cred->cap_permitted))) { /* incapable of using this inheritable set */ return -EPERM; } if (!cap_issubset(*inheritable, - cap_combine(current->cap_inheritable, - current->cap_bset))) { + cap_combine(cred->cap_inheritable, + cred->cap_bset))) { /* no new pI capabilities outside bounding set */ return -EPERM; } /* verify restrictions on target's new Permitted set */ if (!cap_issubset (*permitted, - cap_combine (current->cap_permitted, - current->cap_permitted))) { + cap_combine (cred->cap_permitted, + cred->cap_permitted))) { return -EPERM; } @@ -155,9 +160,11 @@ void cap_capset_set(const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted) { - current->cap_effective = *effective; - current->cap_inheritable = *inheritable; - current->cap_permitted = *permitted; + struct cred *cred = current->cred; + + cred->cap_effective = *effective; + cred->cap_inheritable = *inheritable; + cred->cap_permitted = *permitted; } static inline void bprm_clear_caps(struct linux_binprm *bprm) @@ -211,8 +218,8 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, * pP' = (X & fP) | (pI & fI) */ bprm->cap_post_exec_permitted.cap[i] = - (current->cap_bset.cap[i] & permitted) | - (current->cap_inheritable.cap[i] & inheritable); + (current->cred->cap_bset.cap[i] & permitted) | + (current->cred->cap_inheritable.cap[i] & inheritable); if (permitted & ~bprm->cap_post_exec_permitted.cap[i]) { /* @@ -354,8 +361,8 @@ int cap_bprm_set_security (struct linux_binprm *bprm) if (bprm->e_uid == 0 || current_uid() == 0) { /* pP' = (cap_bset & ~0) | (pI & ~0) */ bprm->cap_post_exec_permitted = cap_combine( - current->cap_bset, current->cap_inheritable - ); + current->cred->cap_bset, + current->cred->cap_inheritable); bprm->cap_effective = (bprm->e_uid == 0); ret = 0; } @@ -366,44 +373,39 @@ int cap_bprm_set_security (struct linux_binprm *bprm) void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) { - kernel_cap_t pP = current->cap_permitted; - kernel_cap_t pE = current->cap_effective; - uid_t uid; - gid_t gid; + struct cred *cred = current->cred; - current_uid_gid(&uid, &gid); - - if (bprm->e_uid != uid || bprm->e_gid != gid || + if (bprm->e_uid != cred->uid || bprm->e_gid != cred->gid || !cap_issubset(bprm->cap_post_exec_permitted, - current->cap_permitted)) { + cred->cap_permitted)) { set_dumpable(current->mm, suid_dumpable); current->pdeath_signal = 0; if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) { if (!capable(CAP_SETUID)) { - bprm->e_uid = uid; - bprm->e_gid = gid; + bprm->e_uid = cred->uid; + bprm->e_gid = cred->gid; } if (cap_limit_ptraced_target()) { bprm->cap_post_exec_permitted = cap_intersect( bprm->cap_post_exec_permitted, - current->cap_permitted); + cred->cap_permitted); } } } - current->suid = current->euid = current->fsuid = bprm->e_uid; - current->sgid = current->egid = current->fsgid = bprm->e_gid; + cred->suid = cred->euid = cred->fsuid = bprm->e_uid; + cred->sgid = cred->egid = cred->fsgid = bprm->e_gid; /* For init, we want to retain the capabilities set * in the init_task struct. Thus we skip the usual * capability rules */ if (!is_global_init(current)) { - current->cap_permitted = bprm->cap_post_exec_permitted; + cred->cap_permitted = bprm->cap_post_exec_permitted; if (bprm->cap_effective) - current->cap_effective = bprm->cap_post_exec_permitted; + cred->cap_effective = bprm->cap_post_exec_permitted; else - cap_clear(current->cap_effective); + cap_clear(cred->cap_effective); } /* @@ -418,27 +420,30 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) * Number 1 above might fail if you don't have a full bset, but I think * that is interesting information to audit. */ - if (!cap_isclear(current->cap_effective)) { - if (!cap_issubset(CAP_FULL_SET, current->cap_effective) || - (bprm->e_uid != 0) || (current->uid != 0) || + if (!cap_isclear(cred->cap_effective)) { + if (!cap_issubset(CAP_FULL_SET, cred->cap_effective) || + (bprm->e_uid != 0) || (cred->uid != 0) || issecure(SECURE_NOROOT)) - audit_log_bprm_fcaps(bprm, &pP, &pE); + audit_log_bprm_fcaps(bprm, &cred->cap_permitted, + &cred->cap_effective); } - current->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); + cred->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); } int cap_bprm_secureexec (struct linux_binprm *bprm) { - if (current_uid() != 0) { + const struct cred *cred = current->cred; + + if (cred->uid != 0) { if (bprm->cap_effective) return 1; if (!cap_isclear(bprm->cap_post_exec_permitted)) return 1; } - return (current_euid() != current_uid() || - current_egid() != current_gid()); + return (cred->euid != cred->uid || + cred->egid != cred->gid); } int cap_inode_setxattr(struct dentry *dentry, const char *name, @@ -501,25 +506,27 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name) static inline void cap_emulate_setxuid (int old_ruid, int old_euid, int old_suid) { - uid_t euid = current_euid(); + struct cred *cred = current->cred; if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) && - (current_uid() != 0 && euid != 0 && current_suid() != 0) && + (cred->uid != 0 && cred->euid != 0 && cred->suid != 0) && !issecure(SECURE_KEEP_CAPS)) { - cap_clear (current->cap_permitted); - cap_clear (current->cap_effective); + cap_clear (cred->cap_permitted); + cap_clear (cred->cap_effective); } - if (old_euid == 0 && euid != 0) { - cap_clear (current->cap_effective); + if (old_euid == 0 && cred->euid != 0) { + cap_clear (cred->cap_effective); } - if (old_euid != 0 && euid == 0) { - current->cap_effective = current->cap_permitted; + if (old_euid != 0 && cred->euid == 0) { + cred->cap_effective = cred->cap_permitted; } } int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags) { + struct cred *cred = current->cred; + switch (flags) { case LSM_SETID_RE: case LSM_SETID_ID: @@ -541,16 +548,16 @@ int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, */ if (!issecure (SECURE_NO_SETUID_FIXUP)) { - if (old_fsuid == 0 && current_fsuid() != 0) { - current->cap_effective = + if (old_fsuid == 0 && cred->fsuid != 0) { + cred->cap_effective = cap_drop_fs_set( - current->cap_effective); + cred->cap_effective); } - if (old_fsuid != 0 && current_fsuid() == 0) { - current->cap_effective = + if (old_fsuid != 0 && cred->fsuid == 0) { + cred->cap_effective = cap_raise_fs_set( - current->cap_effective, - current->cap_permitted); + cred->cap_effective, + cred->cap_permitted); } } break; @@ -575,7 +582,8 @@ int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, */ static int cap_safe_nice(struct task_struct *p) { - if (!cap_issubset(p->cap_permitted, current->cap_permitted) && + if (!cap_issubset(p->cred->cap_permitted, + current->cred->cap_permitted) && !capable(CAP_SYS_NICE)) return -EPERM; return 0; @@ -610,7 +618,7 @@ static long cap_prctl_drop(unsigned long cap) return -EPERM; if (!cap_valid(cap)) return -EINVAL; - cap_lower(current->cap_bset, cap); + cap_lower(current->cred->cap_bset, cap); return 0; } @@ -633,6 +641,7 @@ int cap_task_setnice (struct task_struct *p, int nice) int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5, long *rc_p) { + struct cred *cred = current->cred; long error = 0; switch (option) { @@ -640,7 +649,7 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, if (!cap_valid(arg2)) error = -EINVAL; else - error = !!cap_raised(current->cap_bset, arg2); + error = !!cap_raised(cred->cap_bset, arg2); break; #ifdef CONFIG_SECURITY_FILE_CAPABILITIES case PR_CAPBSET_DROP: @@ -667,9 +676,9 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, * capability-based-privilege environment. */ case PR_SET_SECUREBITS: - if ((((current->securebits & SECURE_ALL_LOCKS) >> 1) - & (current->securebits ^ arg2)) /*[1]*/ - || ((current->securebits & SECURE_ALL_LOCKS + if ((((cred->securebits & SECURE_ALL_LOCKS) >> 1) + & (cred->securebits ^ arg2)) /*[1]*/ + || ((cred->securebits & SECURE_ALL_LOCKS & ~arg2)) /*[2]*/ || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ || (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0)) { /*[4]*/ @@ -682,11 +691,11 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, */ error = -EPERM; /* cannot change a locked bit */ } else { - current->securebits = arg2; + cred->securebits = arg2; } break; case PR_GET_SECUREBITS: - error = current->securebits; + error = cred->securebits; break; #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ @@ -701,10 +710,9 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, else if (issecure(SECURE_KEEP_CAPS_LOCKED)) error = -EPERM; else if (arg2) - current->securebits |= issecure_mask(SECURE_KEEP_CAPS); + cred->securebits |= issecure_mask(SECURE_KEEP_CAPS); else - current->securebits &= - ~issecure_mask(SECURE_KEEP_CAPS); + cred->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); break; default: @@ -719,11 +727,12 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, void cap_task_reparent_to_init (struct task_struct *p) { - cap_set_init_eff(p->cap_effective); - cap_clear(p->cap_inheritable); - cap_set_full(p->cap_permitted); - p->securebits = SECUREBITS_DEFAULT; - return; + struct cred *cred = p->cred; + + cap_set_init_eff(cred->cap_effective); + cap_clear(cred->cap_inheritable); + cap_set_full(cred->cap_permitted); + p->cred->securebits = SECUREBITS_DEFAULT; } int cap_syslog (int type) diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index fcce331eca72..8833b447adef 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -889,7 +889,7 @@ long keyctl_instantiate_key(key_serial_t id, /* the appropriate instantiation authorisation key must have been * assumed before calling this */ ret = -EPERM; - instkey = current->request_key_auth; + instkey = current->cred->request_key_auth; if (!instkey) goto error; @@ -932,8 +932,8 @@ long keyctl_instantiate_key(key_serial_t id, /* discard the assumed authority if it's just been disabled by * instantiation of the key */ if (ret == 0) { - key_put(current->request_key_auth); - current->request_key_auth = NULL; + key_put(current->cred->request_key_auth); + current->cred->request_key_auth = NULL; } error2: @@ -960,7 +960,7 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) /* the appropriate instantiation authorisation key must have been * assumed before calling this */ ret = -EPERM; - instkey = current->request_key_auth; + instkey = current->cred->request_key_auth; if (!instkey) goto error; @@ -983,8 +983,8 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) /* discard the assumed authority if it's just been disabled by * instantiation of the key */ if (ret == 0) { - key_put(current->request_key_auth); - current->request_key_auth = NULL; + key_put(current->cred->request_key_auth); + current->cred->request_key_auth = NULL; } error: @@ -999,6 +999,7 @@ error: */ long keyctl_set_reqkey_keyring(int reqkey_defl) { + struct cred *cred = current->cred; int ret; switch (reqkey_defl) { @@ -1018,10 +1019,10 @@ long keyctl_set_reqkey_keyring(int reqkey_defl) case KEY_REQKEY_DEFL_USER_KEYRING: case KEY_REQKEY_DEFL_USER_SESSION_KEYRING: set: - current->jit_keyring = reqkey_defl; + cred->jit_keyring = reqkey_defl; case KEY_REQKEY_DEFL_NO_CHANGE: - return current->jit_keyring; + return cred->jit_keyring; case KEY_REQKEY_DEFL_GROUP_KEYRING: default: @@ -1086,8 +1087,8 @@ long keyctl_assume_authority(key_serial_t id) /* we divest ourselves of authority if given an ID of 0 */ if (id == 0) { - key_put(current->request_key_auth); - current->request_key_auth = NULL; + key_put(current->cred->request_key_auth); + current->cred->request_key_auth = NULL; ret = 0; goto error; } @@ -1103,8 +1104,8 @@ long keyctl_assume_authority(key_serial_t id) goto error; } - key_put(current->request_key_auth); - current->request_key_auth = authkey; + key_put(current->cred->request_key_auth); + current->cred->request_key_auth = authkey; ret = authkey->serial; error: diff --git a/security/keys/permission.c b/security/keys/permission.c index 3b41f9b52537..baf3d5f31e71 100644 --- a/security/keys/permission.c +++ b/security/keys/permission.c @@ -22,6 +22,7 @@ int key_task_permission(const key_ref_t key_ref, struct task_struct *context, key_perm_t perm) { + struct cred *cred = context->cred; struct key *key; key_perm_t kperm; int ret; @@ -29,7 +30,7 @@ int key_task_permission(const key_ref_t key_ref, key = key_ref_to_ptr(key_ref); /* use the second 8-bits of permissions for keys the caller owns */ - if (key->uid == context->fsuid) { + if (key->uid == cred->fsuid) { kperm = key->perm >> 16; goto use_these_perms; } @@ -37,14 +38,14 @@ int key_task_permission(const key_ref_t key_ref, /* use the third 8-bits of permissions for keys the caller has a group * membership in common with */ if (key->gid != -1 && key->perm & KEY_GRP_ALL) { - if (key->gid == context->fsgid) { + if (key->gid == cred->fsgid) { kperm = key->perm >> 8; goto use_these_perms; } - task_lock(context); - ret = groups_search(context->group_info, key->gid); - task_unlock(context); + spin_lock(&cred->lock); + ret = groups_search(cred->group_info, key->gid); + spin_unlock(&cred->lock); if (ret) { kperm = key->perm >> 8; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 1c793b7090a7..b0904cdda2e7 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -42,7 +42,7 @@ struct key_user root_key_user = { */ int install_user_keyrings(void) { - struct user_struct *user = current->user; + struct user_struct *user = current->cred->user; struct key *uid_keyring, *session_keyring; char buf[20]; int ret; @@ -156,7 +156,7 @@ int install_thread_keyring(void) sprintf(buf, "_tid.%u", tsk->pid); - keyring = keyring_alloc(buf, tsk->uid, tsk->gid, tsk, + keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, KEY_ALLOC_QUOTA_OVERRUN, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); @@ -164,8 +164,8 @@ int install_thread_keyring(void) } task_lock(tsk); - old = tsk->thread_keyring; - tsk->thread_keyring = keyring; + old = tsk->cred->thread_keyring; + tsk->cred->thread_keyring = keyring; task_unlock(tsk); ret = 0; @@ -192,7 +192,7 @@ int install_process_keyring(void) if (!tsk->signal->process_keyring) { sprintf(buf, "_pid.%u", tsk->tgid); - keyring = keyring_alloc(buf, tsk->uid, tsk->gid, tsk, + keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, KEY_ALLOC_QUOTA_OVERRUN, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); @@ -238,7 +238,7 @@ static int install_session_keyring(struct key *keyring) if (tsk->signal->session_keyring) flags = KEY_ALLOC_IN_QUOTA; - keyring = keyring_alloc(buf, tsk->uid, tsk->gid, tsk, + keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, flags, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); @@ -292,14 +292,14 @@ int copy_thread_group_keys(struct task_struct *tsk) */ int copy_keys(unsigned long clone_flags, struct task_struct *tsk) { - key_check(tsk->thread_keyring); - key_check(tsk->request_key_auth); + key_check(tsk->cred->thread_keyring); + key_check(tsk->cred->request_key_auth); /* no thread keyring yet */ - tsk->thread_keyring = NULL; + tsk->cred->thread_keyring = NULL; /* copy the request_key() authorisation for this thread */ - key_get(tsk->request_key_auth); + key_get(tsk->cred->request_key_auth); return 0; @@ -322,8 +322,8 @@ void exit_thread_group_keys(struct signal_struct *tg) */ void exit_keys(struct task_struct *tsk) { - key_put(tsk->thread_keyring); - key_put(tsk->request_key_auth); + key_put(tsk->cred->thread_keyring); + key_put(tsk->cred->request_key_auth); } /* end exit_keys() */ @@ -337,8 +337,8 @@ int exec_keys(struct task_struct *tsk) /* newly exec'd tasks don't get a thread keyring */ task_lock(tsk); - old = tsk->thread_keyring; - tsk->thread_keyring = NULL; + old = tsk->cred->thread_keyring; + tsk->cred->thread_keyring = NULL; task_unlock(tsk); key_put(old); @@ -373,10 +373,11 @@ int suid_keys(struct task_struct *tsk) void key_fsuid_changed(struct task_struct *tsk) { /* update the ownership of the thread keyring */ - if (tsk->thread_keyring) { - down_write(&tsk->thread_keyring->sem); - tsk->thread_keyring->uid = tsk->fsuid; - up_write(&tsk->thread_keyring->sem); + BUG_ON(!tsk->cred); + if (tsk->cred->thread_keyring) { + down_write(&tsk->cred->thread_keyring->sem); + tsk->cred->thread_keyring->uid = tsk->cred->fsuid; + up_write(&tsk->cred->thread_keyring->sem); } } /* end key_fsuid_changed() */ @@ -388,10 +389,11 @@ void key_fsuid_changed(struct task_struct *tsk) void key_fsgid_changed(struct task_struct *tsk) { /* update the ownership of the thread keyring */ - if (tsk->thread_keyring) { - down_write(&tsk->thread_keyring->sem); - tsk->thread_keyring->gid = tsk->fsgid; - up_write(&tsk->thread_keyring->sem); + BUG_ON(!tsk->cred); + if (tsk->cred->thread_keyring) { + down_write(&tsk->cred->thread_keyring->sem); + tsk->cred->thread_keyring->gid = tsk->cred->fsgid; + up_write(&tsk->cred->thread_keyring->sem); } } /* end key_fsgid_changed() */ @@ -426,9 +428,9 @@ key_ref_t search_process_keyrings(struct key_type *type, err = ERR_PTR(-EAGAIN); /* search the thread keyring first */ - if (context->thread_keyring) { + if (context->cred->thread_keyring) { key_ref = keyring_search_aux( - make_key_ref(context->thread_keyring, 1), + make_key_ref(context->cred->thread_keyring, 1), context, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -493,9 +495,9 @@ key_ref_t search_process_keyrings(struct key_type *type, } } /* or search the user-session keyring */ - else if (context->user->session_keyring) { + else if (context->cred->user->session_keyring) { key_ref = keyring_search_aux( - make_key_ref(context->user->session_keyring, 1), + make_key_ref(context->cred->user->session_keyring, 1), context, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -517,20 +519,20 @@ key_ref_t search_process_keyrings(struct key_type *type, * search the keyrings of the process mentioned there * - we don't permit access to request_key auth keys via this method */ - if (context->request_key_auth && + if (context->cred->request_key_auth && context == current && type != &key_type_request_key_auth ) { /* defend against the auth key being revoked */ - down_read(&context->request_key_auth->sem); + down_read(&context->cred->request_key_auth->sem); - if (key_validate(context->request_key_auth) == 0) { - rka = context->request_key_auth->payload.data; + if (key_validate(context->cred->request_key_auth) == 0) { + rka = context->cred->request_key_auth->payload.data; key_ref = search_process_keyrings(type, description, match, rka->context); - up_read(&context->request_key_auth->sem); + up_read(&context->cred->request_key_auth->sem); if (!IS_ERR(key_ref)) goto found; @@ -547,7 +549,7 @@ key_ref_t search_process_keyrings(struct key_type *type, break; } } else { - up_read(&context->request_key_auth->sem); + up_read(&context->cred->request_key_auth->sem); } } @@ -580,15 +582,16 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, { struct request_key_auth *rka; struct task_struct *t = current; - key_ref_t key_ref, skey_ref; + struct cred *cred = t->cred; struct key *key; + key_ref_t key_ref, skey_ref; int ret; key_ref = ERR_PTR(-ENOKEY); switch (id) { case KEY_SPEC_THREAD_KEYRING: - if (!t->thread_keyring) { + if (!cred->thread_keyring) { if (!create) goto error; @@ -599,7 +602,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, } } - key = t->thread_keyring; + key = cred->thread_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; @@ -628,7 +631,8 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, ret = install_user_keyrings(); if (ret < 0) goto error; - ret = install_session_keyring(t->user->session_keyring); + ret = install_session_keyring( + cred->user->session_keyring); if (ret < 0) goto error; } @@ -641,25 +645,25 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, break; case KEY_SPEC_USER_KEYRING: - if (!t->user->uid_keyring) { + if (!cred->user->uid_keyring) { ret = install_user_keyrings(); if (ret < 0) goto error; } - key = t->user->uid_keyring; + key = cred->user->uid_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_USER_SESSION_KEYRING: - if (!t->user->session_keyring) { + if (!cred->user->session_keyring) { ret = install_user_keyrings(); if (ret < 0) goto error; } - key = t->user->session_keyring; + key = cred->user->session_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; @@ -670,7 +674,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, goto error; case KEY_SPEC_REQKEY_AUTH_KEY: - key = t->request_key_auth; + key = cred->request_key_auth; if (!key) goto error; @@ -679,19 +683,19 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, break; case KEY_SPEC_REQUESTOR_KEYRING: - if (!t->request_key_auth) + if (!cred->request_key_auth) goto error; - down_read(&t->request_key_auth->sem); - if (t->request_key_auth->flags & KEY_FLAG_REVOKED) { + down_read(&cred->request_key_auth->sem); + if (cred->request_key_auth->flags & KEY_FLAG_REVOKED) { key_ref = ERR_PTR(-EKEYREVOKED); key = NULL; } else { - rka = t->request_key_auth->payload.data; + rka = cred->request_key_auth->payload.data; key = rka->dest_keyring; atomic_inc(&key->usage); } - up_read(&t->request_key_auth->sem); + up_read(&cred->request_key_auth->sem); if (!key) goto error; key_ref = make_key_ref(key, 1); @@ -791,7 +795,7 @@ long join_session_keyring(const char *name) keyring = find_keyring_by_name(name, false); if (PTR_ERR(keyring) == -ENOKEY) { /* not found - try and create a new one */ - keyring = keyring_alloc(name, tsk->uid, tsk->gid, tsk, + keyring = keyring_alloc(name, tsk->cred->uid, tsk->cred->gid, tsk, KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 8e9d93b4a402..3e9b9eb1dd28 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -104,7 +104,8 @@ static int call_sbin_request_key(struct key_construction *cons, /* we specify the process's default keyrings */ sprintf(keyring_str[0], "%d", - tsk->thread_keyring ? tsk->thread_keyring->serial : 0); + tsk->cred->thread_keyring ? + tsk->cred->thread_keyring->serial : 0); prkey = 0; if (tsk->signal->process_keyring) @@ -117,7 +118,7 @@ static int call_sbin_request_key(struct key_construction *cons, sskey = rcu_dereference(tsk->signal->session_keyring)->serial; rcu_read_unlock(); } else { - sskey = tsk->user->session_keyring->serial; + sskey = tsk->cred->user->session_keyring->serial; } sprintf(keyring_str[2], "%d", sskey); @@ -232,11 +233,11 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) } else { /* use a default keyring; falling through the cases until we * find one that we actually have */ - switch (tsk->jit_keyring) { + switch (tsk->cred->jit_keyring) { case KEY_REQKEY_DEFL_DEFAULT: case KEY_REQKEY_DEFL_REQUESTOR_KEYRING: - if (tsk->request_key_auth) { - authkey = tsk->request_key_auth; + if (tsk->cred->request_key_auth) { + authkey = tsk->cred->request_key_auth; down_read(&authkey->sem); rka = authkey->payload.data; if (!test_bit(KEY_FLAG_REVOKED, @@ -249,7 +250,7 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) } case KEY_REQKEY_DEFL_THREAD_KEYRING: - dest_keyring = key_get(tsk->thread_keyring); + dest_keyring = key_get(tsk->cred->thread_keyring); if (dest_keyring) break; @@ -268,11 +269,12 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) break; case KEY_REQKEY_DEFL_USER_SESSION_KEYRING: - dest_keyring = key_get(tsk->user->session_keyring); + dest_keyring = + key_get(tsk->cred->user->session_keyring); break; case KEY_REQKEY_DEFL_USER_KEYRING: - dest_keyring = key_get(tsk->user->uid_keyring); + dest_keyring = key_get(tsk->cred->user->uid_keyring); break; case KEY_REQKEY_DEFL_GROUP_KEYRING: diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 1762d44711d5..2125579d5d73 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -164,22 +164,22 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, /* see if the calling process is already servicing the key request of * another process */ - if (current->request_key_auth) { + if (current->cred->request_key_auth) { /* it is - use that instantiation context here too */ - down_read(¤t->request_key_auth->sem); + down_read(¤t->cred->request_key_auth->sem); /* if the auth key has been revoked, then the key we're * servicing is already instantiated */ if (test_bit(KEY_FLAG_REVOKED, - ¤t->request_key_auth->flags)) + ¤t->cred->request_key_auth->flags)) goto auth_key_revoked; - irka = current->request_key_auth->payload.data; + irka = current->cred->request_key_auth->payload.data; rka->context = irka->context; rka->pid = irka->pid; get_task_struct(rka->context); - up_read(¤t->request_key_auth->sem); + up_read(¤t->cred->request_key_auth->sem); } else { /* it isn't - use this process as the context */ @@ -214,7 +214,7 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, return authkey; auth_key_revoked: - up_read(¤t->request_key_auth->sem); + up_read(¤t->cred->request_key_auth->sem); kfree(rka->callout_info); kfree(rka); kleave("= -EKEYREVOKED"); diff --git a/security/selinux/exports.c b/security/selinux/exports.c index 64af2d3409ef..cf02490cd1eb 100644 --- a/security/selinux/exports.c +++ b/security/selinux/exports.c @@ -39,7 +39,7 @@ EXPORT_SYMBOL_GPL(selinux_string_to_sid); int selinux_secmark_relabel_packet_permission(u32 sid) { if (selinux_enabled) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; return avc_has_perm(tsec->sid, sid, SECCLASS_PACKET, PACKET__RELABELTO, NULL); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 9f6da154cc82..328308f2882a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -167,21 +167,21 @@ static int task_alloc_security(struct task_struct *task) return -ENOMEM; tsec->osid = tsec->sid = SECINITSID_UNLABELED; - task->security = tsec; + task->cred->security = tsec; return 0; } static void task_free_security(struct task_struct *task) { - struct task_security_struct *tsec = task->security; - task->security = NULL; + struct task_security_struct *tsec = task->cred->security; + task->cred->security = NULL; kfree(tsec); } static int inode_alloc_security(struct inode *inode) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct inode_security_struct *isec; isec = kmem_cache_zalloc(sel_inode_cache, GFP_NOFS); @@ -215,7 +215,7 @@ static void inode_free_security(struct inode *inode) static int file_alloc_security(struct file *file) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct file_security_struct *fsec; fsec = kzalloc(sizeof(struct file_security_struct), GFP_KERNEL); @@ -554,7 +554,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, struct security_mnt_opts *opts) { int rc = 0, i; - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct superblock_security_struct *sbsec = sb->s_security; const char *name = sb->s_type->name; struct inode *inode = sbsec->sb->s_root->d_inode; @@ -1353,8 +1353,8 @@ static int task_has_perm(struct task_struct *tsk1, { struct task_security_struct *tsec1, *tsec2; - tsec1 = tsk1->security; - tsec2 = tsk2->security; + tsec1 = tsk1->cred->security; + tsec2 = tsk2->cred->security; return avc_has_perm(tsec1->sid, tsec2->sid, SECCLASS_PROCESS, perms, NULL); } @@ -1374,7 +1374,7 @@ static int task_has_capability(struct task_struct *tsk, u32 av = CAP_TO_MASK(cap); int rc; - tsec = tsk->security; + tsec = tsk->cred->security; AVC_AUDIT_DATA_INIT(&ad, CAP); ad.tsk = tsk; @@ -1405,7 +1405,7 @@ static int task_has_system(struct task_struct *tsk, { struct task_security_struct *tsec; - tsec = tsk->security; + tsec = tsk->cred->security; return avc_has_perm(tsec->sid, SECINITSID_KERNEL, SECCLASS_SYSTEM, perms, NULL); @@ -1426,7 +1426,7 @@ static int inode_has_perm(struct task_struct *tsk, if (unlikely(IS_PRIVATE(inode))) return 0; - tsec = tsk->security; + tsec = tsk->cred->security; isec = inode->i_security; if (!adp) { @@ -1466,7 +1466,7 @@ static int file_has_perm(struct task_struct *tsk, struct file *file, u32 av) { - struct task_security_struct *tsec = tsk->security; + struct task_security_struct *tsec = tsk->cred->security; struct file_security_struct *fsec = file->f_security; struct inode *inode = file->f_path.dentry->d_inode; struct avc_audit_data ad; @@ -1503,7 +1503,7 @@ static int may_create(struct inode *dir, struct avc_audit_data ad; int rc; - tsec = current->security; + tsec = current->cred->security; dsec = dir->i_security; sbsec = dir->i_sb->s_security; @@ -1540,7 +1540,7 @@ static int may_create_key(u32 ksid, { struct task_security_struct *tsec; - tsec = ctx->security; + tsec = ctx->cred->security; return avc_has_perm(tsec->sid, ksid, SECCLASS_KEY, KEY__CREATE, NULL); } @@ -1561,7 +1561,7 @@ static int may_link(struct inode *dir, u32 av; int rc; - tsec = current->security; + tsec = current->cred->security; dsec = dir->i_security; isec = dentry->d_inode->i_security; @@ -1606,7 +1606,7 @@ static inline int may_rename(struct inode *old_dir, int old_is_dir, new_is_dir; int rc; - tsec = current->security; + tsec = current->cred->security; old_dsec = old_dir->i_security; old_isec = old_dentry->d_inode->i_security; old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode); @@ -1659,7 +1659,7 @@ static int superblock_has_perm(struct task_struct *tsk, struct task_security_struct *tsec; struct superblock_security_struct *sbsec; - tsec = tsk->security; + tsec = tsk->cred->security; sbsec = sb->s_security; return avc_has_perm(tsec->sid, sbsec->sid, SECCLASS_FILESYSTEM, perms, ad); @@ -1758,8 +1758,8 @@ static int selinux_ptrace_may_access(struct task_struct *child, return rc; if (mode == PTRACE_MODE_READ) { - struct task_security_struct *tsec = current->security; - struct task_security_struct *csec = child->security; + struct task_security_struct *tsec = current->cred->security; + struct task_security_struct *csec = child->cred->security; return avc_has_perm(tsec->sid, csec->sid, SECCLASS_FILE, FILE__READ, NULL); } @@ -1874,7 +1874,7 @@ static int selinux_sysctl(ctl_table *table, int op) if (rc) return rc; - tsec = current->security; + tsec = current->cred->security; rc = selinux_sysctl_get_sid(table, (op == 0001) ? SECCLASS_DIR : SECCLASS_FILE, &tsid); @@ -2025,7 +2025,7 @@ static int selinux_bprm_set_security(struct linux_binprm *bprm) if (bsec->set) return 0; - tsec = current->security; + tsec = current->cred->security; isec = inode->i_security; /* Default to the current task SID. */ @@ -2090,7 +2090,7 @@ static int selinux_bprm_check_security(struct linux_binprm *bprm) static int selinux_bprm_secureexec(struct linux_binprm *bprm) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; int atsecure = 0; if (tsec->osid != tsec->sid) { @@ -2214,7 +2214,7 @@ static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) secondary_ops->bprm_apply_creds(bprm, unsafe); - tsec = current->security; + tsec = current->cred->security; bsec = bprm->security; sid = bsec->sid; @@ -2243,7 +2243,7 @@ static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) rcu_read_lock(); tracer = tracehook_tracer_task(current); if (likely(tracer != NULL)) { - sec = tracer->security; + sec = tracer->cred->security; ptsid = sec->sid; } rcu_read_unlock(); @@ -2274,7 +2274,7 @@ static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm) int rc, i; unsigned long flags; - tsec = current->security; + tsec = current->cred->security; bsec = bprm->security; if (bsec->unsafe) { @@ -2521,7 +2521,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, int rc; char *namep = NULL, *context; - tsec = current->security; + tsec = current->cred->security; dsec = dir->i_security; sbsec = dir->i_sb->s_security; @@ -2706,7 +2706,7 @@ static int selinux_inode_setotherxattr(struct dentry *dentry, const char *name) static int selinux_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct inode *inode = dentry->d_inode; struct inode_security_struct *isec = inode->i_security; struct superblock_security_struct *sbsec; @@ -2918,7 +2918,7 @@ static int selinux_revalidate_file_permission(struct file *file, int mask) static int selinux_file_permission(struct file *file, int mask) { struct inode *inode = file->f_path.dentry->d_inode; - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct file_security_struct *fsec = file->f_security; struct inode_security_struct *isec = inode->i_security; @@ -2995,7 +2995,8 @@ static int selinux_file_mmap(struct file *file, unsigned long reqprot, unsigned long addr, unsigned long addr_only) { int rc = 0; - u32 sid = ((struct task_security_struct *)(current->security))->sid; + u32 sid = ((struct task_security_struct *) + (current->cred->security))->sid; if (addr < mmap_min_addr) rc = avc_has_perm(sid, sid, SECCLASS_MEMPROTECT, @@ -3107,7 +3108,7 @@ static int selinux_file_set_fowner(struct file *file) struct task_security_struct *tsec; struct file_security_struct *fsec; - tsec = current->security; + tsec = current->cred->security; fsec = file->f_security; fsec->fown_sid = tsec->sid; @@ -3125,7 +3126,7 @@ static int selinux_file_send_sigiotask(struct task_struct *tsk, /* struct fown_struct is never outside the context of a struct file */ file = container_of(fown, struct file, f_owner); - tsec = tsk->security; + tsec = tsk->cred->security; fsec = file->f_security; if (!signum) @@ -3188,12 +3189,12 @@ static int selinux_task_alloc_security(struct task_struct *tsk) struct task_security_struct *tsec1, *tsec2; int rc; - tsec1 = current->security; + tsec1 = current->cred->security; rc = task_alloc_security(tsk); if (rc) return rc; - tsec2 = tsk->security; + tsec2 = tsk->cred->security; tsec2->osid = tsec1->osid; tsec2->sid = tsec1->sid; @@ -3251,7 +3252,7 @@ static int selinux_task_getsid(struct task_struct *p) static void selinux_task_getsecid(struct task_struct *p, u32 *secid) { - struct task_security_struct *tsec = p->security; + struct task_security_struct *tsec = p->cred->security; *secid = tsec->sid; } @@ -3343,7 +3344,7 @@ static int selinux_task_kill(struct task_struct *p, struct siginfo *info, perm = PROCESS__SIGNULL; /* null signal; existence test */ else perm = signal_to_av(sig); - tsec = p->security; + tsec = p->cred->security; if (secid) rc = avc_has_perm(secid, tsec->sid, SECCLASS_PROCESS, perm, NULL); else @@ -3375,7 +3376,7 @@ static void selinux_task_reparent_to_init(struct task_struct *p) secondary_ops->task_reparent_to_init(p); - tsec = p->security; + tsec = p->cred->security; tsec->osid = tsec->sid; tsec->sid = SECINITSID_KERNEL; return; @@ -3384,7 +3385,7 @@ static void selinux_task_reparent_to_init(struct task_struct *p) static void selinux_task_to_inode(struct task_struct *p, struct inode *inode) { - struct task_security_struct *tsec = p->security; + struct task_security_struct *tsec = p->cred->security; struct inode_security_struct *isec = inode->i_security; isec->sid = tsec->sid; @@ -3632,7 +3633,7 @@ static int socket_has_perm(struct task_struct *task, struct socket *sock, struct avc_audit_data ad; int err = 0; - tsec = task->security; + tsec = task->cred->security; isec = SOCK_INODE(sock)->i_security; if (isec->sid == SECINITSID_KERNEL) @@ -3656,7 +3657,7 @@ static int selinux_socket_create(int family, int type, if (kern) goto out; - tsec = current->security; + tsec = current->cred->security; newsid = tsec->sockcreate_sid ? : tsec->sid; err = avc_has_perm(tsec->sid, newsid, socket_type_to_security_class(family, type, @@ -3677,7 +3678,7 @@ static int selinux_socket_post_create(struct socket *sock, int family, isec = SOCK_INODE(sock)->i_security; - tsec = current->security; + tsec = current->cred->security; newsid = tsec->sockcreate_sid ? : tsec->sid; isec->sclass = socket_type_to_security_class(family, type, protocol); isec->sid = kern ? SECINITSID_KERNEL : newsid; @@ -3723,7 +3724,7 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in struct sock *sk = sock->sk; u32 sid, node_perm; - tsec = current->security; + tsec = current->cred->security; isec = SOCK_INODE(sock)->i_security; if (family == PF_INET) { @@ -4764,7 +4765,7 @@ static int ipc_alloc_security(struct task_struct *task, struct kern_ipc_perm *perm, u16 sclass) { - struct task_security_struct *tsec = task->security; + struct task_security_struct *tsec = task->cred->security; struct ipc_security_struct *isec; isec = kzalloc(sizeof(struct ipc_security_struct), GFP_KERNEL); @@ -4814,7 +4815,7 @@ static int ipc_has_perm(struct kern_ipc_perm *ipc_perms, struct ipc_security_struct *isec; struct avc_audit_data ad; - tsec = current->security; + tsec = current->cred->security; isec = ipc_perms->security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -4845,7 +4846,7 @@ static int selinux_msg_queue_alloc_security(struct msg_queue *msq) if (rc) return rc; - tsec = current->security; + tsec = current->cred->security; isec = msq->q_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -4871,7 +4872,7 @@ static int selinux_msg_queue_associate(struct msg_queue *msq, int msqflg) struct ipc_security_struct *isec; struct avc_audit_data ad; - tsec = current->security; + tsec = current->cred->security; isec = msq->q_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -4917,7 +4918,7 @@ static int selinux_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, struct avc_audit_data ad; int rc; - tsec = current->security; + tsec = current->cred->security; isec = msq->q_perm.security; msec = msg->security; @@ -4965,7 +4966,7 @@ static int selinux_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, struct avc_audit_data ad; int rc; - tsec = target->security; + tsec = target->cred->security; isec = msq->q_perm.security; msec = msg->security; @@ -4992,7 +4993,7 @@ static int selinux_shm_alloc_security(struct shmid_kernel *shp) if (rc) return rc; - tsec = current->security; + tsec = current->cred->security; isec = shp->shm_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -5018,7 +5019,7 @@ static int selinux_shm_associate(struct shmid_kernel *shp, int shmflg) struct ipc_security_struct *isec; struct avc_audit_data ad; - tsec = current->security; + tsec = current->cred->security; isec = shp->shm_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -5091,7 +5092,7 @@ static int selinux_sem_alloc_security(struct sem_array *sma) if (rc) return rc; - tsec = current->security; + tsec = current->cred->security; isec = sma->sem_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -5117,7 +5118,7 @@ static int selinux_sem_associate(struct sem_array *sma, int semflg) struct ipc_security_struct *isec; struct avc_audit_data ad; - tsec = current->security; + tsec = current->cred->security; isec = sma->sem_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -5224,7 +5225,7 @@ static int selinux_getprocattr(struct task_struct *p, return error; } - tsec = p->security; + tsec = p->cred->security; if (!strcmp(name, "current")) sid = tsec->sid; @@ -5308,7 +5309,7 @@ static int selinux_setprocattr(struct task_struct *p, operation. See selinux_bprm_set_security for the execve checks and may_create for the file creation checks. The operation will then fail if the context is not permitted. */ - tsec = p->security; + tsec = p->cred->security; if (!strcmp(name, "exec")) tsec->exec_sid = sid; else if (!strcmp(name, "fscreate")) @@ -5361,7 +5362,8 @@ boundary_ok: rcu_read_lock(); tracer = tracehook_tracer_task(p); if (tracer != NULL) { - struct task_security_struct *ptsec = tracer->security; + struct task_security_struct *ptsec = + tracer->cred->security; u32 ptsid = ptsec->sid; rcu_read_unlock(); error = avc_has_perm_noaudit(ptsid, sid, @@ -5405,7 +5407,7 @@ static void selinux_release_secctx(char *secdata, u32 seclen) static int selinux_key_alloc(struct key *k, struct task_struct *tsk, unsigned long flags) { - struct task_security_struct *tsec = tsk->security; + struct task_security_struct *tsec = tsk->cred->security; struct key_security_struct *ksec; ksec = kzalloc(sizeof(struct key_security_struct), GFP_KERNEL); @@ -5439,7 +5441,7 @@ static int selinux_key_permission(key_ref_t key_ref, key = key_ref_to_ptr(key_ref); - tsec = ctx->security; + tsec = ctx->cred->security; ksec = key->security; /* if no specific permissions are requested, we skip the @@ -5683,7 +5685,7 @@ static __init int selinux_init(void) /* Set the security state for the initial task. */ if (task_alloc_security(current)) panic("SELinux: Failed to initialize initial task.\n"); - tsec = current->security; + tsec = current->cred->security; tsec->osid = tsec->sid = SECINITSID_KERNEL; sel_inode_cache = kmem_cache_create("selinux_inode_security", diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 69c9dccc8cf0..10715d1330b9 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -97,7 +97,7 @@ static int task_has_security(struct task_struct *tsk, { struct task_security_struct *tsec; - tsec = tsk->security; + tsec = tsk->cred->security; if (!tsec) return -EACCES; diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 8f17f542a116..d7db76617b0e 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -197,7 +197,7 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *uctx, u32 sid) { int rc = 0; - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct xfrm_sec_ctx *ctx = NULL; char *ctx_str = NULL; u32 str_len; @@ -333,7 +333,7 @@ void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx) */ int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; int rc = 0; if (ctx) { @@ -378,7 +378,7 @@ void selinux_xfrm_state_free(struct xfrm_state *x) */ int selinux_xfrm_state_delete(struct xfrm_state *x) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct xfrm_sec_ctx *ctx = x->security; int rc = 0; diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index 79ff21ed4c3b..b6dd4fc0fb0b 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -164,7 +164,7 @@ int smk_curacc(char *obj_label, u32 mode) { int rc; - rc = smk_access(current->security, obj_label, mode); + rc = smk_access(current->cred->security, obj_label, mode); if (rc == 0) return 0; @@ -173,7 +173,7 @@ int smk_curacc(char *obj_label, u32 mode) * only one that gets privilege and current does not * have that label. */ - if (smack_onlycap != NULL && smack_onlycap != current->security) + if (smack_onlycap != NULL && smack_onlycap != current->cred->security) return rc; if (capable(CAP_MAC_OVERRIDE)) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 6e2dc0bab70d..791da238d049 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -102,7 +102,8 @@ static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode) if (rc != 0) return rc; - rc = smk_access(current->security, ctp->security, MAY_READWRITE); + rc = smk_access(current->cred->security, ctp->cred->security, + MAY_READWRITE); if (rc != 0 && capable(CAP_MAC_OVERRIDE)) return 0; return rc; @@ -124,7 +125,8 @@ static int smack_ptrace_traceme(struct task_struct *ptp) if (rc != 0) return rc; - rc = smk_access(ptp->security, current->security, MAY_READWRITE); + rc = smk_access(ptp->cred->security, current->cred->security, + MAY_READWRITE); if (rc != 0 && has_capability(ptp, CAP_MAC_OVERRIDE)) return 0; return rc; @@ -141,7 +143,7 @@ static int smack_ptrace_traceme(struct task_struct *ptp) static int smack_syslog(int type) { int rc; - char *sp = current->security; + char *sp = current->cred->security; rc = cap_syslog(type); if (rc != 0) @@ -373,7 +375,7 @@ static int smack_sb_umount(struct vfsmount *mnt, int flags) */ static int smack_inode_alloc_security(struct inode *inode) { - inode->i_security = new_inode_smack(current->security); + inode->i_security = new_inode_smack(current->cred->security); if (inode->i_security == NULL) return -ENOMEM; return 0; @@ -818,7 +820,7 @@ static int smack_file_permission(struct file *file, int mask) */ static int smack_file_alloc_security(struct file *file) { - file->f_security = current->security; + file->f_security = current->cred->security; return 0; } @@ -916,7 +918,7 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, */ static int smack_file_set_fowner(struct file *file) { - file->f_security = current->security; + file->f_security = current->cred->security; return 0; } @@ -941,7 +943,7 @@ static int smack_file_send_sigiotask(struct task_struct *tsk, * struct fown_struct is never outside the context of a struct file */ file = container_of(fown, struct file, f_owner); - rc = smk_access(file->f_security, tsk->security, MAY_WRITE); + rc = smk_access(file->f_security, tsk->cred->security, MAY_WRITE); if (rc != 0 && has_capability(tsk, CAP_MAC_OVERRIDE)) return 0; return rc; @@ -984,7 +986,7 @@ static int smack_file_receive(struct file *file) */ static int smack_task_alloc_security(struct task_struct *tsk) { - tsk->security = current->security; + tsk->cred->security = current->cred->security; return 0; } @@ -999,7 +1001,7 @@ static int smack_task_alloc_security(struct task_struct *tsk) */ static void smack_task_free_security(struct task_struct *task) { - task->security = NULL; + task->cred->security = NULL; } /** @@ -1011,7 +1013,7 @@ static void smack_task_free_security(struct task_struct *task) */ static int smack_task_setpgid(struct task_struct *p, pid_t pgid) { - return smk_curacc(p->security, MAY_WRITE); + return smk_curacc(p->cred->security, MAY_WRITE); } /** @@ -1022,7 +1024,7 @@ static int smack_task_setpgid(struct task_struct *p, pid_t pgid) */ static int smack_task_getpgid(struct task_struct *p) { - return smk_curacc(p->security, MAY_READ); + return smk_curacc(p->cred->security, MAY_READ); } /** @@ -1033,7 +1035,7 @@ static int smack_task_getpgid(struct task_struct *p) */ static int smack_task_getsid(struct task_struct *p) { - return smk_curacc(p->security, MAY_READ); + return smk_curacc(p->cred->security, MAY_READ); } /** @@ -1045,7 +1047,7 @@ static int smack_task_getsid(struct task_struct *p) */ static void smack_task_getsecid(struct task_struct *p, u32 *secid) { - *secid = smack_to_secid(p->security); + *secid = smack_to_secid(p->cred->security); } /** @@ -1061,7 +1063,7 @@ static int smack_task_setnice(struct task_struct *p, int nice) rc = cap_task_setnice(p, nice); if (rc == 0) - rc = smk_curacc(p->security, MAY_WRITE); + rc = smk_curacc(p->cred->security, MAY_WRITE); return rc; } @@ -1078,7 +1080,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio) rc = cap_task_setioprio(p, ioprio); if (rc == 0) - rc = smk_curacc(p->security, MAY_WRITE); + rc = smk_curacc(p->cred->security, MAY_WRITE); return rc; } @@ -1090,7 +1092,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio) */ static int smack_task_getioprio(struct task_struct *p) { - return smk_curacc(p->security, MAY_READ); + return smk_curacc(p->cred->security, MAY_READ); } /** @@ -1108,7 +1110,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy, rc = cap_task_setscheduler(p, policy, lp); if (rc == 0) - rc = smk_curacc(p->security, MAY_WRITE); + rc = smk_curacc(p->cred->security, MAY_WRITE); return rc; } @@ -1120,7 +1122,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy, */ static int smack_task_getscheduler(struct task_struct *p) { - return smk_curacc(p->security, MAY_READ); + return smk_curacc(p->cred->security, MAY_READ); } /** @@ -1131,7 +1133,7 @@ static int smack_task_getscheduler(struct task_struct *p) */ static int smack_task_movememory(struct task_struct *p) { - return smk_curacc(p->security, MAY_WRITE); + return smk_curacc(p->cred->security, MAY_WRITE); } /** @@ -1154,13 +1156,13 @@ static int smack_task_kill(struct task_struct *p, struct siginfo *info, * can write the receiver. */ if (secid == 0) - return smk_curacc(p->security, MAY_WRITE); + return smk_curacc(p->cred->security, MAY_WRITE); /* * If the secid isn't 0 we're dealing with some USB IO * specific behavior. This is not clean. For one thing * we can't take privilege into account. */ - return smk_access(smack_from_secid(secid), p->security, MAY_WRITE); + return smk_access(smack_from_secid(secid), p->cred->security, MAY_WRITE); } /** @@ -1173,7 +1175,7 @@ static int smack_task_wait(struct task_struct *p) { int rc; - rc = smk_access(current->security, p->security, MAY_WRITE); + rc = smk_access(current->cred->security, p->cred->security, MAY_WRITE); if (rc == 0) return 0; @@ -1204,7 +1206,7 @@ static int smack_task_wait(struct task_struct *p) static void smack_task_to_inode(struct task_struct *p, struct inode *inode) { struct inode_smack *isp = inode->i_security; - isp->smk_inode = p->security; + isp->smk_inode = p->cred->security; } /* @@ -1223,7 +1225,7 @@ static void smack_task_to_inode(struct task_struct *p, struct inode *inode) */ static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags) { - char *csp = current->security; + char *csp = current->cred->security; struct socket_smack *ssp; ssp = kzalloc(sizeof(struct socket_smack), gfp_flags); @@ -1448,7 +1450,7 @@ static int smack_flags_to_may(int flags) */ static int smack_msg_msg_alloc_security(struct msg_msg *msg) { - msg->security = current->security; + msg->security = current->cred->security; return 0; } @@ -1484,7 +1486,7 @@ static int smack_shm_alloc_security(struct shmid_kernel *shp) { struct kern_ipc_perm *isp = &shp->shm_perm; - isp->security = current->security; + isp->security = current->cred->security; return 0; } @@ -1593,7 +1595,7 @@ static int smack_sem_alloc_security(struct sem_array *sma) { struct kern_ipc_perm *isp = &sma->sem_perm; - isp->security = current->security; + isp->security = current->cred->security; return 0; } @@ -1697,7 +1699,7 @@ static int smack_msg_queue_alloc_security(struct msg_queue *msq) { struct kern_ipc_perm *kisp = &msq->q_perm; - kisp->security = current->security; + kisp->security = current->cred->security; return 0; } @@ -1852,7 +1854,7 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode) struct super_block *sbp; struct superblock_smack *sbsp; struct inode_smack *isp; - char *csp = current->security; + char *csp = current->cred->security; char *fetched; char *final; struct dentry *dp; @@ -2009,7 +2011,7 @@ static int smack_getprocattr(struct task_struct *p, char *name, char **value) if (strcmp(name, "current") != 0) return -EINVAL; - cp = kstrdup(p->security, GFP_KERNEL); + cp = kstrdup(p->cred->security, GFP_KERNEL); if (cp == NULL) return -ENOMEM; @@ -2055,7 +2057,7 @@ static int smack_setprocattr(struct task_struct *p, char *name, if (newsmack == NULL) return -EINVAL; - p->security = newsmack; + p->cred->security = newsmack; return size; } @@ -2288,8 +2290,8 @@ static void smack_sock_graft(struct sock *sk, struct socket *parent) return; ssp = sk->sk_security; - ssp->smk_in = current->security; - ssp->smk_out = current->security; + ssp->smk_in = current->cred->security; + ssp->smk_out = current->cred->security; ssp->smk_packet[0] = '\0'; rc = smack_netlabel(sk); @@ -2362,7 +2364,7 @@ static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb, static int smack_key_alloc(struct key *key, struct task_struct *tsk, unsigned long flags) { - key->security = tsk->security; + key->security = tsk->cred->security; return 0; } @@ -2403,10 +2405,11 @@ static int smack_key_permission(key_ref_t key_ref, /* * This should not occur */ - if (context->security == NULL) + if (context->cred->security == NULL) return -EACCES; - return smk_access(context->security, keyp->security, MAY_READWRITE); + return smk_access(context->cred->security, keyp->security, + MAY_READWRITE); } #endif /* CONFIG_KEYS */ @@ -2726,7 +2729,7 @@ static __init int smack_init(void) /* * Set the security state for the initial task. */ - current->security = &smack_known_floor.smk_known; + current->cred->security = &smack_known_floor.smk_known; /* * Initialize locks diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index c21d8c8bf0c7..c5ca279e0506 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -336,7 +336,7 @@ static void smk_cipso_doi(void) audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); - audit_info.secid = smack_to_secid(current->security); + audit_info.secid = smack_to_secid(current->cred->security); rc = netlbl_cfg_map_del(NULL, &audit_info); if (rc != 0) @@ -371,7 +371,7 @@ static void smk_unlbl_ambient(char *oldambient) audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); - audit_info.secid = smack_to_secid(current->security); + audit_info.secid = smack_to_secid(current->cred->security); if (oldambient != NULL) { rc = netlbl_cfg_map_del(oldambient, &audit_info); @@ -843,7 +843,7 @@ static ssize_t smk_write_onlycap(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { char in[SMK_LABELLEN]; - char *sp = current->security; + char *sp = current->cred->security; if (!capable(CAP_MAC_ADMIN)) return -EPERM; -- cgit v1.2.3 From f1752eec6145c97163dbce62d17cf5d928e28a27 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:17 +1100 Subject: CRED: Detach the credentials from task_struct Detach the credentials from task_struct, duplicating them in copy_process() and releasing them in __put_task_struct(). Signed-off-by: David Howells Acked-by: James Morris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/cred.h | 29 ++++++++++++++ include/linux/init_task.h | 16 +------- include/linux/sched.h | 1 - include/linux/security.h | 26 ++++++------- kernel/Makefile | 2 +- kernel/cred.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++ kernel/fork.c | 24 +++--------- security/capability.c | 8 ++-- security/security.c | 8 ++-- security/selinux/hooks.c | 32 ++++++++-------- security/smack/smack_lsm.c | 20 +++++----- 11 files changed, 179 insertions(+), 83 deletions(-) create mode 100644 kernel/cred.c (limited to 'include') diff --git a/include/linux/cred.h b/include/linux/cred.h index 3e65587a72e5..a7a686074cb0 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -158,4 +158,33 @@ do { \ *(_gid) = current->cred->fsgid; \ } while(0) +extern void __put_cred(struct cred *); +extern int copy_creds(struct task_struct *, unsigned long); + +/** + * get_cred - Get a reference on a set of credentials + * @cred: The credentials to reference + * + * Get a reference on the specified set of credentials. The caller must + * release the reference. + */ +static inline struct cred *get_cred(struct cred *cred) +{ + atomic_inc(&cred->usage); + return cred; +} + +/** + * put_cred - Release a reference to a set of credentials + * @cred: The credentials to release + * + * Release a reference to a set of credentials, deleting them when the last ref + * is released. + */ +static inline void put_cred(struct cred *cred) +{ + if (atomic_dec_and_test(&(cred)->usage)) + __put_cred(cred); +} + #endif /* _LINUX_CRED_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 9de41ccd67b5..5e24c54b6dfd 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -115,19 +115,6 @@ extern struct group_info init_groups; extern struct cred init_cred; -#define INIT_CRED(p) \ -{ \ - .usage = ATOMIC_INIT(3), \ - .securebits = SECUREBITS_DEFAULT, \ - .cap_inheritable = CAP_INIT_INH_SET, \ - .cap_permitted = CAP_FULL_SET, \ - .cap_effective = CAP_INIT_EFF_SET, \ - .cap_bset = CAP_INIT_BSET, \ - .user = INIT_USER, \ - .group_info = &init_groups, \ - .lock = __SPIN_LOCK_UNLOCKED(p.lock), \ -} - /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -162,8 +149,7 @@ extern struct cred init_cred; .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ - .__temp_cred = INIT_CRED(tsk.__temp_cred), \ - .cred = &tsk.__temp_cred, \ + .cred = &init_cred, \ .comm = "swapper", \ .thread = INIT_THREAD, \ .fs = &init_fs, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index c8b92502354d..740cf946c8cc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1151,7 +1151,6 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - struct cred __temp_cred __deprecated; /* temporary credentials to be removed */ struct cred *cred; /* actual/objective task credentials */ char comm[TASK_COMM_LEN]; /* executable name excluding path diff --git a/include/linux/security.h b/include/linux/security.h index 9f305d4a31a7..9239cc11eb9c 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -593,15 +593,15 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * manual page for definitions of the @clone_flags. * @clone_flags contains the flags indicating what should be shared. * Return 0 if permission is granted. - * @task_alloc_security: - * @p contains the task_struct for child process. - * Allocate and attach a security structure to the p->security field. The - * security field is initialized to NULL when the task structure is + * @cred_alloc_security: + * @cred contains the cred struct for child process. + * Allocate and attach a security structure to the cred->security field. + * The security field is initialized to NULL when the task structure is * allocated. * Return 0 if operation was successful. - * @task_free_security: - * @p contains the task_struct for process. - * Deallocate and clear the p->security field. + * @cred_free: + * @cred points to the credentials. + * Deallocate and clear the cred->security field in a set of credentials. * @task_setuid: * Check permission before setting one or more of the user identity * attributes of the current process. The @flags parameter indicates @@ -1405,8 +1405,8 @@ struct security_operations { int (*dentry_open) (struct file *file); int (*task_create) (unsigned long clone_flags); - int (*task_alloc_security) (struct task_struct *p); - void (*task_free_security) (struct task_struct *p); + int (*cred_alloc_security) (struct cred *cred); + void (*cred_free) (struct cred *cred); int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags); int (*task_post_setuid) (uid_t old_ruid /* or fsuid */ , uid_t old_euid, uid_t old_suid, int flags); @@ -1660,8 +1660,8 @@ int security_file_send_sigiotask(struct task_struct *tsk, int security_file_receive(struct file *file); int security_dentry_open(struct file *file); int security_task_create(unsigned long clone_flags); -int security_task_alloc(struct task_struct *p); -void security_task_free(struct task_struct *p); +int security_cred_alloc(struct cred *cred); +void security_cred_free(struct cred *cred); int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags); int security_task_post_setuid(uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags); @@ -2181,12 +2181,12 @@ static inline int security_task_create(unsigned long clone_flags) return 0; } -static inline int security_task_alloc(struct task_struct *p) +static inline int security_cred_alloc(struct cred *cred) { return 0; } -static inline void security_task_free(struct task_struct *p) +static inline void security_cred_free(struct cred *cred) { } static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, diff --git a/kernel/Makefile b/kernel/Makefile index 9a3ec66a9d84..5a6a612c302d 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -9,7 +9,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ - notifier.o ksysfs.o pm_qos_params.o sched_clock.o + notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o CFLAGS_REMOVE_sched.o = -mno-spe diff --git a/kernel/cred.c b/kernel/cred.c new file mode 100644 index 000000000000..833244a7cb05 --- /dev/null +++ b/kernel/cred.c @@ -0,0 +1,96 @@ +/* Task credentials management + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include + +/* + * The initial credentials for the initial task + */ +struct cred init_cred = { + .usage = ATOMIC_INIT(3), + .securebits = SECUREBITS_DEFAULT, + .cap_inheritable = CAP_INIT_INH_SET, + .cap_permitted = CAP_FULL_SET, + .cap_effective = CAP_INIT_EFF_SET, + .cap_bset = CAP_INIT_BSET, + .user = INIT_USER, + .group_info = &init_groups, +}; + +/* + * The RCU callback to actually dispose of a set of credentials + */ +static void put_cred_rcu(struct rcu_head *rcu) +{ + struct cred *cred = container_of(rcu, struct cred, rcu); + + BUG_ON(atomic_read(&cred->usage) != 0); + + key_put(cred->thread_keyring); + key_put(cred->request_key_auth); + put_group_info(cred->group_info); + free_uid(cred->user); + security_cred_free(cred); + kfree(cred); +} + +/** + * __put_cred - Destroy a set of credentials + * @sec: The record to release + * + * Destroy a set of credentials on which no references remain. + */ +void __put_cred(struct cred *cred) +{ + call_rcu(&cred->rcu, put_cred_rcu); +} +EXPORT_SYMBOL(__put_cred); + +/* + * Copy credentials for the new process created by fork() + */ +int copy_creds(struct task_struct *p, unsigned long clone_flags) +{ + struct cred *pcred; + int ret; + + pcred = kmemdup(p->cred, sizeof(*p->cred), GFP_KERNEL); + if (!pcred) + return -ENOMEM; + +#ifdef CONFIG_SECURITY + pcred->security = NULL; +#endif + + ret = security_cred_alloc(pcred); + if (ret < 0) { + kfree(pcred); + return ret; + } + + atomic_set(&pcred->usage, 1); + get_group_info(pcred->group_info); + get_uid(pcred->user); + key_get(pcred->thread_keyring); + key_get(pcred->request_key_auth); + + atomic_inc(&pcred->user->processes); + + /* RCU assignment is unneeded here as no-one can have accessed this + * pointer yet, barring us */ + p->cred = pcred; + return 0; +} diff --git a/kernel/fork.c b/kernel/fork.c index 81fdc7733908..c932e283ddfc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -146,9 +146,7 @@ void __put_task_struct(struct task_struct *tsk) WARN_ON(atomic_read(&tsk->usage)); WARN_ON(tsk == current); - security_task_free(tsk); - free_uid(tsk->__temp_cred.user); - put_group_info(tsk->__temp_cred.group_info); + put_cred(tsk->cred); delayacct_tsk_free(tsk); if (!profile_handoff_task(tsk)) @@ -969,7 +967,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif - p->cred = &p->__temp_cred; retval = -EAGAIN; if (atomic_read(&p->cred->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { @@ -978,9 +975,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto bad_fork_free; } - atomic_inc(&p->cred->user->__count); - atomic_inc(&p->cred->user->processes); - get_group_info(p->cred->group_info); + retval = copy_creds(p, clone_flags); + if (retval < 0) + goto bad_fork_free; /* * If multiple threads are within copy_process(), then this check @@ -1035,9 +1032,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, do_posix_clock_monotonic_gettime(&p->start_time); p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); -#ifdef CONFIG_SECURITY - p->cred->security = NULL; -#endif p->io_context = NULL; p->audit_context = NULL; cgroup_fork(p); @@ -1082,10 +1076,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); - if ((retval = security_task_alloc(p))) - goto bad_fork_cleanup_policy; if ((retval = audit_alloc(p))) - goto bad_fork_cleanup_security; + goto bad_fork_cleanup_policy; /* copy all the process information */ if ((retval = copy_semundo(clone_flags, p))) goto bad_fork_cleanup_audit; @@ -1284,8 +1276,6 @@ bad_fork_cleanup_semundo: exit_sem(p); bad_fork_cleanup_audit: audit_free(p); -bad_fork_cleanup_security: - security_task_free(p); bad_fork_cleanup_policy: #ifdef CONFIG_NUMA mpol_put(p->mempolicy); @@ -1298,9 +1288,7 @@ bad_fork_cleanup_cgroup: bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: - put_group_info(p->cred->group_info); - atomic_dec(&p->cred->user->processes); - free_uid(p->cred->user); + put_cred(p->cred); bad_fork_free: free_task(p); fork_out: diff --git a/security/capability.c b/security/capability.c index 245874819036..6c4b5137ca7b 100644 --- a/security/capability.c +++ b/security/capability.c @@ -340,12 +340,12 @@ static int cap_task_create(unsigned long clone_flags) return 0; } -static int cap_task_alloc_security(struct task_struct *p) +static int cap_cred_alloc_security(struct cred *cred) { return 0; } -static void cap_task_free_security(struct task_struct *p) +static void cap_cred_free(struct cred *cred) { } @@ -890,8 +890,8 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, file_receive); set_to_cap_if_null(ops, dentry_open); set_to_cap_if_null(ops, task_create); - set_to_cap_if_null(ops, task_alloc_security); - set_to_cap_if_null(ops, task_free_security); + set_to_cap_if_null(ops, cred_alloc_security); + set_to_cap_if_null(ops, cred_free); set_to_cap_if_null(ops, task_setuid); set_to_cap_if_null(ops, task_post_setuid); set_to_cap_if_null(ops, task_setgid); diff --git a/security/security.c b/security/security.c index 81c956a12300..d058f7d5b10a 100644 --- a/security/security.c +++ b/security/security.c @@ -616,14 +616,14 @@ int security_task_create(unsigned long clone_flags) return security_ops->task_create(clone_flags); } -int security_task_alloc(struct task_struct *p) +int security_cred_alloc(struct cred *cred) { - return security_ops->task_alloc_security(p); + return security_ops->cred_alloc_security(cred); } -void security_task_free(struct task_struct *p) +void security_cred_free(struct cred *cred) { - security_ops->task_free_security(p); + security_ops->cred_free(cred); } int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 328308f2882a..658435dce37c 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -158,7 +158,7 @@ static int selinux_secmark_enabled(void) /* Allocate and free functions for each kind of security blob. */ -static int task_alloc_security(struct task_struct *task) +static int cred_alloc_security(struct cred *cred) { struct task_security_struct *tsec; @@ -167,18 +167,11 @@ static int task_alloc_security(struct task_struct *task) return -ENOMEM; tsec->osid = tsec->sid = SECINITSID_UNLABELED; - task->cred->security = tsec; + cred->security = tsec; return 0; } -static void task_free_security(struct task_struct *task) -{ - struct task_security_struct *tsec = task->cred->security; - task->cred->security = NULL; - kfree(tsec); -} - static int inode_alloc_security(struct inode *inode) { struct task_security_struct *tsec = current->cred->security; @@ -3184,17 +3177,17 @@ static int selinux_task_create(unsigned long clone_flags) return task_has_perm(current, current, PROCESS__FORK); } -static int selinux_task_alloc_security(struct task_struct *tsk) +static int selinux_cred_alloc_security(struct cred *cred) { struct task_security_struct *tsec1, *tsec2; int rc; tsec1 = current->cred->security; - rc = task_alloc_security(tsk); + rc = cred_alloc_security(cred); if (rc) return rc; - tsec2 = tsk->cred->security; + tsec2 = cred->security; tsec2->osid = tsec1->osid; tsec2->sid = tsec1->sid; @@ -3208,9 +3201,14 @@ static int selinux_task_alloc_security(struct task_struct *tsk) return 0; } -static void selinux_task_free_security(struct task_struct *tsk) +/* + * detach and free the LSM part of a set of credentials + */ +static void selinux_cred_free(struct cred *cred) { - task_free_security(tsk); + struct task_security_struct *tsec = cred->security; + cred->security = NULL; + kfree(tsec); } static int selinux_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) @@ -5552,8 +5550,8 @@ static struct security_operations selinux_ops = { .dentry_open = selinux_dentry_open, .task_create = selinux_task_create, - .task_alloc_security = selinux_task_alloc_security, - .task_free_security = selinux_task_free_security, + .cred_alloc_security = selinux_cred_alloc_security, + .cred_free = selinux_cred_free, .task_setuid = selinux_task_setuid, .task_post_setuid = selinux_task_post_setuid, .task_setgid = selinux_task_setgid, @@ -5683,7 +5681,7 @@ static __init int selinux_init(void) printk(KERN_INFO "SELinux: Initializing.\n"); /* Set the security state for the initial task. */ - if (task_alloc_security(current)) + if (cred_alloc_security(current->cred)) panic("SELinux: Failed to initialize initial task.\n"); tsec = current->cred->security; tsec->osid = tsec->sid = SECINITSID_KERNEL; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 791da238d049..cc837314fb0e 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -975,8 +975,8 @@ static int smack_file_receive(struct file *file) */ /** - * smack_task_alloc_security - "allocate" a task blob - * @tsk: the task in need of a blob + * smack_cred_alloc_security - "allocate" a task cred blob + * @cred: the task creds in need of a blob * * Smack isn't using copies of blobs. Everyone * points to an immutable list. No alloc required. @@ -984,24 +984,24 @@ static int smack_file_receive(struct file *file) * * Always returns 0 */ -static int smack_task_alloc_security(struct task_struct *tsk) +static int smack_cred_alloc_security(struct cred *cred) { - tsk->cred->security = current->cred->security; + cred->security = current->cred->security; return 0; } /** - * smack_task_free_security - "free" a task blob - * @task: the task with the blob + * smack_cred_free - "free" task-level security credentials + * @cred: the credentials in question * * Smack isn't using copies of blobs. Everyone * points to an immutable list. The blobs never go away. * There is no leak here. */ -static void smack_task_free_security(struct task_struct *task) +static void smack_cred_free(struct cred *cred) { - task->cred->security = NULL; + cred->security = NULL; } /** @@ -2630,8 +2630,8 @@ struct security_operations smack_ops = { .file_send_sigiotask = smack_file_send_sigiotask, .file_receive = smack_file_receive, - .task_alloc_security = smack_task_alloc_security, - .task_free_security = smack_task_free_security, + .cred_alloc_security = smack_cred_alloc_security, + .cred_free = smack_cred_free, .task_post_setuid = cap_task_post_setuid, .task_setpgid = smack_task_setpgid, .task_getpgid = smack_task_getpgid, -- cgit v1.2.3 From 86a264abe542cfececb4df129bc45a0338d8cdb9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:18 +1100 Subject: CRED: Wrap current->cred and a few other accessors Wrap current->cred and a few other accessors to hide their actual implementation. Signed-off-by: David Howells Acked-by: James Morris Acked-by: Serge Hallyn Signed-off-by: James Morris --- arch/ia64/ia32/sys_ia32.c | 7 +- drivers/net/tun.c | 8 +- drivers/usb/core/devio.c | 10 ++- fs/binfmt_elf.c | 10 +-- fs/binfmt_elf_fdpic.c | 9 +- fs/exec.c | 5 +- fs/fcntl.c | 3 +- fs/file_table.c | 7 +- fs/hugetlbfs/inode.c | 5 +- fs/ioprio.c | 4 +- fs/smbfs/dir.c | 3 +- include/linux/cred.h | 187 ++++++++++++++++++++++++++++++++---------- include/linux/securebits.h | 2 +- ipc/mqueue.c | 2 +- ipc/shm.c | 4 +- kernel/sys.c | 59 +++++++------ kernel/uid16.c | 31 +++---- net/core/scm.c | 2 +- net/sunrpc/auth.c | 14 ++-- security/commoncap.c | 2 +- security/keys/process_keys.c | 2 +- security/keys/request_key.c | 11 +-- security/selinux/exports.c | 8 +- security/selinux/xfrm.c | 6 +- security/smack/smack_access.c | 2 +- security/smack/smack_lsm.c | 26 +++--- security/smack/smackfs.c | 4 +- 27 files changed, 271 insertions(+), 162 deletions(-) (limited to 'include') diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 2445a9d3488e..16ef61a91d95 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -1767,25 +1767,24 @@ groups16_from_user(struct group_info *group_info, short __user *grouplist) asmlinkage long sys32_getgroups16 (int gidsetsize, short __user *grouplist) { + const struct cred *cred = current_cred(); int i; if (gidsetsize < 0) return -EINVAL; - get_group_info(current->cred->group_info); - i = current->cred->group_info->ngroups; + i = cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups16_to_user(grouplist, current->cred->group_info)) { + if (groups16_to_user(grouplist, cred->group_info)) { i = -EFAULT; goto out; } } out: - put_group_info(current->cred->group_info); return i; } diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b14e2025e221..55dc70c6b4db 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -702,6 +702,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) struct tun_net *tn; struct tun_struct *tun; struct net_device *dev; + const struct cred *cred = current_cred(); int err; tn = net_generic(net, tun_net_id); @@ -712,11 +713,12 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) /* Check permissions */ if (((tun->owner != -1 && - current_euid() != tun->owner) || + cred->euid != tun->owner) || (tun->group != -1 && - current_egid() != tun->group)) && - !capable(CAP_NET_ADMIN)) + cred->egid != tun->group)) && + !capable(CAP_NET_ADMIN)) { return -EPERM; + } } else if (__dev_get_by_name(net, ifr->ifr_name)) return -EINVAL; diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 1aadb9387027..aa79280df15d 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -574,6 +574,7 @@ static int usbdev_open(struct inode *inode, struct file *file) { struct usb_device *dev = NULL; struct dev_state *ps; + const struct cred *cred = current_cred(); int ret; lock_kernel(); @@ -617,8 +618,8 @@ static int usbdev_open(struct inode *inode, struct file *file) init_waitqueue_head(&ps->wait); ps->discsignr = 0; ps->disc_pid = get_pid(task_pid(current)); - ps->disc_uid = current_uid(); - ps->disc_euid = current_euid(); + ps->disc_uid = cred->uid; + ps->disc_euid = cred->euid; ps->disccontext = NULL; ps->ifclaimed = 0; security_task_getsecid(current, &ps->secid); @@ -967,6 +968,7 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb, struct usb_host_endpoint *ep; struct async *as; struct usb_ctrlrequest *dr = NULL; + const struct cred *cred = current_cred(); unsigned int u, totlen, isofrmlen; int ret, ifnum = -1; int is_in; @@ -1174,8 +1176,8 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb, as->signr = uurb->signr; as->ifnum = ifnum; as->pid = get_pid(task_pid(current)); - as->uid = current_uid(); - as->euid = current_euid(); + as->uid = cred->uid; + as->euid = cred->euid; security_task_getsecid(current, &as->secid); if (!is_in) { if (copy_from_user(as->urb->transfer_buffer, uurb->buffer, diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 7a52477ce493..0e6655613169 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -157,7 +157,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, int items; elf_addr_t *elf_info; int ei_index = 0; - struct task_struct *tsk = current; + const struct cred *cred = current_cred(); struct vm_area_struct *vma; /* @@ -223,10 +223,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, NEW_AUX_ENT(AT_BASE, interp_load_addr); NEW_AUX_ENT(AT_FLAGS, 0); NEW_AUX_ENT(AT_ENTRY, exec->e_entry); - NEW_AUX_ENT(AT_UID, tsk->cred->uid); - NEW_AUX_ENT(AT_EUID, tsk->cred->euid); - NEW_AUX_ENT(AT_GID, tsk->cred->gid); - NEW_AUX_ENT(AT_EGID, tsk->cred->egid); + NEW_AUX_ENT(AT_UID, cred->uid); + NEW_AUX_ENT(AT_EUID, cred->euid); + NEW_AUX_ENT(AT_GID, cred->gid); + NEW_AUX_ENT(AT_EGID, cred->egid); NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); NEW_AUX_ENT(AT_EXECFN, bprm->exec); if (k_platform) { diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 9f67054c2c4e..1f6e8c023b4c 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -475,6 +475,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, struct elf_fdpic_params *exec_params, struct elf_fdpic_params *interp_params) { + const struct cred *cred = current_cred(); unsigned long sp, csp, nitems; elf_caddr_t __user *argv, *envp; size_t platform_len = 0, len; @@ -623,10 +624,10 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, NEW_AUX_ENT(AT_BASE, interp_params->elfhdr_addr); NEW_AUX_ENT(AT_FLAGS, 0); NEW_AUX_ENT(AT_ENTRY, exec_params->entry_addr); - NEW_AUX_ENT(AT_UID, (elf_addr_t) current->cred->uid); - NEW_AUX_ENT(AT_EUID, (elf_addr_t) current->cred->euid); - NEW_AUX_ENT(AT_GID, (elf_addr_t) current->cred->gid); - NEW_AUX_ENT(AT_EGID, (elf_addr_t) current->cred->egid); + NEW_AUX_ENT(AT_UID, (elf_addr_t) cred->uid); + NEW_AUX_ENT(AT_EUID, (elf_addr_t) cred->euid); + NEW_AUX_ENT(AT_GID, (elf_addr_t) cred->gid); + NEW_AUX_ENT(AT_EGID, (elf_addr_t) cred->egid); NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); NEW_AUX_ENT(AT_EXECFN, bprm->exec); diff --git a/fs/exec.c b/fs/exec.c index 31149e430a89..a5330e1a2216 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1388,6 +1388,7 @@ EXPORT_SYMBOL(set_binfmt); */ static int format_corename(char *corename, long signr) { + const struct cred *cred = current_cred(); const char *pat_ptr = core_pattern; int ispipe = (*pat_ptr == '|'); char *out_ptr = corename; @@ -1424,7 +1425,7 @@ static int format_corename(char *corename, long signr) /* uid */ case 'u': rc = snprintf(out_ptr, out_end - out_ptr, - "%d", current_uid()); + "%d", cred->uid); if (rc > out_end - out_ptr) goto out; out_ptr += rc; @@ -1432,7 +1433,7 @@ static int format_corename(char *corename, long signr) /* gid */ case 'g': rc = snprintf(out_ptr, out_end - out_ptr, - "%d", current_gid()); + "%d", cred->gid); if (rc > out_end - out_ptr) goto out; out_ptr += rc; diff --git a/fs/fcntl.c b/fs/fcntl.c index 63964d863ad6..c594cc0e40fb 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -205,13 +205,14 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, int __f_setown(struct file *filp, struct pid *pid, enum pid_type type, int force) { + const struct cred *cred = current_cred(); int err; err = security_file_set_fowner(filp); if (err) return err; - f_modown(filp, pid, type, current_uid(), current_euid(), force); + f_modown(filp, pid, type, cred->uid, cred->euid, force); return 0; } EXPORT_SYMBOL(__f_setown); diff --git a/fs/file_table.c b/fs/file_table.c index 3152b53cfab0..bc4563fe791d 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -94,7 +94,7 @@ int proc_nr_files(ctl_table *table, int write, struct file *filp, */ struct file *get_empty_filp(void) { - struct task_struct *tsk; + const struct cred *cred = current_cred(); static int old_max; struct file * f; @@ -118,12 +118,11 @@ struct file *get_empty_filp(void) if (security_file_alloc(f)) goto fail_sec; - tsk = current; INIT_LIST_HEAD(&f->f_u.fu_list); atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); - f->f_uid = tsk->cred->fsuid; - f->f_gid = tsk->cred->fsgid; + f->f_uid = cred->fsuid; + f->f_gid = cred->fsgid; eventpoll_init_file(f); /* f->f_version: 0 */ return f; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 870a721b8bd2..7d479ce3aceb 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -951,6 +951,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size) struct inode *inode; struct dentry *dentry, *root; struct qstr quick_string; + struct user_struct *user = current_user(); if (!hugetlbfs_vfsmount) return ERR_PTR(-ENOENT); @@ -958,7 +959,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size) if (!can_do_hugetlb_shm()) return ERR_PTR(-EPERM); - if (!user_shm_lock(size, current->cred->user)) + if (!user_shm_lock(size, user)) return ERR_PTR(-ENOMEM); root = hugetlbfs_vfsmount->mnt_root; @@ -998,7 +999,7 @@ out_inode: out_dentry: dput(dentry); out_shm_unlock: - user_shm_unlock(size, current->cred->user); + user_shm_unlock(size, user); return ERR_PTR(error); } diff --git a/fs/ioprio.c b/fs/ioprio.c index bb5210af77c2..5112554fd210 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -123,7 +123,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) break; case IOPRIO_WHO_USER: if (!who) - user = current->cred->user; + user = current_user(); else user = find_user(who); @@ -216,7 +216,7 @@ asmlinkage long sys_ioprio_get(int which, int who) break; case IOPRIO_WHO_USER: if (!who) - user = current->cred->user; + user = current_user(); else user = find_user(who); diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c index 9e9bb0db4f6d..e7ddd0328ddc 100644 --- a/fs/smbfs/dir.c +++ b/fs/smbfs/dir.c @@ -667,8 +667,7 @@ smb_make_node(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) attr.ia_valid = ATTR_MODE | ATTR_UID | ATTR_GID; attr.ia_mode = mode; - attr.ia_uid = current_euid(); - attr.ia_gid = current_egid(); + current_euid_egid(&attr.ia_uid, &attr.ia_gid); if (!new_valid_dev(dev)) return -EINVAL; diff --git a/include/linux/cred.h b/include/linux/cred.h index a7a686074cb0..4221ec6000c1 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -37,15 +37,16 @@ struct group_info { * get_group_info - Get a reference to a group info structure * @group_info: The group info to reference * - * This must be called with the owning task locked (via task_lock()) when task - * != current. The reason being that the vast majority of callers are looking - * at current->group_info, which can not be changed except by the current task. - * Changing current->group_info requires the task lock, too. + * This gets a reference to a set of supplementary groups. + * + * If the caller is accessing a task's credentials, they must hold the RCU read + * lock when reading. */ -#define get_group_info(group_info) \ -do { \ - atomic_inc(&(group_info)->usage); \ -} while (0) +static inline struct group_info *get_group_info(struct group_info *gi) +{ + atomic_inc(&gi->usage); + return gi; +} /** * put_group_info - Release a reference to a group info structure @@ -61,7 +62,7 @@ extern struct group_info *groups_alloc(int); extern void groups_free(struct group_info *); extern int set_current_groups(struct group_info *); extern int set_groups(struct cred *, struct group_info *); -extern int groups_search(struct group_info *, gid_t); +extern int groups_search(const struct group_info *, gid_t); /* access the groups "array" with this macro */ #define GROUP_AT(gi, i) \ @@ -123,41 +124,6 @@ struct cred { spinlock_t lock; /* lock for pointer changes */ }; -#define get_current_user() (get_uid(current->cred->user)) - -#define task_uid(task) ((task)->cred->uid) -#define task_gid(task) ((task)->cred->gid) -#define task_euid(task) ((task)->cred->euid) -#define task_egid(task) ((task)->cred->egid) - -#define current_uid() (current->cred->uid) -#define current_gid() (current->cred->gid) -#define current_euid() (current->cred->euid) -#define current_egid() (current->cred->egid) -#define current_suid() (current->cred->suid) -#define current_sgid() (current->cred->sgid) -#define current_fsuid() (current->cred->fsuid) -#define current_fsgid() (current->cred->fsgid) -#define current_cap() (current->cred->cap_effective) - -#define current_uid_gid(_uid, _gid) \ -do { \ - *(_uid) = current->cred->uid; \ - *(_gid) = current->cred->gid; \ -} while(0) - -#define current_euid_egid(_uid, _gid) \ -do { \ - *(_uid) = current->cred->euid; \ - *(_gid) = current->cred->egid; \ -} while(0) - -#define current_fsuid_fsgid(_uid, _gid) \ -do { \ - *(_uid) = current->cred->fsuid; \ - *(_gid) = current->cred->fsgid; \ -} while(0) - extern void __put_cred(struct cred *); extern int copy_creds(struct task_struct *, unsigned long); @@ -187,4 +153,137 @@ static inline void put_cred(struct cred *cred) __put_cred(cred); } +/** + * current_cred - Access the current task's credentials + * + * Access the credentials of the current task. + */ +#define current_cred() \ + (current->cred) + +/** + * __task_cred - Access another task's credentials + * @task: The task to query + * + * Access the credentials of another task. The caller must hold the + * RCU readlock. + * + * The caller must make sure task doesn't go away, either by holding a ref on + * task or by holding tasklist_lock to prevent it from being unlinked. + */ +#define __task_cred(task) \ + ((const struct cred *)(rcu_dereference((task)->cred))) + +/** + * get_task_cred - Get another task's credentials + * @task: The task to query + * + * Get the credentials of a task, pinning them so that they can't go away. + * Accessing a task's credentials directly is not permitted. + * + * The caller must make sure task doesn't go away, either by holding a ref on + * task or by holding tasklist_lock to prevent it from being unlinked. + */ +#define get_task_cred(task) \ +({ \ + struct cred *__cred; \ + rcu_read_lock(); \ + __cred = (struct cred *) __task_cred((task)); \ + get_cred(__cred); \ + rcu_read_unlock(); \ + __cred; \ +}) + +/** + * get_current_cred - Get the current task's credentials + * + * Get the credentials of the current task, pinning them so that they can't go + * away. Accessing the current task's credentials directly is not permitted. + */ +#define get_current_cred() \ + (get_cred(current_cred())) + +/** + * get_current_user - Get the current task's user_struct + * + * Get the user record of the current task, pinning it so that it can't go + * away. + */ +#define get_current_user() \ +({ \ + struct user_struct *__u; \ + struct cred *__cred; \ + __cred = (struct cred *) current_cred(); \ + __u = get_uid(__cred->user); \ + __u; \ +}) + +/** + * get_current_groups - Get the current task's supplementary group list + * + * Get the supplementary group list of the current task, pinning it so that it + * can't go away. + */ +#define get_current_groups() \ +({ \ + struct group_info *__groups; \ + struct cred *__cred; \ + __cred = (struct cred *) current_cred(); \ + __groups = get_group_info(__cred->group_info); \ + __groups; \ +}) + +#define task_cred_xxx(task, xxx) \ +({ \ + __typeof__(task->cred->xxx) ___val; \ + rcu_read_lock(); \ + ___val = __task_cred((task))->xxx; \ + rcu_read_unlock(); \ + ___val; \ +}) + +#define task_uid(task) (task_cred_xxx((task), uid)) +#define task_euid(task) (task_cred_xxx((task), euid)) + +#define current_cred_xxx(xxx) \ +({ \ + current->cred->xxx; \ +}) + +#define current_uid() (current_cred_xxx(uid)) +#define current_gid() (current_cred_xxx(gid)) +#define current_euid() (current_cred_xxx(euid)) +#define current_egid() (current_cred_xxx(egid)) +#define current_suid() (current_cred_xxx(suid)) +#define current_sgid() (current_cred_xxx(sgid)) +#define current_fsuid() (current_cred_xxx(fsuid)) +#define current_fsgid() (current_cred_xxx(fsgid)) +#define current_cap() (current_cred_xxx(cap_effective)) +#define current_user() (current_cred_xxx(user)) +#define current_security() (current_cred_xxx(security)) + +#define current_uid_gid(_uid, _gid) \ +do { \ + const struct cred *__cred; \ + __cred = current_cred(); \ + *(_uid) = __cred->uid; \ + *(_gid) = __cred->gid; \ +} while(0) + +#define current_euid_egid(_euid, _egid) \ +do { \ + const struct cred *__cred; \ + __cred = current_cred(); \ + *(_euid) = __cred->euid; \ + *(_egid) = __cred->egid; \ +} while(0) + +#define current_fsuid_fsgid(_fsuid, _fsgid) \ +do { \ + const struct cred *__cred; \ + __cred = current_cred(); \ + *(_fsuid) = __cred->fsuid; \ + *(_fsgid) = __cred->fsgid; \ +} while(0) + #endif /* _LINUX_CRED_H */ diff --git a/include/linux/securebits.h b/include/linux/securebits.h index 6d389491bfa2..d2c5ed845bcc 100644 --- a/include/linux/securebits.h +++ b/include/linux/securebits.h @@ -32,7 +32,7 @@ setting is locked or not. A setting which is locked cannot be changed from user-level. */ #define issecure_mask(X) (1 << (X)) -#define issecure(X) (issecure_mask(X) & current->cred->securebits) +#define issecure(X) (issecure_mask(X) & current_cred_xxx(securebits)) #define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ issecure_mask(SECURE_NO_SETUID_FIXUP) | \ diff --git a/ipc/mqueue.c b/ipc/mqueue.c index e1885b494bac..1151881ccb9a 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -112,6 +112,7 @@ static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode) static struct inode *mqueue_get_inode(struct super_block *sb, int mode, struct mq_attr *attr) { + struct user_struct *u = current_user(); struct inode *inode; inode = new_inode(sb); @@ -126,7 +127,6 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode, if (S_ISREG(mode)) { struct mqueue_inode_info *info; struct task_struct *p = current; - struct user_struct *u = p->cred->user; unsigned long mq_bytes, mq_msg_tblsz; inode->i_fop = &mqueue_file_operations; diff --git a/ipc/shm.c b/ipc/shm.c index 264a9d33c5dd..38a055758a9b 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -366,7 +366,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) if (shmflg & SHM_HUGETLB) { /* hugetlb_file_setup takes care of mlock user accounting */ file = hugetlb_file_setup(name, size); - shp->mlock_user = current->cred->user; + shp->mlock_user = current_user(); } else { int acctflag = VM_ACCOUNT; /* @@ -767,7 +767,7 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) goto out_unlock; if(cmd==SHM_LOCK) { - struct user_struct *user = current->cred->user; + struct user_struct *user = current_user(); if (!is_file_hugepages(shp->shm_file)) { err = shmem_lock(shp->shm_file, 1, user); if (!err && !(shp->shm_perm.mode & SHM_LOCKED)){ diff --git a/kernel/sys.c b/kernel/sys.c index 5d81f07c0150..c4d6b59553e9 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -143,6 +143,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) { struct task_struct *g, *p; struct user_struct *user; + const struct cred *cred = current_cred(); int error = -EINVAL; struct pid *pgrp; @@ -176,18 +177,18 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case PRIO_USER: - user = current->cred->user; + user = cred->user; if (!who) - who = current_uid(); - else - if (who != current_uid() && !(user = find_user(who))) - goto out_unlock; /* No processes for this user */ + who = cred->uid; + else if ((who != cred->uid) && + !(user = find_user(who))) + goto out_unlock; /* No processes for this user */ do_each_thread(g, p) - if (p->cred->uid == who) + if (__task_cred(p)->uid == who) error = set_one_prio(p, niceval, error); while_each_thread(g, p); - if (who != current_uid()) + if (who != cred->uid) free_uid(user); /* For find_user() */ break; } @@ -207,6 +208,7 @@ asmlinkage long sys_getpriority(int which, int who) { struct task_struct *g, *p; struct user_struct *user; + const struct cred *cred = current_cred(); long niceval, retval = -ESRCH; struct pid *pgrp; @@ -238,21 +240,21 @@ asmlinkage long sys_getpriority(int which, int who) } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case PRIO_USER: - user = current->cred->user; + user = (struct user_struct *) cred->user; if (!who) - who = current_uid(); - else - if (who != current_uid() && !(user = find_user(who))) - goto out_unlock; /* No processes for this user */ + who = cred->uid; + else if ((who != cred->uid) && + !(user = find_user(who))) + goto out_unlock; /* No processes for this user */ do_each_thread(g, p) - if (p->cred->uid == who) { + if (__task_cred(p)->uid == who) { niceval = 20 - task_nice(p); if (niceval > retval) retval = niceval; } while_each_thread(g, p); - if (who != current_uid()) + if (who != cred->uid) free_uid(user); /* for find_user() */ break; } @@ -743,11 +745,11 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid) { - struct cred *cred = current->cred; + const struct cred *cred = current_cred(); int retval; - if (!(retval = put_user(cred->uid, ruid)) && - !(retval = put_user(cred->euid, euid))) + if (!(retval = put_user(cred->uid, ruid)) && + !(retval = put_user(cred->euid, euid))) retval = put_user(cred->suid, suid); return retval; @@ -796,11 +798,11 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid) { - struct cred *cred = current->cred; + const struct cred *cred = current_cred(); int retval; - if (!(retval = put_user(cred->gid, rgid)) && - !(retval = put_user(cred->egid, egid))) + if (!(retval = put_user(cred->gid, rgid)) && + !(retval = put_user(cred->egid, egid))) retval = put_user(cred->sgid, sgid); return retval; @@ -1199,7 +1201,7 @@ static void groups_sort(struct group_info *group_info) } /* a simple bsearch */ -int groups_search(struct group_info *group_info, gid_t grp) +int groups_search(const struct group_info *group_info, gid_t grp) { unsigned int left, right; @@ -1268,13 +1270,8 @@ EXPORT_SYMBOL(set_current_groups); asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist) { - struct cred *cred = current->cred; - int i = 0; - - /* - * SMP: Nobody else can change our grouplist. Thus we are - * safe. - */ + const struct cred *cred = current_cred(); + int i; if (gidsetsize < 0) return -EINVAL; @@ -1330,8 +1327,9 @@ asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist) */ int in_group_p(gid_t grp) { - struct cred *cred = current->cred; + const struct cred *cred = current_cred(); int retval = 1; + if (grp != cred->fsgid) retval = groups_search(cred->group_info, grp); return retval; @@ -1341,8 +1339,9 @@ EXPORT_SYMBOL(in_group_p); int in_egroup_p(gid_t grp) { - struct cred *cred = current->cred; + const struct cred *cred = current_cred(); int retval = 1; + if (grp != cred->egid) retval = groups_search(cred->group_info, grp); return retval; diff --git a/kernel/uid16.c b/kernel/uid16.c index 71f07fc39fea..2460c3199b5a 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -84,11 +84,12 @@ asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid) asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, old_uid_t __user *suid) { + const struct cred *cred = current_cred(); int retval; - if (!(retval = put_user(high2lowuid(current->cred->uid), ruid)) && - !(retval = put_user(high2lowuid(current->cred->euid), euid))) - retval = put_user(high2lowuid(current->cred->suid), suid); + if (!(retval = put_user(high2lowuid(cred->uid), ruid)) && + !(retval = put_user(high2lowuid(cred->euid), euid))) + retval = put_user(high2lowuid(cred->suid), suid); return retval; } @@ -104,11 +105,12 @@ asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid) asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, old_gid_t __user *sgid) { + const struct cred *cred = current_cred(); int retval; - if (!(retval = put_user(high2lowgid(current->cred->gid), rgid)) && - !(retval = put_user(high2lowgid(current->cred->egid), egid))) - retval = put_user(high2lowgid(current->cred->sgid), sgid); + if (!(retval = put_user(high2lowgid(cred->gid), rgid)) && + !(retval = put_user(high2lowgid(cred->egid), egid))) + retval = put_user(high2lowgid(cred->sgid), sgid); return retval; } @@ -161,25 +163,24 @@ static int groups16_from_user(struct group_info *group_info, asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t __user *grouplist) { - int i = 0; + const struct cred *cred = current_cred(); + int i; if (gidsetsize < 0) return -EINVAL; - get_group_info(current->cred->group_info); - i = current->cred->group_info->ngroups; + i = cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups16_to_user(grouplist, current->cred->group_info)) { + if (groups16_to_user(grouplist, cred->group_info)) { i = -EFAULT; goto out; } } out: - put_group_info(current->cred->group_info); return i; } @@ -210,20 +211,20 @@ asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist) asmlinkage long sys_getuid16(void) { - return high2lowuid(current->cred->uid); + return high2lowuid(current_uid()); } asmlinkage long sys_geteuid16(void) { - return high2lowuid(current->cred->euid); + return high2lowuid(current_euid()); } asmlinkage long sys_getgid16(void) { - return high2lowgid(current->cred->gid); + return high2lowgid(current_gid()); } asmlinkage long sys_getegid16(void) { - return high2lowgid(current->cred->egid); + return high2lowgid(current_egid()); } diff --git a/net/core/scm.c b/net/core/scm.c index c28ca32a7d93..f73c44b17dda 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -44,7 +44,7 @@ static __inline__ int scm_check_creds(struct ucred *creds) { - struct cred *cred = current->cred; + const struct cred *cred = current_cred(); if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) && ((creds->uid == cred->uid || creds->uid == cred->euid || diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index c79543212602..0443f8349458 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -350,16 +350,18 @@ EXPORT_SYMBOL_GPL(rpcauth_lookup_credcache); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *auth, int flags) { - struct auth_cred acred = { - .uid = current_fsuid(), - .gid = current_fsgid(), - .group_info = current->cred->group_info, - }; + struct auth_cred acred; struct rpc_cred *ret; + const struct cred *cred = current_cred(); dprintk("RPC: looking up %s cred\n", auth->au_ops->au_name); - get_group_info(acred.group_info); + + memset(&acred, 0, sizeof(acred)); + acred.uid = cred->fsuid; + acred.gid = cred->fsgid; + acred.group_info = get_group_info(((struct cred *)cred)->group_info); + ret = auth->au_ops->lookup_cred(auth, &acred, flags); put_group_info(acred.group_info); return ret; diff --git a/security/commoncap.c b/security/commoncap.c index fa61679f8c73..61307f590003 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -641,7 +641,7 @@ int cap_task_setnice (struct task_struct *p, int nice) int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5, long *rc_p) { - struct cred *cred = current->cred; + struct cred *cred = current_cred(); long error = 0; switch (option) { diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index b0904cdda2e7..ce8ac6073d57 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -582,7 +582,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, { struct request_key_auth *rka; struct task_struct *t = current; - struct cred *cred = t->cred; + struct cred *cred = current_cred(); struct key *key; key_ref_t key_ref, skey_ref; int ret; diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 3e9b9eb1dd28..0488b0af5bd6 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -67,6 +67,7 @@ static int call_sbin_request_key(struct key_construction *cons, void *aux) { struct task_struct *tsk = current; + const struct cred *cred = current_cred(); key_serial_t prkey, sskey; struct key *key = cons->key, *authkey = cons->authkey, *keyring; char *argv[9], *envp[3], uid_str[12], gid_str[12]; @@ -96,16 +97,16 @@ static int call_sbin_request_key(struct key_construction *cons, goto error_link; /* record the UID and GID */ - sprintf(uid_str, "%d", current_fsuid()); - sprintf(gid_str, "%d", current_fsgid()); + sprintf(uid_str, "%d", cred->fsuid); + sprintf(gid_str, "%d", cred->fsgid); /* we say which key is under construction */ sprintf(key_str, "%d", key->serial); /* we specify the process's default keyrings */ sprintf(keyring_str[0], "%d", - tsk->cred->thread_keyring ? - tsk->cred->thread_keyring->serial : 0); + cred->thread_keyring ? + cred->thread_keyring->serial : 0); prkey = 0; if (tsk->signal->process_keyring) @@ -118,7 +119,7 @@ static int call_sbin_request_key(struct key_construction *cons, sskey = rcu_dereference(tsk->signal->session_keyring)->serial; rcu_read_unlock(); } else { - sskey = tsk->cred->user->session_keyring->serial; + sskey = cred->user->session_keyring->serial; } sprintf(keyring_str[2], "%d", sskey); diff --git a/security/selinux/exports.c b/security/selinux/exports.c index cf02490cd1eb..c73aeaa008e8 100644 --- a/security/selinux/exports.c +++ b/security/selinux/exports.c @@ -39,9 +39,13 @@ EXPORT_SYMBOL_GPL(selinux_string_to_sid); int selinux_secmark_relabel_packet_permission(u32 sid) { if (selinux_enabled) { - struct task_security_struct *tsec = current->cred->security; + const struct task_security_struct *__tsec; + u32 tsid; - return avc_has_perm(tsec->sid, sid, SECCLASS_PACKET, + __tsec = current_security(); + tsid = __tsec->sid; + + return avc_has_perm(tsid, sid, SECCLASS_PACKET, PACKET__RELABELTO, NULL); } return 0; diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index d7db76617b0e..c0eb72013d67 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -197,7 +197,7 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *uctx, u32 sid) { int rc = 0; - struct task_security_struct *tsec = current->cred->security; + const struct task_security_struct *tsec = current_security(); struct xfrm_sec_ctx *ctx = NULL; char *ctx_str = NULL; u32 str_len; @@ -333,7 +333,7 @@ void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx) */ int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx) { - struct task_security_struct *tsec = current->cred->security; + const struct task_security_struct *tsec = current_security(); int rc = 0; if (ctx) { @@ -378,7 +378,7 @@ void selinux_xfrm_state_free(struct xfrm_state *x) */ int selinux_xfrm_state_delete(struct xfrm_state *x) { - struct task_security_struct *tsec = current->cred->security; + const struct task_security_struct *tsec = current_security(); struct xfrm_sec_ctx *ctx = x->security; int rc = 0; diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index b6dd4fc0fb0b..247cec3b5a43 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -164,7 +164,7 @@ int smk_curacc(char *obj_label, u32 mode) { int rc; - rc = smk_access(current->cred->security, obj_label, mode); + rc = smk_access(current_security(), obj_label, mode); if (rc == 0) return 0; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index cc837314fb0e..e8a4fcb1ad04 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -143,7 +143,7 @@ static int smack_ptrace_traceme(struct task_struct *ptp) static int smack_syslog(int type) { int rc; - char *sp = current->cred->security; + char *sp = current_security(); rc = cap_syslog(type); if (rc != 0) @@ -375,7 +375,7 @@ static int smack_sb_umount(struct vfsmount *mnt, int flags) */ static int smack_inode_alloc_security(struct inode *inode) { - inode->i_security = new_inode_smack(current->cred->security); + inode->i_security = new_inode_smack(current_security()); if (inode->i_security == NULL) return -ENOMEM; return 0; @@ -820,7 +820,7 @@ static int smack_file_permission(struct file *file, int mask) */ static int smack_file_alloc_security(struct file *file) { - file->f_security = current->cred->security; + file->f_security = current_security(); return 0; } @@ -918,7 +918,7 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, */ static int smack_file_set_fowner(struct file *file) { - file->f_security = current->cred->security; + file->f_security = current_security(); return 0; } @@ -986,8 +986,7 @@ static int smack_file_receive(struct file *file) */ static int smack_cred_alloc_security(struct cred *cred) { - cred->security = current->cred->security; - + cred->security = current_security(); return 0; } @@ -1225,7 +1224,7 @@ static void smack_task_to_inode(struct task_struct *p, struct inode *inode) */ static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags) { - char *csp = current->cred->security; + char *csp = current_security(); struct socket_smack *ssp; ssp = kzalloc(sizeof(struct socket_smack), gfp_flags); @@ -1450,7 +1449,7 @@ static int smack_flags_to_may(int flags) */ static int smack_msg_msg_alloc_security(struct msg_msg *msg) { - msg->security = current->cred->security; + msg->security = current_security(); return 0; } @@ -1486,7 +1485,7 @@ static int smack_shm_alloc_security(struct shmid_kernel *shp) { struct kern_ipc_perm *isp = &shp->shm_perm; - isp->security = current->cred->security; + isp->security = current_security(); return 0; } @@ -1595,7 +1594,7 @@ static int smack_sem_alloc_security(struct sem_array *sma) { struct kern_ipc_perm *isp = &sma->sem_perm; - isp->security = current->cred->security; + isp->security = current_security(); return 0; } @@ -1699,7 +1698,7 @@ static int smack_msg_queue_alloc_security(struct msg_queue *msq) { struct kern_ipc_perm *kisp = &msq->q_perm; - kisp->security = current->cred->security; + kisp->security = current_security(); return 0; } @@ -1854,7 +1853,7 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode) struct super_block *sbp; struct superblock_smack *sbsp; struct inode_smack *isp; - char *csp = current->cred->security; + char *csp = current_security(); char *fetched; char *final; struct dentry *dp; @@ -2290,8 +2289,7 @@ static void smack_sock_graft(struct sock *sk, struct socket *parent) return; ssp = sk->sk_security; - ssp->smk_in = current->cred->security; - ssp->smk_out = current->cred->security; + ssp->smk_in = ssp->smk_out = current_security(); ssp->smk_packet[0] = '\0'; rc = smack_netlabel(sk); diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index c5ca279e0506..ca257dfdc75d 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -336,7 +336,7 @@ static void smk_cipso_doi(void) audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); - audit_info.secid = smack_to_secid(current->cred->security); + audit_info.secid = smack_to_secid(current_security()); rc = netlbl_cfg_map_del(NULL, &audit_info); if (rc != 0) @@ -371,7 +371,7 @@ static void smk_unlbl_ambient(char *oldambient) audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); - audit_info.secid = smack_to_secid(current->cred->security); + audit_info.secid = smack_to_secid(current_security()); if (oldambient != NULL) { rc = netlbl_cfg_map_del(oldambient, &audit_info); -- cgit v1.2.3 From c69e8d9c01db2adc503464993c358901c9af9de4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:19 +1100 Subject: CRED: Use RCU to access another task's creds and to release a task's own creds Use RCU to access another task's creds and to release a task's own creds. This means that it will be possible for the credentials of a task to be replaced without another task (a) requiring a full lock to read them, and (b) seeing deallocated memory. Signed-off-by: David Howells Acked-by: James Morris Acked-by: Serge Hallyn Signed-off-by: James Morris --- arch/ia64/kernel/perfmon.c | 32 +++++++++++++--------- drivers/connector/cn_proc.c | 16 +++++++---- fs/binfmt_elf.c | 8 ++++-- fs/binfmt_elf_fdpic.c | 8 ++++-- fs/fcntl.c | 15 ++++++++--- fs/fuse/dir.c | 23 ++++++++++------ fs/ioprio.c | 14 +++++++--- fs/proc/array.c | 32 ++++++++++++++-------- fs/proc/base.c | 32 ++++++++++++++++------ include/linux/cred.h | 3 ++- kernel/auditsc.c | 33 +++++++++++++---------- kernel/cgroup.c | 16 +++++------ kernel/exit.c | 14 ++++++---- kernel/futex.c | 22 +++++++++------ kernel/futex_compat.c | 7 ++--- kernel/ptrace.c | 22 ++++++++------- kernel/sched.c | 31 ++++++++++++++------- kernel/signal.c | 49 ++++++++++++++++++++------------- kernel/sys.c | 11 +++++--- kernel/tsacct.c | 6 +++-- mm/mempolicy.c | 8 +++--- mm/migrate.c | 8 +++--- mm/oom_kill.c | 6 ++--- security/commoncap.c | 64 +++++++++++++++++++++++++++----------------- security/keys/permission.c | 10 ++++--- security/keys/process_keys.c | 24 ++++++++++------- security/selinux/selinuxfs.c | 13 ++++++--- security/smack/smack_lsm.c | 32 +++++++++++----------- 28 files changed, 355 insertions(+), 204 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index dd38db46a77a..0e499757309b 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2399,25 +2399,33 @@ error_kmem: static int pfm_bad_permissions(struct task_struct *task) { + const struct cred *tcred; uid_t uid = current_uid(); gid_t gid = current_gid(); + int ret; + + rcu_read_lock(); + tcred = __task_cred(task); /* inspired by ptrace_attach() */ DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n", uid, gid, - task->euid, - task->suid, - task->uid, - task->egid, - task->sgid)); - - return (uid != task->euid) - || (uid != task->suid) - || (uid != task->uid) - || (gid != task->egid) - || (gid != task->sgid) - || (gid != task->gid)) && !capable(CAP_SYS_PTRACE); + tcred->euid, + tcred->suid, + tcred->uid, + tcred->egid, + tcred->sgid)); + + ret = ((uid != tcred->euid) + || (uid != tcred->suid) + || (uid != tcred->uid) + || (gid != tcred->egid) + || (gid != tcred->sgid) + || (gid != tcred->gid)) && !capable(CAP_SYS_PTRACE); + + rcu_read_unlock(); + return ret; } static int diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 354c1ff17159..c5afc98e2675 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -106,6 +106,7 @@ void proc_id_connector(struct task_struct *task, int which_id) struct proc_event *ev; __u8 buffer[CN_PROC_MSG_SIZE]; struct timespec ts; + const struct cred *cred; if (atomic_read(&proc_event_num_listeners) < 1) return; @@ -115,14 +116,19 @@ void proc_id_connector(struct task_struct *task, int which_id) ev->what = which_id; ev->event_data.id.process_pid = task->pid; ev->event_data.id.process_tgid = task->tgid; + rcu_read_lock(); + cred = __task_cred(task); if (which_id == PROC_EVENT_UID) { - ev->event_data.id.r.ruid = task->cred->uid; - ev->event_data.id.e.euid = task->cred->euid; + ev->event_data.id.r.ruid = cred->uid; + ev->event_data.id.e.euid = cred->euid; } else if (which_id == PROC_EVENT_GID) { - ev->event_data.id.r.rgid = task->cred->gid; - ev->event_data.id.e.egid = task->cred->egid; - } else + ev->event_data.id.r.rgid = cred->gid; + ev->event_data.id.e.egid = cred->egid; + } else { + rcu_read_unlock(); return; + } + rcu_read_unlock(); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 0e6655613169..9142ff5dc8e6 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1361,6 +1361,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus, static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, struct mm_struct *mm) { + const struct cred *cred; unsigned int i, len; /* first copy the parameters from user space */ @@ -1388,8 +1389,11 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_zomb = psinfo->pr_sname == 'Z'; psinfo->pr_nice = task_nice(p); psinfo->pr_flag = p->flags; - SET_UID(psinfo->pr_uid, p->cred->uid); - SET_GID(psinfo->pr_gid, p->cred->gid); + rcu_read_lock(); + cred = __task_cred(p); + SET_UID(psinfo->pr_uid, cred->uid); + SET_GID(psinfo->pr_gid, cred->gid); + rcu_read_unlock(); strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); return 0; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 1f6e8c023b4c..45dabd59936f 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1414,6 +1414,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus, static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, struct mm_struct *mm) { + const struct cred *cred; unsigned int i, len; /* first copy the parameters from user space */ @@ -1441,8 +1442,11 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_zomb = psinfo->pr_sname == 'Z'; psinfo->pr_nice = task_nice(p); psinfo->pr_flag = p->flags; - SET_UID(psinfo->pr_uid, p->cred->uid); - SET_GID(psinfo->pr_gid, p->cred->gid); + rcu_read_lock(); + cred = __task_cred(p); + SET_UID(psinfo->pr_uid, cred->uid); + SET_GID(psinfo->pr_gid, cred->gid); + rcu_read_unlock(); strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); return 0; diff --git a/fs/fcntl.c b/fs/fcntl.c index c594cc0e40fb..87c39f1f0817 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -401,10 +401,17 @@ static const long band_table[NSIGPOLL] = { static inline int sigio_perm(struct task_struct *p, struct fown_struct *fown, int sig) { - return (((fown->euid == 0) || - (fown->euid == p->cred->suid) || (fown->euid == p->cred->uid) || - (fown->uid == p->cred->suid) || (fown->uid == p->cred->uid)) && - !security_file_send_sigiotask(p, fown, sig)); + const struct cred *cred; + int ret; + + rcu_read_lock(); + cred = __task_cred(p); + ret = ((fown->euid == 0 || + fown->euid == cred->suid || fown->euid == cred->uid || + fown->uid == cred->suid || fown->uid == cred->uid) && + !security_file_send_sigiotask(p, fown, sig)); + rcu_read_unlock(); + return ret; } static void send_sigio_to_task(struct task_struct *p, diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index e97a98981862..95bc22bdd060 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -869,18 +869,25 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat, */ int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) { + const struct cred *cred; + int ret; + if (fc->flags & FUSE_ALLOW_OTHER) return 1; - if (task->cred->euid == fc->user_id && - task->cred->suid == fc->user_id && - task->cred->uid == fc->user_id && - task->cred->egid == fc->group_id && - task->cred->sgid == fc->group_id && - task->cred->gid == fc->group_id) - return 1; + rcu_read_lock(); + ret = 0; + cred = __task_cred(task); + if (cred->euid == fc->user_id && + cred->suid == fc->user_id && + cred->uid == fc->user_id && + cred->egid == fc->group_id && + cred->sgid == fc->group_id && + cred->gid == fc->group_id) + ret = 1; + rcu_read_unlock(); - return 0; + return ret; } static int fuse_access(struct inode *inode, int mask) diff --git a/fs/ioprio.c b/fs/ioprio.c index 5112554fd210..3569e0ad86a2 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -31,10 +31,16 @@ static int set_task_ioprio(struct task_struct *task, int ioprio) { int err; struct io_context *ioc; + const struct cred *cred = current_cred(), *tcred; - if (task->cred->uid != current_euid() && - task->cred->uid != current_uid() && !capable(CAP_SYS_NICE)) + rcu_read_lock(); + tcred = __task_cred(task); + if (tcred->uid != cred->euid && + tcred->uid != cred->uid && !capable(CAP_SYS_NICE)) { + rcu_read_unlock(); return -EPERM; + } + rcu_read_unlock(); err = security_task_setioprio(task, ioprio); if (err) @@ -131,7 +137,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) break; do_each_thread(g, p) { - if (p->cred->uid != who) + if (__task_cred(p)->uid != who) continue; ret = set_task_ioprio(p, ioprio); if (ret) @@ -224,7 +230,7 @@ asmlinkage long sys_ioprio_get(int which, int who) break; do_each_thread(g, p) { - if (p->cred->uid != user->uid) + if (__task_cred(p)->uid != user->uid) continue; tmpio = get_task_ioprio(p); if (tmpio < 0) diff --git a/fs/proc/array.c b/fs/proc/array.c index 62fe9b2009b6..7e4877d9dcb5 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -159,6 +159,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, struct group_info *group_info; int g; struct fdtable *fdt = NULL; + const struct cred *cred; pid_t ppid, tpid; rcu_read_lock(); @@ -170,6 +171,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, if (tracer) tpid = task_pid_nr_ns(tracer, ns); } + cred = get_cred((struct cred *) __task_cred(p)); seq_printf(m, "State:\t%s\n" "Tgid:\t%d\n" @@ -182,8 +184,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, task_tgid_nr_ns(p, ns), pid_nr_ns(pid, ns), ppid, tpid, - p->cred->uid, p->cred->euid, p->cred->suid, p->cred->fsuid, - p->cred->gid, p->cred->egid, p->cred->sgid, p->cred->fsgid); + cred->uid, cred->euid, cred->suid, cred->fsuid, + cred->gid, cred->egid, cred->sgid, cred->fsgid); task_lock(p); if (p->files) @@ -194,13 +196,12 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, fdt ? fdt->max_fds : 0); rcu_read_unlock(); - group_info = p->cred->group_info; - get_group_info(group_info); + group_info = cred->group_info; task_unlock(p); for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++) seq_printf(m, "%d ", GROUP_AT(group_info, g)); - put_group_info(group_info); + put_cred(cred); seq_printf(m, "\n"); } @@ -262,7 +263,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) blocked = p->blocked; collect_sigign_sigcatch(p, &ignored, &caught); num_threads = atomic_read(&p->signal->count); - qsize = atomic_read(&p->cred->user->sigpending); + qsize = atomic_read(&__task_cred(p)->user->sigpending); qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; unlock_task_sighand(p, &flags); } @@ -293,12 +294,21 @@ static void render_cap_t(struct seq_file *m, const char *header, static inline void task_cap(struct seq_file *m, struct task_struct *p) { - struct cred *cred = p->cred; + const struct cred *cred; + kernel_cap_t cap_inheritable, cap_permitted, cap_effective, cap_bset; - render_cap_t(m, "CapInh:\t", &cred->cap_inheritable); - render_cap_t(m, "CapPrm:\t", &cred->cap_permitted); - render_cap_t(m, "CapEff:\t", &cred->cap_effective); - render_cap_t(m, "CapBnd:\t", &cred->cap_bset); + rcu_read_lock(); + cred = __task_cred(p); + cap_inheritable = cred->cap_inheritable; + cap_permitted = cred->cap_permitted; + cap_effective = cred->cap_effective; + cap_bset = cred->cap_bset; + rcu_read_unlock(); + + render_cap_t(m, "CapInh:\t", &cap_inheritable); + render_cap_t(m, "CapPrm:\t", &cap_permitted); + render_cap_t(m, "CapEff:\t", &cap_effective); + render_cap_t(m, "CapBnd:\t", &cap_bset); } static inline void task_context_switch_counts(struct seq_file *m, diff --git a/fs/proc/base.c b/fs/proc/base.c index 6862b360c36c..cf42c42cbfbb 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1406,6 +1406,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st { struct inode * inode; struct proc_inode *ei; + const struct cred *cred; /* We need a new inode */ @@ -1428,8 +1429,11 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st inode->i_uid = 0; inode->i_gid = 0; if (task_dumpable(task)) { - inode->i_uid = task->cred->euid; - inode->i_gid = task->cred->egid; + rcu_read_lock(); + cred = __task_cred(task); + inode->i_uid = cred->euid; + inode->i_gid = cred->egid; + rcu_read_unlock(); } security_task_to_inode(task, inode); @@ -1445,6 +1449,8 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat { struct inode *inode = dentry->d_inode; struct task_struct *task; + const struct cred *cred; + generic_fillattr(inode, stat); rcu_read_lock(); @@ -1454,8 +1460,9 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat if (task) { if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { - stat->uid = task->cred->euid; - stat->gid = task->cred->egid; + cred = __task_cred(task); + stat->uid = cred->euid; + stat->gid = cred->egid; } } rcu_read_unlock(); @@ -1483,11 +1490,16 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; struct task_struct *task = get_proc_task(inode); + const struct cred *cred; + if (task) { if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { - inode->i_uid = task->cred->euid; - inode->i_gid = task->cred->egid; + rcu_read_lock(); + cred = __task_cred(task); + inode->i_uid = cred->euid; + inode->i_gid = cred->egid; + rcu_read_unlock(); } else { inode->i_uid = 0; inode->i_gid = 0; @@ -1649,6 +1661,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) struct task_struct *task = get_proc_task(inode); int fd = proc_fd(inode); struct files_struct *files; + const struct cred *cred; if (task) { files = get_files_struct(task); @@ -1658,8 +1671,11 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) rcu_read_unlock(); put_files_struct(files); if (task_dumpable(task)) { - inode->i_uid = task->cred->euid; - inode->i_gid = task->cred->egid; + rcu_read_lock(); + cred = __task_cred(task); + inode->i_uid = cred->euid; + inode->i_gid = cred->egid; + rcu_read_unlock(); } else { inode->i_uid = 0; inode->i_gid = 0; diff --git a/include/linux/cred.h b/include/linux/cred.h index 4221ec6000c1..166ce4ddba64 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -147,8 +147,9 @@ static inline struct cred *get_cred(struct cred *cred) * Release a reference to a set of credentials, deleting them when the last ref * is released. */ -static inline void put_cred(struct cred *cred) +static inline void put_cred(const struct cred *_cred) { + struct cred *cred = (struct cred *) _cred; if (atomic_dec_and_test(&(cred)->usage)) __put_cred(cred); } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 2febf5165fad..ae8ef88ade3f 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -447,7 +447,7 @@ static int audit_filter_rules(struct task_struct *tsk, struct audit_names *name, enum audit_state *state) { - struct cred *cred = tsk->cred; + const struct cred *cred = get_task_cred(tsk); int i, j, need_sid = 1; u32 sid; @@ -642,8 +642,10 @@ static int audit_filter_rules(struct task_struct *tsk, break; } - if (!result) + if (!result) { + put_cred(cred); return 0; + } } if (rule->filterkey && ctx) ctx->filterkey = kstrdup(rule->filterkey, GFP_ATOMIC); @@ -651,6 +653,7 @@ static int audit_filter_rules(struct task_struct *tsk, case AUDIT_NEVER: *state = AUDIT_DISABLED; break; case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break; } + put_cred(cred); return 1; } @@ -1229,7 +1232,7 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) { - struct cred *cred = tsk->cred; + const struct cred *cred; int i, call_panic = 0; struct audit_buffer *ab; struct audit_aux_data *aux; @@ -1239,13 +1242,14 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts context->pid = tsk->pid; if (!context->ppid) context->ppid = sys_getppid(); - context->uid = cred->uid; - context->gid = cred->gid; - context->euid = cred->euid; - context->suid = cred->suid; + cred = current_cred(); + context->uid = cred->uid; + context->gid = cred->gid; + context->euid = cred->euid; + context->suid = cred->suid; context->fsuid = cred->fsuid; - context->egid = cred->egid; - context->sgid = cred->sgid; + context->egid = cred->egid; + context->sgid = cred->sgid; context->fsgid = cred->fsgid; context->personality = tsk->personality; @@ -2088,7 +2092,7 @@ int audit_set_loginuid(struct task_struct *task, uid_t loginuid) audit_log_format(ab, "login pid=%d uid=%u " "old auid=%u new auid=%u" " old ses=%u new ses=%u", - task->pid, task->cred->uid, + task->pid, task_uid(task), task->loginuid, loginuid, task->sessionid, sessionid); audit_log_end(ab); @@ -2471,7 +2475,7 @@ void __audit_ptrace(struct task_struct *t) context->target_pid = t->pid; context->target_auid = audit_get_loginuid(t); - context->target_uid = t->cred->uid; + context->target_uid = task_uid(t); context->target_sessionid = audit_get_sessionid(t); security_task_getsecid(t, &context->target_sid); memcpy(context->target_comm, t->comm, TASK_COMM_LEN); @@ -2490,6 +2494,7 @@ int __audit_signal_info(int sig, struct task_struct *t) struct audit_aux_data_pids *axp; struct task_struct *tsk = current; struct audit_context *ctx = tsk->audit_context; + uid_t uid = current_uid(), t_uid = task_uid(t); if (audit_pid && t->tgid == audit_pid) { if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) { @@ -2497,7 +2502,7 @@ int __audit_signal_info(int sig, struct task_struct *t) if (tsk->loginuid != -1) audit_sig_uid = tsk->loginuid; else - audit_sig_uid = tsk->cred->uid; + audit_sig_uid = uid; security_task_getsecid(tsk, &audit_sig_sid); } if (!audit_signals || audit_dummy_context()) @@ -2509,7 +2514,7 @@ int __audit_signal_info(int sig, struct task_struct *t) if (!ctx->target_pid) { ctx->target_pid = t->tgid; ctx->target_auid = audit_get_loginuid(t); - ctx->target_uid = t->cred->uid; + ctx->target_uid = t_uid; ctx->target_sessionid = audit_get_sessionid(t); security_task_getsecid(t, &ctx->target_sid); memcpy(ctx->target_comm, t->comm, TASK_COMM_LEN); @@ -2530,7 +2535,7 @@ int __audit_signal_info(int sig, struct task_struct *t) axp->target_pid[axp->pid_count] = t->tgid; axp->target_auid[axp->pid_count] = audit_get_loginuid(t); - axp->target_uid[axp->pid_count] = t->cred->uid; + axp->target_uid[axp->pid_count] = t_uid; axp->target_sessionid[axp->pid_count] = audit_get_sessionid(t); security_task_getsecid(t, &axp->target_sid[axp->pid_count]); memcpy(axp->target_comm[axp->pid_count], t->comm, TASK_COMM_LEN); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e210526e6401..a512a75a5560 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1279,7 +1279,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) static int attach_task_by_pid(struct cgroup *cgrp, u64 pid) { struct task_struct *tsk; - uid_t euid; + const struct cred *cred = current_cred(), *tcred; int ret; if (pid) { @@ -1289,16 +1289,16 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid) rcu_read_unlock(); return -ESRCH; } - get_task_struct(tsk); - rcu_read_unlock(); - euid = current_euid(); - if (euid && - euid != tsk->cred->uid && - euid != tsk->cred->suid) { - put_task_struct(tsk); + tcred = __task_cred(tsk); + if (cred->euid && + cred->euid != tcred->uid && + cred->euid != tcred->suid) { + rcu_read_unlock(); return -EACCES; } + get_task_struct(tsk); + rcu_read_unlock(); } else { tsk = current; get_task_struct(tsk); diff --git a/kernel/exit.c b/kernel/exit.c index e0f6e1892fb9..bbc22530f2c1 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -160,7 +160,10 @@ void release_task(struct task_struct * p) int zap_leader; repeat: tracehook_prepare_release_task(p); - atomic_dec(&p->cred->user->processes); + /* don't need to get the RCU readlock here - the process is dead and + * can't be modifying its own credentials */ + atomic_dec(&__task_cred(p)->user->processes); + proc_flush_task(p); write_lock_irq(&tasklist_lock); tracehook_finish_release_task(p); @@ -1267,12 +1270,12 @@ static int wait_task_zombie(struct task_struct *p, int options, unsigned long state; int retval, status, traced; pid_t pid = task_pid_vnr(p); + uid_t uid = __task_cred(p)->uid; if (!likely(options & WEXITED)) return 0; if (unlikely(options & WNOWAIT)) { - uid_t uid = p->cred->uid; int exit_code = p->exit_code; int why, status; @@ -1393,7 +1396,7 @@ static int wait_task_zombie(struct task_struct *p, int options, if (!retval && infop) retval = put_user(pid, &infop->si_pid); if (!retval && infop) - retval = put_user(p->cred->uid, &infop->si_uid); + retval = put_user(uid, &infop->si_uid); if (!retval) retval = pid; @@ -1458,7 +1461,8 @@ static int wait_task_stopped(int ptrace, struct task_struct *p, if (!unlikely(options & WNOWAIT)) p->exit_code = 0; - uid = p->cred->uid; + /* don't need the RCU readlock here as we're holding a spinlock */ + uid = __task_cred(p)->uid; unlock_sig: spin_unlock_irq(&p->sighand->siglock); if (!exit_code) @@ -1532,10 +1536,10 @@ static int wait_task_continued(struct task_struct *p, int options, } if (!unlikely(options & WNOWAIT)) p->signal->flags &= ~SIGNAL_STOP_CONTINUED; + uid = __task_cred(p)->uid; spin_unlock_irq(&p->sighand->siglock); pid = task_pid_vnr(p); - uid = p->cred->uid; get_task_struct(p); read_unlock(&tasklist_lock); diff --git a/kernel/futex.c b/kernel/futex.c index 28421d8210b8..4fe790e89d0f 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -439,15 +439,20 @@ static void free_pi_state(struct futex_pi_state *pi_state) static struct task_struct * futex_find_get_task(pid_t pid) { struct task_struct *p; - uid_t euid = current_euid(); + const struct cred *cred = current_cred(), *pcred; rcu_read_lock(); p = find_task_by_vpid(pid); - if (!p || (euid != p->cred->euid && - euid != p->cred->uid)) + if (!p) { p = ERR_PTR(-ESRCH); - else - get_task_struct(p); + } else { + pcred = __task_cred(p); + if (cred->euid != pcred->euid && + cred->euid != pcred->uid) + p = ERR_PTR(-ESRCH); + else + get_task_struct(p); + } rcu_read_unlock(); @@ -1831,7 +1836,7 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, { struct robust_list_head __user *head; unsigned long ret; - uid_t euid = current_euid(); + const struct cred *cred = current_cred(), *pcred; if (!futex_cmpxchg_enabled) return -ENOSYS; @@ -1847,8 +1852,9 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, if (!p) goto err_unlock; ret = -EPERM; - if (euid != p->cred->euid && - euid != p->cred->uid && + pcred = __task_cred(p); + if (cred->euid != pcred->euid && + cred->euid != pcred->uid && !capable(CAP_SYS_PTRACE)) goto err_unlock; head = p->robust_list; diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 2c3fd5ed34f5..d607a5b9ee29 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -135,7 +135,7 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, { struct compat_robust_list_head __user *head; unsigned long ret; - uid_t euid = current_euid(); + const struct cred *cred = current_cred(), *pcred; if (!futex_cmpxchg_enabled) return -ENOSYS; @@ -151,8 +151,9 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, if (!p) goto err_unlock; ret = -EPERM; - if (euid != p->cred->euid && - euid != p->cred->uid && + pcred = __task_cred(p); + if (cred->euid != pcred->euid && + cred->euid != pcred->uid && !capable(CAP_SYS_PTRACE)) goto err_unlock; head = p->compat_robust_list; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 49849d12dd12..b9d5f4e4f6a4 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -115,7 +115,7 @@ int ptrace_check_attach(struct task_struct *child, int kill) int __ptrace_may_access(struct task_struct *task, unsigned int mode) { - struct cred *cred = current->cred, *tcred = task->cred; + const struct cred *cred = current_cred(), *tcred; /* May we inspect the given task? * This check is used both for attaching with ptrace @@ -125,19 +125,23 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode) * because setting up the necessary parent/child relationship * or halting the specified task is impossible. */ - uid_t uid = cred->uid; - gid_t gid = cred->gid; int dumpable = 0; /* Don't let security modules deny introspection */ if (task == current) return 0; - if ((uid != tcred->euid || - uid != tcred->suid || - uid != tcred->uid || - gid != tcred->egid || - gid != tcred->sgid || - gid != tcred->gid) && !capable(CAP_SYS_PTRACE)) + rcu_read_lock(); + tcred = __task_cred(task); + if ((cred->uid != tcred->euid || + cred->uid != tcred->suid || + cred->uid != tcred->uid || + cred->gid != tcred->egid || + cred->gid != tcred->sgid || + cred->gid != tcred->gid) && + !capable(CAP_SYS_PTRACE)) { + rcu_read_unlock(); return -EPERM; + } + rcu_read_unlock(); smp_rmb(); if (task->mm) dumpable = get_dumpable(task->mm); diff --git a/kernel/sched.c b/kernel/sched.c index 733c59e645aa..92992e287b10 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -345,7 +345,9 @@ static inline struct task_group *task_group(struct task_struct *p) struct task_group *tg; #ifdef CONFIG_USER_SCHED - tg = p->cred->user->tg; + rcu_read_lock(); + tg = __task_cred(p)->user->tg; + rcu_read_unlock(); #elif defined(CONFIG_CGROUP_SCHED) tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), struct task_group, css); @@ -5121,6 +5123,22 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) set_load_weight(p); } +/* + * check the target process has a UID that matches the current process's + */ +static bool check_same_owner(struct task_struct *p) +{ + const struct cred *cred = current_cred(), *pcred; + bool match; + + rcu_read_lock(); + pcred = __task_cred(p); + match = (cred->euid == pcred->euid || + cred->euid == pcred->uid); + rcu_read_unlock(); + return match; +} + static int __sched_setscheduler(struct task_struct *p, int policy, struct sched_param *param, bool user) { @@ -5128,7 +5146,6 @@ static int __sched_setscheduler(struct task_struct *p, int policy, unsigned long flags; const struct sched_class *prev_class = p->sched_class; struct rq *rq; - uid_t euid; /* may grab non-irq protected spin_locks */ BUG_ON(in_interrupt()); @@ -5181,9 +5198,7 @@ recheck: return -EPERM; /* can't change other user's priorities */ - euid = current_euid(); - if (euid != p->cred->euid && - euid != p->cred->uid) + if (!check_same_owner(p)) return -EPERM; } @@ -5394,7 +5409,6 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) cpumask_t cpus_allowed; cpumask_t new_mask = *in_mask; struct task_struct *p; - uid_t euid; int retval; get_online_cpus(); @@ -5415,11 +5429,8 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) get_task_struct(p); read_unlock(&tasklist_lock); - euid = current_euid(); retval = -EPERM; - if (euid != p->cred->euid && - euid != p->cred->uid && - !capable(CAP_SYS_NICE)) + if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) goto out_unlock; retval = security_task_setscheduler(p, 0, NULL); diff --git a/kernel/signal.c b/kernel/signal.c index 80e8a6489f97..84989124bafb 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -177,6 +177,11 @@ int next_signal(struct sigpending *pending, sigset_t *mask) return sig; } +/* + * allocate a new signal queue record + * - this may be called without locks if and only if t == current, otherwise an + * appopriate lock must be held to protect t's user_struct + */ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, int override_rlimit) { @@ -184,11 +189,12 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, struct user_struct *user; /* - * In order to avoid problems with "switch_user()", we want to make - * sure that the compiler doesn't re-load "t->user" + * We won't get problems with the target's UID changing under us + * because changing it requires RCU be used, and if t != current, the + * caller must be holding the RCU readlock (by way of a spinlock) and + * we use RCU protection here */ - user = t->cred->user; - barrier(); + user = __task_cred(t)->user; atomic_inc(&user->sigpending); if (override_rlimit || atomic_read(&user->sigpending) <= @@ -562,12 +568,13 @@ static int rm_from_queue(unsigned long mask, struct sigpending *s) /* * Bad permissions for sending the signal + * - the caller must hold at least the RCU read lock */ static int check_kill_permission(int sig, struct siginfo *info, struct task_struct *t) { + const struct cred *cred = current_cred(), *tcred; struct pid *sid; - uid_t uid, euid; int error; if (!valid_signal(sig)) @@ -580,10 +587,11 @@ static int check_kill_permission(int sig, struct siginfo *info, if (error) return error; - uid = current_uid(); - euid = current_euid(); - if ((euid ^ t->cred->suid) && (euid ^ t->cred->uid) && - (uid ^ t->cred->suid) && (uid ^ t->cred->uid) && + tcred = __task_cred(t); + if ((cred->euid ^ tcred->suid) && + (cred->euid ^ tcred->uid) && + (cred->uid ^ tcred->suid) && + (cred->uid ^ tcred->uid) && !capable(CAP_KILL)) { switch (sig) { case SIGCONT: @@ -1011,6 +1019,10 @@ struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long return sighand; } +/* + * send signal info to all the members of a group + * - the caller must hold the RCU read lock at least + */ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) { unsigned long flags; @@ -1032,8 +1044,8 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) /* * __kill_pgrp_info() sends a signal to a process group: this is what the tty * control characters do (^C, ^Z etc) + * - the caller must hold at least a readlock on tasklist_lock */ - int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp) { struct task_struct *p = NULL; @@ -1089,6 +1101,7 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid, { int ret = -EINVAL; struct task_struct *p; + const struct cred *pcred; if (!valid_signal(sig)) return ret; @@ -1099,9 +1112,11 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid, ret = -ESRCH; goto out_unlock; } - if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info))) - && (euid != p->cred->suid) && (euid != p->cred->uid) - && (uid != p->cred->suid) && (uid != p->cred->uid)) { + pcred = __task_cred(p); + if ((info == SEND_SIG_NOINFO || + (!is_si_special(info) && SI_FROMUSER(info))) && + euid != pcred->suid && euid != pcred->uid && + uid != pcred->suid && uid != pcred->uid) { ret = -EPERM; goto out_unlock; } @@ -1372,10 +1387,9 @@ int do_notify_parent(struct task_struct *tsk, int sig) */ rcu_read_lock(); info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns); + info.si_uid = __task_cred(tsk)->uid; rcu_read_unlock(); - info.si_uid = tsk->cred->uid; - thread_group_cputime(tsk, &cputime); info.si_utime = cputime_to_jiffies(cputime.utime); info.si_stime = cputime_to_jiffies(cputime.stime); @@ -1443,10 +1457,9 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why) */ rcu_read_lock(); info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns); + info.si_uid = __task_cred(tsk)->uid; rcu_read_unlock(); - info.si_uid = tsk->cred->uid; - info.si_utime = cputime_to_clock_t(tsk->utime); info.si_stime = cputime_to_clock_t(tsk->stime); @@ -1713,7 +1726,7 @@ static int ptrace_signal(int signr, siginfo_t *info, info->si_errno = 0; info->si_code = SI_USER; info->si_pid = task_pid_vnr(current->parent); - info->si_uid = current->parent->cred->uid; + info->si_uid = task_uid(current->parent); } /* If the (new) signal is now blocked, requeue it. */ diff --git a/kernel/sys.c b/kernel/sys.c index c4d6b59553e9..ccc9eb736d35 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -112,14 +112,17 @@ EXPORT_SYMBOL(cad_pid); void (*pm_power_off_prepare)(void); +/* + * set the priority of a task + * - the caller must hold the RCU read lock + */ static int set_one_prio(struct task_struct *p, int niceval, int error) { - uid_t euid = current_euid(); + const struct cred *cred = current_cred(), *pcred = __task_cred(p); int no_nice; - if (p->cred->uid != euid && - p->cred->euid != euid && - !capable(CAP_SYS_NICE)) { + if (pcred->uid != cred->euid && + pcred->euid != cred->euid && !capable(CAP_SYS_NICE)) { error = -EPERM; goto out; } diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 6d1ed07bf312..2dc06ab35716 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -27,6 +27,7 @@ */ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) { + const struct cred *tcred; struct timespec uptime, ts; u64 ac_etime; @@ -53,10 +54,11 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) stats->ac_flag |= AXSIG; stats->ac_nice = task_nice(tsk); stats->ac_sched = tsk->policy; - stats->ac_uid = tsk->cred->uid; - stats->ac_gid = tsk->cred->gid; stats->ac_pid = tsk->pid; rcu_read_lock(); + tcred = __task_cred(tsk); + stats->ac_uid = tcred->uid; + stats->ac_gid = tcred->gid; stats->ac_ppid = pid_alive(tsk) ? rcu_dereference(tsk->real_parent)->tgid : 0; rcu_read_unlock(); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index b23492ee3e50..7555219c535b 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1110,7 +1110,7 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, const unsigned long __user *old_nodes, const unsigned long __user *new_nodes) { - struct cred *cred, *tcred; + const struct cred *cred = current_cred(), *tcred; struct mm_struct *mm; struct task_struct *task; nodemask_t old; @@ -1145,14 +1145,16 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, * capabilities, superuser privileges or the same * userid as the target process. */ - cred = current->cred; - tcred = task->cred; + rcu_read_lock(); + tcred = __task_cred(task); if (cred->euid != tcred->suid && cred->euid != tcred->uid && cred->uid != tcred->suid && cred->uid != tcred->uid && !capable(CAP_SYS_NICE)) { + rcu_read_unlock(); err = -EPERM; goto out; } + rcu_read_unlock(); task_nodes = cpuset_mems_allowed(task); /* Is the user allowed to access the target nodes? */ diff --git a/mm/migrate.c b/mm/migrate.c index 794443da1b4f..142284229ce2 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1045,7 +1045,7 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, const int __user *nodes, int __user *status, int flags) { - struct cred *cred, *tcred; + const struct cred *cred = current_cred(), *tcred; struct task_struct *task; struct mm_struct *mm; int err; @@ -1076,14 +1076,16 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, * capabilities, superuser privileges or the same * userid as the target process. */ - cred = current->cred; - tcred = task->cred; + rcu_read_lock(); + tcred = __task_cred(task); if (cred->euid != tcred->suid && cred->euid != tcred->uid && cred->uid != tcred->suid && cred->uid != tcred->uid && !capable(CAP_SYS_NICE)) { + rcu_read_unlock(); err = -EPERM; goto out; } + rcu_read_unlock(); err = security_task_movememory(task); if (err) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 3af787ba2077..0e0b282a2073 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -298,9 +298,9 @@ static void dump_tasks(const struct mem_cgroup *mem) task_lock(p); printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n", - p->pid, p->cred->uid, p->tgid, p->mm->total_vm, - get_mm_rss(p->mm), (int)task_cpu(p), p->oomkilladj, - p->comm); + p->pid, __task_cred(p)->uid, p->tgid, + p->mm->total_vm, get_mm_rss(p->mm), (int)task_cpu(p), + p->oomkilladj, p->comm); task_unlock(p); } while_each_thread(g, p); } diff --git a/security/commoncap.c b/security/commoncap.c index 61307f590003..0384bf95db68 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -51,10 +51,13 @@ EXPORT_SYMBOL(cap_netlink_recv); */ int cap_capable(struct task_struct *tsk, int cap, int audit) { + __u32 cap_raised; + /* Derived from include/linux/sched.h:capable. */ - if (cap_raised(tsk->cred->cap_effective, cap)) - return 0; - return -EPERM; + rcu_read_lock(); + cap_raised = cap_raised(__task_cred(tsk)->cap_effective, cap); + rcu_read_unlock(); + return cap_raised ? 0 : -EPERM; } int cap_settime(struct timespec *ts, struct timezone *tz) @@ -66,34 +69,42 @@ int cap_settime(struct timespec *ts, struct timezone *tz) int cap_ptrace_may_access(struct task_struct *child, unsigned int mode) { - /* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. */ - if (cap_issubset(child->cred->cap_permitted, - current->cred->cap_permitted)) - return 0; - if (capable(CAP_SYS_PTRACE)) - return 0; - return -EPERM; + int ret = 0; + + rcu_read_lock(); + if (!cap_issubset(child->cred->cap_permitted, + current->cred->cap_permitted) && + !capable(CAP_SYS_PTRACE)) + ret = -EPERM; + rcu_read_unlock(); + return ret; } int cap_ptrace_traceme(struct task_struct *parent) { - if (cap_issubset(current->cred->cap_permitted, - parent->cred->cap_permitted)) - return 0; - if (has_capability(parent, CAP_SYS_PTRACE)) - return 0; - return -EPERM; + int ret = 0; + + rcu_read_lock(); + if (!cap_issubset(current->cred->cap_permitted, + parent->cred->cap_permitted) && + !has_capability(parent, CAP_SYS_PTRACE)) + ret = -EPERM; + rcu_read_unlock(); + return ret; } int cap_capget (struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { - struct cred *cred = target->cred; + const struct cred *cred; /* Derived from kernel/capability.c:sys_capget. */ + rcu_read_lock(); + cred = __task_cred(target); *effective = cred->cap_effective; *inheritable = cred->cap_inheritable; *permitted = cred->cap_permitted; + rcu_read_unlock(); return 0; } @@ -433,7 +444,7 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) int cap_bprm_secureexec (struct linux_binprm *bprm) { - const struct cred *cred = current->cred; + const struct cred *cred = current_cred(); if (cred->uid != 0) { if (bprm->cap_effective) @@ -511,11 +522,11 @@ static inline void cap_emulate_setxuid (int old_ruid, int old_euid, if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) && (cred->uid != 0 && cred->euid != 0 && cred->suid != 0) && !issecure(SECURE_KEEP_CAPS)) { - cap_clear (cred->cap_permitted); - cap_clear (cred->cap_effective); + cap_clear(cred->cap_permitted); + cap_clear(cred->cap_effective); } if (old_euid == 0 && cred->euid != 0) { - cap_clear (cred->cap_effective); + cap_clear(cred->cap_effective); } if (old_euid != 0 && cred->euid == 0) { cred->cap_effective = cred->cap_permitted; @@ -582,9 +593,14 @@ int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, */ static int cap_safe_nice(struct task_struct *p) { - if (!cap_issubset(p->cred->cap_permitted, - current->cred->cap_permitted) && - !capable(CAP_SYS_NICE)) + int is_subset; + + rcu_read_lock(); + is_subset = cap_issubset(__task_cred(p)->cap_permitted, + current_cred()->cap_permitted); + rcu_read_unlock(); + + if (!is_subset && !capable(CAP_SYS_NICE)) return -EPERM; return 0; } diff --git a/security/keys/permission.c b/security/keys/permission.c index baf3d5f31e71..13c36164f284 100644 --- a/security/keys/permission.c +++ b/security/keys/permission.c @@ -22,13 +22,16 @@ int key_task_permission(const key_ref_t key_ref, struct task_struct *context, key_perm_t perm) { - struct cred *cred = context->cred; + const struct cred *cred; struct key *key; key_perm_t kperm; int ret; key = key_ref_to_ptr(key_ref); + rcu_read_lock(); + cred = __task_cred(context); + /* use the second 8-bits of permissions for keys the caller owns */ if (key->uid == cred->fsuid) { kperm = key->perm >> 16; @@ -43,10 +46,7 @@ int key_task_permission(const key_ref_t key_ref, goto use_these_perms; } - spin_lock(&cred->lock); ret = groups_search(cred->group_info, key->gid); - spin_unlock(&cred->lock); - if (ret) { kperm = key->perm >> 8; goto use_these_perms; @@ -57,6 +57,8 @@ int key_task_permission(const key_ref_t key_ref, kperm = key->perm; use_these_perms: + rcu_read_lock(); + /* use the top 8-bits of permissions for keys the caller possesses * - possessor permissions are additive with other permissions */ diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index ce8ac6073d57..212601ebaa46 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -412,10 +412,13 @@ key_ref_t search_process_keyrings(struct key_type *type, struct task_struct *context) { struct request_key_auth *rka; + struct cred *cred; key_ref_t key_ref, ret, err; might_sleep(); + cred = get_task_cred(context); + /* we want to return -EAGAIN or -ENOKEY if any of the keyrings were * searchable, but we failed to find a key or we found a negative key; * otherwise we want to return a sample error (probably -EACCES) if @@ -428,9 +431,9 @@ key_ref_t search_process_keyrings(struct key_type *type, err = ERR_PTR(-EAGAIN); /* search the thread keyring first */ - if (context->cred->thread_keyring) { + if (cred->thread_keyring) { key_ref = keyring_search_aux( - make_key_ref(context->cred->thread_keyring, 1), + make_key_ref(cred->thread_keyring, 1), context, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -495,9 +498,9 @@ key_ref_t search_process_keyrings(struct key_type *type, } } /* or search the user-session keyring */ - else if (context->cred->user->session_keyring) { + else if (cred->user->session_keyring) { key_ref = keyring_search_aux( - make_key_ref(context->cred->user->session_keyring, 1), + make_key_ref(cred->user->session_keyring, 1), context, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -519,20 +522,20 @@ key_ref_t search_process_keyrings(struct key_type *type, * search the keyrings of the process mentioned there * - we don't permit access to request_key auth keys via this method */ - if (context->cred->request_key_auth && + if (cred->request_key_auth && context == current && type != &key_type_request_key_auth ) { /* defend against the auth key being revoked */ - down_read(&context->cred->request_key_auth->sem); + down_read(&cred->request_key_auth->sem); - if (key_validate(context->cred->request_key_auth) == 0) { - rka = context->cred->request_key_auth->payload.data; + if (key_validate(cred->request_key_auth) == 0) { + rka = cred->request_key_auth->payload.data; key_ref = search_process_keyrings(type, description, match, rka->context); - up_read(&context->cred->request_key_auth->sem); + up_read(&cred->request_key_auth->sem); if (!IS_ERR(key_ref)) goto found; @@ -549,7 +552,7 @@ key_ref_t search_process_keyrings(struct key_type *type, break; } } else { - up_read(&context->cred->request_key_auth->sem); + up_read(&cred->request_key_auth->sem); } } @@ -557,6 +560,7 @@ key_ref_t search_process_keyrings(struct key_type *type, key_ref = ret ? ret : err; found: + put_cred(cred); return key_ref; } /* end search_process_keyrings() */ diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 10715d1330b9..c86303638235 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -95,13 +95,18 @@ extern void selnl_notify_setenforce(int val); static int task_has_security(struct task_struct *tsk, u32 perms) { - struct task_security_struct *tsec; - - tsec = tsk->cred->security; + const struct task_security_struct *tsec; + u32 sid = 0; + + rcu_read_lock(); + tsec = __task_cred(tsk)->security; + if (tsec) + sid = tsec->sid; + rcu_read_unlock(); if (!tsec) return -EACCES; - return avc_has_perm(tsec->sid, SECINITSID_SECURITY, + return avc_has_perm(sid, SECINITSID_SECURITY, SECCLASS_SECURITY, perms, NULL); } diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index e8a4fcb1ad04..11167fd567b9 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -30,6 +30,8 @@ #include "smack.h" +#define task_security(task) (task_cred_xxx((task), security)) + /* * I hope these are the hokeyist lines of code in the module. Casey. */ @@ -1012,7 +1014,7 @@ static void smack_cred_free(struct cred *cred) */ static int smack_task_setpgid(struct task_struct *p, pid_t pgid) { - return smk_curacc(p->cred->security, MAY_WRITE); + return smk_curacc(task_security(p), MAY_WRITE); } /** @@ -1023,7 +1025,7 @@ static int smack_task_setpgid(struct task_struct *p, pid_t pgid) */ static int smack_task_getpgid(struct task_struct *p) { - return smk_curacc(p->cred->security, MAY_READ); + return smk_curacc(task_security(p), MAY_READ); } /** @@ -1034,7 +1036,7 @@ static int smack_task_getpgid(struct task_struct *p) */ static int smack_task_getsid(struct task_struct *p) { - return smk_curacc(p->cred->security, MAY_READ); + return smk_curacc(task_security(p), MAY_READ); } /** @@ -1046,7 +1048,7 @@ static int smack_task_getsid(struct task_struct *p) */ static void smack_task_getsecid(struct task_struct *p, u32 *secid) { - *secid = smack_to_secid(p->cred->security); + *secid = smack_to_secid(task_security(p)); } /** @@ -1062,7 +1064,7 @@ static int smack_task_setnice(struct task_struct *p, int nice) rc = cap_task_setnice(p, nice); if (rc == 0) - rc = smk_curacc(p->cred->security, MAY_WRITE); + rc = smk_curacc(task_security(p), MAY_WRITE); return rc; } @@ -1079,7 +1081,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio) rc = cap_task_setioprio(p, ioprio); if (rc == 0) - rc = smk_curacc(p->cred->security, MAY_WRITE); + rc = smk_curacc(task_security(p), MAY_WRITE); return rc; } @@ -1091,7 +1093,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio) */ static int smack_task_getioprio(struct task_struct *p) { - return smk_curacc(p->cred->security, MAY_READ); + return smk_curacc(task_security(p), MAY_READ); } /** @@ -1109,7 +1111,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy, rc = cap_task_setscheduler(p, policy, lp); if (rc == 0) - rc = smk_curacc(p->cred->security, MAY_WRITE); + rc = smk_curacc(task_security(p), MAY_WRITE); return rc; } @@ -1121,7 +1123,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy, */ static int smack_task_getscheduler(struct task_struct *p) { - return smk_curacc(p->cred->security, MAY_READ); + return smk_curacc(task_security(p), MAY_READ); } /** @@ -1132,7 +1134,7 @@ static int smack_task_getscheduler(struct task_struct *p) */ static int smack_task_movememory(struct task_struct *p) { - return smk_curacc(p->cred->security, MAY_WRITE); + return smk_curacc(task_security(p), MAY_WRITE); } /** @@ -1155,13 +1157,13 @@ static int smack_task_kill(struct task_struct *p, struct siginfo *info, * can write the receiver. */ if (secid == 0) - return smk_curacc(p->cred->security, MAY_WRITE); + return smk_curacc(task_security(p), MAY_WRITE); /* * If the secid isn't 0 we're dealing with some USB IO * specific behavior. This is not clean. For one thing * we can't take privilege into account. */ - return smk_access(smack_from_secid(secid), p->cred->security, MAY_WRITE); + return smk_access(smack_from_secid(secid), task_security(p), MAY_WRITE); } /** @@ -1174,7 +1176,7 @@ static int smack_task_wait(struct task_struct *p) { int rc; - rc = smk_access(current->cred->security, p->cred->security, MAY_WRITE); + rc = smk_access(current_security(), task_security(p), MAY_WRITE); if (rc == 0) return 0; @@ -1205,7 +1207,7 @@ static int smack_task_wait(struct task_struct *p) static void smack_task_to_inode(struct task_struct *p, struct inode *inode) { struct inode_smack *isp = inode->i_security; - isp->smk_inode = p->cred->security; + isp->smk_inode = task_security(p); } /* @@ -2010,7 +2012,7 @@ static int smack_getprocattr(struct task_struct *p, char *name, char **value) if (strcmp(name, "current") != 0) return -EINVAL; - cp = kstrdup(p->cred->security, GFP_KERNEL); + cp = kstrdup(task_security(p), GFP_KERNEL); if (cp == NULL) return -ENOMEM; -- cgit v1.2.3 From bb952bb98a7e479262c7eb25d5592545a3af147d Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:20 +1100 Subject: CRED: Separate per-task-group keyrings from signal_struct Separate per-task-group keyrings from signal_struct and dangle their anchor from the cred struct rather than the signal_struct. Signed-off-by: David Howells Reviewed-by: James Morris Signed-off-by: James Morris --- include/linux/cred.h | 16 +++++++ include/linux/key.h | 8 +--- include/linux/sched.h | 6 --- kernel/cred.c | 63 +++++++++++++++++++++++++++ kernel/fork.c | 7 --- security/keys/process_keys.c | 100 +++++++++++++++++-------------------------- security/keys/request_key.c | 34 ++++++--------- 7 files changed, 135 insertions(+), 99 deletions(-) (limited to 'include') diff --git a/include/linux/cred.h b/include/linux/cred.h index 166ce4ddba64..62b9e532422d 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -71,6 +71,21 @@ extern int groups_search(const struct group_info *, gid_t); extern int in_group_p(gid_t); extern int in_egroup_p(gid_t); +/* + * The common credentials for a thread group + * - shared by CLONE_THREAD + */ +#ifdef CONFIG_KEYS +struct thread_group_cred { + atomic_t usage; + pid_t tgid; /* thread group process ID */ + spinlock_t lock; + struct key *session_keyring; /* keyring inherited over fork */ + struct key *process_keyring; /* keyring private to this process */ + struct rcu_head rcu; /* RCU deletion hook */ +}; +#endif + /* * The security context of a task * @@ -114,6 +129,7 @@ struct cred { * keys to */ struct key *thread_keyring; /* keyring private to this thread */ struct key *request_key_auth; /* assumed request_key authority */ + struct thread_group_cred *tgcred; /* thread-group shared credentials */ #endif #ifdef CONFIG_SECURITY void *security; /* subjective LSM security */ diff --git a/include/linux/key.h b/include/linux/key.h index df709e1af3cd..0836cc838b0c 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -278,9 +278,7 @@ extern ctl_table key_sysctls[]; */ extern void switch_uid_keyring(struct user_struct *new_user); extern int copy_keys(unsigned long clone_flags, struct task_struct *tsk); -extern int copy_thread_group_keys(struct task_struct *tsk); extern void exit_keys(struct task_struct *tsk); -extern void exit_thread_group_keys(struct signal_struct *tg); extern int suid_keys(struct task_struct *tsk); extern int exec_keys(struct task_struct *tsk); extern void key_fsuid_changed(struct task_struct *tsk); @@ -289,8 +287,8 @@ extern void key_init(void); #define __install_session_keyring(keyring) \ ({ \ - struct key *old_session = current->signal->session_keyring; \ - current->signal->session_keyring = keyring; \ + struct key *old_session = current->cred->tgcred->session_keyring; \ + current->cred->tgcred->session_keyring = keyring; \ old_session; \ }) @@ -308,9 +306,7 @@ extern void key_init(void); #define switch_uid_keyring(u) do { } while(0) #define __install_session_keyring(k) ({ NULL; }) #define copy_keys(f,t) 0 -#define copy_thread_group_keys(t) 0 #define exit_keys(t) do { } while(0) -#define exit_thread_group_keys(tg) do { } while(0) #define suid_keys(t) do { } while(0) #define exec_keys(t) do { } while(0) #define key_fsuid_changed(t) do { } while(0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 740cf946c8cc..2913252989b3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -571,12 +571,6 @@ struct signal_struct { */ struct rlimit rlim[RLIM_NLIMITS]; - /* keep the process-shared keyrings here so that they do the right - * thing in threads created with CLONE_THREAD */ -#ifdef CONFIG_KEYS - struct key *session_keyring; /* keyring inherited over fork */ - struct key *process_keyring; /* keyring private to this process */ -#endif #ifdef CONFIG_BSD_PROCESS_ACCT struct pacct_struct pacct; /* per-process accounting information */ #endif diff --git a/kernel/cred.c b/kernel/cred.c index 833244a7cb05..ac73e3617684 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -16,6 +16,17 @@ #include #include +/* + * The common credentials for the initial task's thread group + */ +#ifdef CONFIG_KEYS +static struct thread_group_cred init_tgcred = { + .usage = ATOMIC_INIT(2), + .tgid = 0, + .lock = SPIN_LOCK_UNLOCKED, +}; +#endif + /* * The initial credentials for the initial task */ @@ -28,8 +39,41 @@ struct cred init_cred = { .cap_bset = CAP_INIT_BSET, .user = INIT_USER, .group_info = &init_groups, +#ifdef CONFIG_KEYS + .tgcred = &init_tgcred, +#endif }; +/* + * Dispose of the shared task group credentials + */ +#ifdef CONFIG_KEYS +static void release_tgcred_rcu(struct rcu_head *rcu) +{ + struct thread_group_cred *tgcred = + container_of(rcu, struct thread_group_cred, rcu); + + BUG_ON(atomic_read(&tgcred->usage) != 0); + + key_put(tgcred->session_keyring); + key_put(tgcred->process_keyring); + kfree(tgcred); +} +#endif + +/* + * Release a set of thread group credentials. + */ +static void release_tgcred(struct cred *cred) +{ +#ifdef CONFIG_KEYS + struct thread_group_cred *tgcred = cred->tgcred; + + if (atomic_dec_and_test(&tgcred->usage)) + call_rcu(&tgcred->rcu, release_tgcred_rcu); +#endif +} + /* * The RCU callback to actually dispose of a set of credentials */ @@ -41,6 +85,7 @@ static void put_cred_rcu(struct rcu_head *rcu) key_put(cred->thread_keyring); key_put(cred->request_key_auth); + release_tgcred(cred); put_group_info(cred->group_info); free_uid(cred->user); security_cred_free(cred); @@ -71,12 +116,30 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) if (!pcred) return -ENOMEM; +#ifdef CONFIG_KEYS + if (clone_flags & CLONE_THREAD) { + atomic_inc(&pcred->tgcred->usage); + } else { + pcred->tgcred = kmalloc(sizeof(struct cred), GFP_KERNEL); + if (!pcred->tgcred) { + kfree(pcred); + return -ENOMEM; + } + atomic_set(&pcred->tgcred->usage, 1); + spin_lock_init(&pcred->tgcred->lock); + pcred->tgcred->process_keyring = NULL; + pcred->tgcred->session_keyring = + key_get(p->cred->tgcred->session_keyring); + } +#endif + #ifdef CONFIG_SECURITY pcred->security = NULL; #endif ret = security_cred_alloc(pcred); if (ret < 0) { + release_tgcred(pcred); kfree(pcred); return ret; } diff --git a/kernel/fork.c b/kernel/fork.c index c932e283ddfc..ded1972672a3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -802,12 +802,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) if (!sig) return -ENOMEM; - ret = copy_thread_group_keys(tsk); - if (ret < 0) { - kmem_cache_free(signal_cachep, sig); - return ret; - } - atomic_set(&sig->count, 1); atomic_set(&sig->live, 1); init_waitqueue_head(&sig->wait_chldexit); @@ -852,7 +846,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) void __cleanup_signal(struct signal_struct *sig) { thread_group_cputime_free(sig); - exit_thread_group_keys(sig); tty_kref_put(sig->tty); kmem_cache_free(signal_cachep, sig); } diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 212601ebaa46..70ee93406f30 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -189,7 +189,7 @@ int install_process_keyring(void) might_sleep(); - if (!tsk->signal->process_keyring) { + if (!tsk->cred->tgcred->process_keyring) { sprintf(buf, "_pid.%u", tsk->tgid); keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, @@ -200,12 +200,12 @@ int install_process_keyring(void) } /* attach keyring */ - spin_lock_irq(&tsk->sighand->siglock); - if (!tsk->signal->process_keyring) { - tsk->signal->process_keyring = keyring; + spin_lock_irq(&tsk->cred->tgcred->lock); + if (!tsk->cred->tgcred->process_keyring) { + tsk->cred->tgcred->process_keyring = keyring; keyring = NULL; } - spin_unlock_irq(&tsk->sighand->siglock); + spin_unlock_irq(&tsk->cred->tgcred->lock); key_put(keyring); } @@ -235,11 +235,11 @@ static int install_session_keyring(struct key *keyring) sprintf(buf, "_ses.%u", tsk->tgid); flags = KEY_ALLOC_QUOTA_OVERRUN; - if (tsk->signal->session_keyring) + if (tsk->cred->tgcred->session_keyring) flags = KEY_ALLOC_IN_QUOTA; - keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, - flags, NULL); + keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, + tsk, flags, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); } @@ -248,10 +248,10 @@ static int install_session_keyring(struct key *keyring) } /* install the keyring */ - spin_lock_irq(&tsk->sighand->siglock); - old = tsk->signal->session_keyring; - rcu_assign_pointer(tsk->signal->session_keyring, keyring); - spin_unlock_irq(&tsk->sighand->siglock); + spin_lock_irq(&tsk->cred->tgcred->lock); + old = tsk->cred->tgcred->session_keyring; + rcu_assign_pointer(tsk->cred->tgcred->session_keyring, keyring); + spin_unlock_irq(&tsk->cred->tgcred->lock); /* we're using RCU on the pointer, but there's no point synchronising * on it if it didn't previously point to anything */ @@ -264,28 +264,6 @@ static int install_session_keyring(struct key *keyring) } /* end install_session_keyring() */ -/*****************************************************************************/ -/* - * copy the keys in a thread group for fork without CLONE_THREAD - */ -int copy_thread_group_keys(struct task_struct *tsk) -{ - key_check(current->thread_group->session_keyring); - key_check(current->thread_group->process_keyring); - - /* no process keyring yet */ - tsk->signal->process_keyring = NULL; - - /* same session keyring */ - rcu_read_lock(); - tsk->signal->session_keyring = - key_get(rcu_dereference(current->signal->session_keyring)); - rcu_read_unlock(); - - return 0; - -} /* end copy_thread_group_keys() */ - /*****************************************************************************/ /* * copy the keys for fork @@ -305,17 +283,6 @@ int copy_keys(unsigned long clone_flags, struct task_struct *tsk) } /* end copy_keys() */ -/*****************************************************************************/ -/* - * dispose of thread group keys upon thread group destruction - */ -void exit_thread_group_keys(struct signal_struct *tg) -{ - key_put(tg->session_keyring); - key_put(tg->process_keyring); - -} /* end exit_thread_group_keys() */ - /*****************************************************************************/ /* * dispose of per-thread keys upon thread exit @@ -344,10 +311,10 @@ int exec_keys(struct task_struct *tsk) key_put(old); /* discard the process keyring from a newly exec'd task */ - spin_lock_irq(&tsk->sighand->siglock); - old = tsk->signal->process_keyring; - tsk->signal->process_keyring = NULL; - spin_unlock_irq(&tsk->sighand->siglock); + spin_lock_irq(&tsk->cred->tgcred->lock); + old = tsk->cred->tgcred->process_keyring; + tsk->cred->tgcred->process_keyring = NULL; + spin_unlock_irq(&tsk->cred->tgcred->lock); key_put(old); @@ -452,9 +419,9 @@ key_ref_t search_process_keyrings(struct key_type *type, } /* search the process keyring second */ - if (context->signal->process_keyring) { + if (cred->tgcred->process_keyring) { key_ref = keyring_search_aux( - make_key_ref(context->signal->process_keyring, 1), + make_key_ref(cred->tgcred->process_keyring, 1), context, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -473,11 +440,11 @@ key_ref_t search_process_keyrings(struct key_type *type, } /* search the session keyring */ - if (context->signal->session_keyring) { + if (cred->tgcred->session_keyring) { rcu_read_lock(); key_ref = keyring_search_aux( make_key_ref(rcu_dereference( - context->signal->session_keyring), + cred->tgcred->session_keyring), 1), context, type, description, match); rcu_read_unlock(); @@ -586,11 +553,13 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, { struct request_key_auth *rka; struct task_struct *t = current; - struct cred *cred = current_cred(); + struct cred *cred; struct key *key; key_ref_t key_ref, skey_ref; int ret; +try_again: + cred = get_current_cred(); key_ref = ERR_PTR(-ENOKEY); switch (id) { @@ -604,6 +573,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, key = ERR_PTR(ret); goto error; } + goto reget_creds; } key = cred->thread_keyring; @@ -612,7 +582,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, break; case KEY_SPEC_PROCESS_KEYRING: - if (!t->signal->process_keyring) { + if (!cred->tgcred->process_keyring) { if (!create) goto error; @@ -621,15 +591,16 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, key = ERR_PTR(ret); goto error; } + goto reget_creds; } - key = t->signal->process_keyring; + key = cred->tgcred->process_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_SESSION_KEYRING: - if (!t->signal->session_keyring) { + if (!cred->tgcred->session_keyring) { /* always install a session keyring upon access if one * doesn't exist yet */ ret = install_user_keyrings(); @@ -639,10 +610,11 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, cred->user->session_keyring); if (ret < 0) goto error; + goto reget_creds; } rcu_read_lock(); - key = rcu_dereference(t->signal->session_keyring); + key = rcu_dereference(cred->tgcred->session_keyring); atomic_inc(&key->usage); rcu_read_unlock(); key_ref = make_key_ref(key, 1); @@ -758,6 +730,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, goto invalid_key; error: + put_cred(cred); return key_ref; invalid_key: @@ -765,6 +738,12 @@ invalid_key: key_ref = ERR_PTR(ret); goto error; + /* if we attempted to install a keyring, then it may have caused new + * creds to be installed */ +reget_creds: + put_cred(cred); + goto try_again; + } /* end lookup_user_key() */ /*****************************************************************************/ @@ -777,6 +756,7 @@ invalid_key: long join_session_keyring(const char *name) { struct task_struct *tsk = current; + struct cred *cred = current->cred; struct key *keyring; long ret; @@ -787,7 +767,7 @@ long join_session_keyring(const char *name) goto error; rcu_read_lock(); - ret = rcu_dereference(tsk->signal->session_keyring)->serial; + ret = rcu_dereference(cred->tgcred->session_keyring)->serial; rcu_read_unlock(); goto error; } @@ -799,7 +779,7 @@ long join_session_keyring(const char *name) keyring = find_keyring_by_name(name, false); if (PTR_ERR(keyring) == -ENOKEY) { /* not found - try and create a new one */ - keyring = keyring_alloc(name, tsk->cred->uid, tsk->cred->gid, tsk, + keyring = keyring_alloc(name, cred->uid, cred->gid, tsk, KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 0488b0af5bd6..3d12558362df 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -66,7 +66,6 @@ static int call_sbin_request_key(struct key_construction *cons, const char *op, void *aux) { - struct task_struct *tsk = current; const struct cred *cred = current_cred(); key_serial_t prkey, sskey; struct key *key = cons->key, *authkey = cons->authkey, *keyring; @@ -109,18 +108,13 @@ static int call_sbin_request_key(struct key_construction *cons, cred->thread_keyring->serial : 0); prkey = 0; - if (tsk->signal->process_keyring) - prkey = tsk->signal->process_keyring->serial; + if (cred->tgcred->process_keyring) + prkey = cred->tgcred->process_keyring->serial; - sprintf(keyring_str[1], "%d", prkey); - - if (tsk->signal->session_keyring) { - rcu_read_lock(); - sskey = rcu_dereference(tsk->signal->session_keyring)->serial; - rcu_read_unlock(); - } else { + if (cred->tgcred->session_keyring) + sskey = rcu_dereference(cred->tgcred->session_keyring)->serial; + else sskey = cred->user->session_keyring->serial; - } sprintf(keyring_str[2], "%d", sskey); @@ -222,7 +216,7 @@ static int construct_key(struct key *key, const void *callout_info, static void construct_get_dest_keyring(struct key **_dest_keyring) { struct request_key_auth *rka; - struct task_struct *tsk = current; + const struct cred *cred = current_cred(); struct key *dest_keyring = *_dest_keyring, *authkey; kenter("%p", dest_keyring); @@ -234,11 +228,11 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) } else { /* use a default keyring; falling through the cases until we * find one that we actually have */ - switch (tsk->cred->jit_keyring) { + switch (cred->jit_keyring) { case KEY_REQKEY_DEFL_DEFAULT: case KEY_REQKEY_DEFL_REQUESTOR_KEYRING: - if (tsk->cred->request_key_auth) { - authkey = tsk->cred->request_key_auth; + if (cred->request_key_auth) { + authkey = cred->request_key_auth; down_read(&authkey->sem); rka = authkey->payload.data; if (!test_bit(KEY_FLAG_REVOKED, @@ -251,19 +245,19 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) } case KEY_REQKEY_DEFL_THREAD_KEYRING: - dest_keyring = key_get(tsk->cred->thread_keyring); + dest_keyring = key_get(cred->thread_keyring); if (dest_keyring) break; case KEY_REQKEY_DEFL_PROCESS_KEYRING: - dest_keyring = key_get(tsk->signal->process_keyring); + dest_keyring = key_get(cred->tgcred->process_keyring); if (dest_keyring) break; case KEY_REQKEY_DEFL_SESSION_KEYRING: rcu_read_lock(); dest_keyring = key_get( - rcu_dereference(tsk->signal->session_keyring)); + rcu_dereference(cred->tgcred->session_keyring)); rcu_read_unlock(); if (dest_keyring) @@ -271,11 +265,11 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) case KEY_REQKEY_DEFL_USER_SESSION_KEYRING: dest_keyring = - key_get(tsk->cred->user->session_keyring); + key_get(cred->user->session_keyring); break; case KEY_REQKEY_DEFL_USER_KEYRING: - dest_keyring = key_get(tsk->cred->user->uid_keyring); + dest_keyring = key_get(cred->user->uid_keyring); break; case KEY_REQKEY_DEFL_GROUP_KEYRING: -- cgit v1.2.3 From 745ca2475a6ac596e3d8d37c2759c0fbe2586227 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:22 +1100 Subject: CRED: Pass credentials through dentry_open() Pass credentials through dentry_open() so that the COW creds patch can have SELinux's flush_unauthorized_files() pass the appropriate creds back to itself when it opens its null chardev. The security_dentry_open() call also now takes a creds pointer, as does the dentry_open hook in struct security_operations. Signed-off-by: David Howells Acked-by: James Morris Signed-off-by: James Morris --- arch/powerpc/platforms/cell/spufs/inode.c | 4 ++-- arch/um/drivers/mconsole_kern.c | 3 ++- fs/autofs4/dev-ioctl.c | 3 ++- fs/ecryptfs/ecryptfs_kernel.h | 3 ++- fs/ecryptfs/kthread.c | 9 +++++---- fs/ecryptfs/main.c | 3 ++- fs/exportfs/expfs.c | 4 +++- fs/hppfs/hppfs.c | 6 ++++-- fs/nfsctl.c | 3 ++- fs/nfsd/nfs4recover.c | 3 ++- fs/nfsd/vfs.c | 3 ++- fs/open.c | 17 +++++++++++------ fs/xfs/linux-2.6/xfs_ioctl.c | 3 ++- include/linux/fs.h | 4 +++- include/linux/security.h | 7 ++++--- ipc/mqueue.c | 11 +++++++---- security/capability.c | 2 +- security/security.c | 4 ++-- security/selinux/hooks.c | 15 +++++++++------ 19 files changed, 67 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index e128ce7f0993..6296bfd9cb0b 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -323,7 +323,7 @@ static int spufs_context_open(struct dentry *dentry, struct vfsmount *mnt) goto out; } - filp = dentry_open(dentry, mnt, O_RDONLY); + filp = dentry_open(dentry, mnt, O_RDONLY, current_cred()); if (IS_ERR(filp)) { put_unused_fd(ret); ret = PTR_ERR(filp); @@ -562,7 +562,7 @@ static int spufs_gang_open(struct dentry *dentry, struct vfsmount *mnt) goto out; } - filp = dentry_open(dentry, mnt, O_RDONLY); + filp = dentry_open(dentry, mnt, O_RDONLY, current_cred()); if (IS_ERR(filp)) { put_unused_fd(ret); ret = PTR_ERR(filp); diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 19d579d74d27..16d3b3789a50 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -159,7 +159,8 @@ void mconsole_proc(struct mc_request *req) goto out_kill; } - file = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY); + file = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY, + current_cred()); if (IS_ERR(file)) { mconsole_reply(req, "Failed to open file", 1, 0); goto out_kill; diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 625abf5422e2..ec16255d27dd 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -307,7 +307,8 @@ static int autofs_dev_ioctl_open_mountpoint(const char *path, dev_t devid) goto out; } - filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY); + filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY, + current_cred()); if (IS_ERR(filp)) { err = PTR_ERR(filp); goto out; diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 3504cf9df358..a75026d35d16 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -691,7 +691,8 @@ int ecryptfs_init_kthread(void); void ecryptfs_destroy_kthread(void); int ecryptfs_privileged_open(struct file **lower_file, struct dentry *lower_dentry, - struct vfsmount *lower_mnt); + struct vfsmount *lower_mnt, + const struct cred *cred); int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry); #endif /* #ifndef ECRYPTFS_KERNEL_H */ diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c index c440c6b58b2d..c6d7a4d748a0 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c @@ -73,7 +73,7 @@ static int ecryptfs_threadfn(void *ignored) mntget(req->lower_mnt); (*req->lower_file) = dentry_open( req->lower_dentry, req->lower_mnt, - (O_RDWR | O_LARGEFILE)); + (O_RDWR | O_LARGEFILE), current_cred()); req->flags |= ECRYPTFS_REQ_PROCESSED; } wake_up(&req->wait); @@ -132,7 +132,8 @@ void ecryptfs_destroy_kthread(void) */ int ecryptfs_privileged_open(struct file **lower_file, struct dentry *lower_dentry, - struct vfsmount *lower_mnt) + struct vfsmount *lower_mnt, + const struct cred *cred) { struct ecryptfs_open_req *req; int rc = 0; @@ -143,7 +144,7 @@ int ecryptfs_privileged_open(struct file **lower_file, dget(lower_dentry); mntget(lower_mnt); (*lower_file) = dentry_open(lower_dentry, lower_mnt, - (O_RDWR | O_LARGEFILE)); + (O_RDWR | O_LARGEFILE), cred); if (!IS_ERR(*lower_file)) goto out; req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL); @@ -184,7 +185,7 @@ int ecryptfs_privileged_open(struct file **lower_file, dget(lower_dentry); mntget(lower_mnt); (*lower_file) = dentry_open(lower_dentry, lower_mnt, - (O_RDONLY | O_LARGEFILE)); + (O_RDONLY | O_LARGEFILE), cred); if (IS_ERR(*lower_file)) { rc = PTR_ERR(*req->lower_file); (*lower_file) = NULL; diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 64d2ba980df4..fd630713c5c7 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -115,6 +115,7 @@ void __ecryptfs_printk(const char *fmt, ...) */ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) { + const struct cred *cred = current_cred(); struct ecryptfs_inode_info *inode_info = ecryptfs_inode_to_private(ecryptfs_dentry->d_inode); int rc = 0; @@ -127,7 +128,7 @@ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); rc = ecryptfs_privileged_open(&inode_info->lower_file, - lower_dentry, lower_mnt); + lower_dentry, lower_mnt, cred); if (rc || IS_ERR(inode_info->lower_file)) { printk(KERN_ERR "Error opening lower persistent file " "for lower_dentry [0x%p] and lower_mnt [0x%p]; " diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 80246bad1b7f..ec1fb918200f 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -14,6 +14,7 @@ #include #include #include +#include #define dprintk(fmt, args...) do{}while(0) @@ -249,6 +250,7 @@ static int filldir_one(void * __buf, const char * name, int len, static int get_name(struct vfsmount *mnt, struct dentry *dentry, char *name, struct dentry *child) { + const struct cred *cred = current_cred(); struct inode *dir = dentry->d_inode; int error; struct file *file; @@ -263,7 +265,7 @@ static int get_name(struct vfsmount *mnt, struct dentry *dentry, /* * Open the directory ... */ - file = dentry_open(dget(dentry), mntget(mnt), O_RDONLY); + file = dentry_open(dget(dentry), mntget(mnt), O_RDONLY, cred); error = PTR_ERR(file); if (IS_ERR(file)) goto out; diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 2b3d1828db99..795e2c130ad7 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -426,6 +426,7 @@ static int file_mode(int fmode) static int hppfs_open(struct inode *inode, struct file *file) { + const struct cred *cred = current_cred(); struct hppfs_private *data; struct vfsmount *proc_mnt; struct dentry *proc_dentry; @@ -446,7 +447,7 @@ static int hppfs_open(struct inode *inode, struct file *file) /* XXX This isn't closed anywhere */ data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), - file_mode(file->f_mode)); + file_mode(file->f_mode), cred); err = PTR_ERR(data->proc_file); if (IS_ERR(data->proc_file)) goto out_free1; @@ -489,6 +490,7 @@ static int hppfs_open(struct inode *inode, struct file *file) static int hppfs_dir_open(struct inode *inode, struct file *file) { + const struct cred *cred = current_cred(); struct hppfs_private *data; struct vfsmount *proc_mnt; struct dentry *proc_dentry; @@ -502,7 +504,7 @@ static int hppfs_dir_open(struct inode *inode, struct file *file) proc_dentry = HPPFS_I(inode)->proc_dentry; proc_mnt = inode->i_sb->s_fs_info; data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), - file_mode(file->f_mode)); + file_mode(file->f_mode), cred); err = PTR_ERR(data->proc_file); if (IS_ERR(data->proc_file)) goto out_free; diff --git a/fs/nfsctl.c b/fs/nfsctl.c index aed8145d9087..cc4ef2642a51 100644 --- a/fs/nfsctl.c +++ b/fs/nfsctl.c @@ -41,7 +41,8 @@ static struct file *do_open(char *name, int flags) error = may_open(&nd, MAY_WRITE, FMODE_WRITE); if (!error) - return dentry_open(nd.path.dentry, nd.path.mnt, flags); + return dentry_open(nd.path.dentry, nd.path.mnt, flags, + current_cred()); path_put(&nd.path); return ERR_PTR(error); diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index a5e14e8695ea..632a50b4b371 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -226,7 +226,8 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) nfs4_save_user(&uid, &gid); - filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY); + filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY, + current_cred()); status = PTR_ERR(filp); if (IS_ERR(filp)) goto out; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 890d9a68c852..b59ec5a6ed24 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -671,6 +671,7 @@ __be32 nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access, struct file **filp) { + const struct cred *cred = current_cred(); struct dentry *dentry; struct inode *inode; int flags = O_RDONLY|O_LARGEFILE; @@ -725,7 +726,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, DQUOT_INIT(inode); } *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt), - flags); + flags, cred); if (IS_ERR(*filp)) host_err = PTR_ERR(*filp); out_nfserr: diff --git a/fs/open.c b/fs/open.c index b1238e195e7e..f96eaab280a3 100644 --- a/fs/open.c +++ b/fs/open.c @@ -783,7 +783,8 @@ static inline int __get_file_write_access(struct inode *inode, static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, struct file *f, - int (*open)(struct inode *, struct file *)) + int (*open)(struct inode *, struct file *), + const struct cred *cred) { struct inode *inode; int error; @@ -807,7 +808,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, f->f_op = fops_get(inode->i_fop); file_move(f, &inode->i_sb->s_files); - error = security_dentry_open(f); + error = security_dentry_open(f, cred); if (error) goto cleanup_all; @@ -882,6 +883,8 @@ cleanup_file: struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, int (*open)(struct inode *, struct file *)) { + const struct cred *cred = current_cred(); + if (IS_ERR(nd->intent.open.file)) goto out; if (IS_ERR(dentry)) @@ -889,7 +892,7 @@ struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt), nd->intent.open.flags - 1, nd->intent.open.file, - open); + open, cred); out: return nd->intent.open.file; out_err: @@ -908,6 +911,7 @@ EXPORT_SYMBOL_GPL(lookup_instantiate_filp); */ struct file *nameidata_to_filp(struct nameidata *nd, int flags) { + const struct cred *cred = current_cred(); struct file *filp; /* Pick up the filp from the open intent */ @@ -915,7 +919,7 @@ struct file *nameidata_to_filp(struct nameidata *nd, int flags) /* Has the filesystem initialised the file for us? */ if (filp->f_path.dentry == NULL) filp = __dentry_open(nd->path.dentry, nd->path.mnt, flags, filp, - NULL); + NULL, cred); else path_put(&nd->path); return filp; @@ -925,7 +929,8 @@ struct file *nameidata_to_filp(struct nameidata *nd, int flags) * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an * error. */ -struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) +struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, + const struct cred *cred) { int error; struct file *f; @@ -950,7 +955,7 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) return ERR_PTR(error); } - return __dentry_open(dentry, mnt, flags, f, NULL); + return __dentry_open(dentry, mnt, flags, f, NULL, cred); } EXPORT_SYMBOL(dentry_open); diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 67c72aec97e6..281cbd5a25cf 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -256,6 +256,7 @@ xfs_open_by_handle( struct file *parfilp, struct inode *parinode) { + const struct cred *cred = current_cred(); int error; int new_fd; int permflag; @@ -321,7 +322,7 @@ xfs_open_by_handle( mntget(parfilp->f_path.mnt); /* Create file pointer. */ - filp = dentry_open(dentry, parfilp->f_path.mnt, hreq.oflags); + filp = dentry_open(dentry, parfilp->f_path.mnt, hreq.oflags, cred); if (IS_ERR(filp)) { put_unused_fd(new_fd); return -XFS_ERROR(-PTR_ERR(filp)); diff --git a/include/linux/fs.h b/include/linux/fs.h index b3d404aaabed..3bfec1327b8d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -315,6 +315,7 @@ struct poll_table_struct; struct kstatfs; struct vm_area_struct; struct vfsmount; +struct cred; extern void __init inode_init(void); extern void __init inode_init_early(void); @@ -1673,7 +1674,8 @@ extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, extern long do_sys_open(int dfd, const char __user *filename, int flags, int mode); extern struct file *filp_open(const char *, int, int); -extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); +extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, + const struct cred *); extern int filp_close(struct file *, fl_owner_t id); extern char * getname(const char __user *); diff --git a/include/linux/security.h b/include/linux/security.h index 9239cc11eb9c..7e9fe046a0d1 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1402,7 +1402,7 @@ struct security_operations { int (*file_send_sigiotask) (struct task_struct *tsk, struct fown_struct *fown, int sig); int (*file_receive) (struct file *file); - int (*dentry_open) (struct file *file); + int (*dentry_open) (struct file *file, const struct cred *cred); int (*task_create) (unsigned long clone_flags); int (*cred_alloc_security) (struct cred *cred); @@ -1658,7 +1658,7 @@ int security_file_set_fowner(struct file *file); int security_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown, int sig); int security_file_receive(struct file *file); -int security_dentry_open(struct file *file); +int security_dentry_open(struct file *file, const struct cred *cred); int security_task_create(unsigned long clone_flags); int security_cred_alloc(struct cred *cred); void security_cred_free(struct cred *cred); @@ -2171,7 +2171,8 @@ static inline int security_file_receive(struct file *file) return 0; } -static inline int security_dentry_open(struct file *file) +static inline int security_dentry_open(struct file *file, + const struct cred *cred) { return 0; } diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 1151881ccb9a..d9393f8e4c3e 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -594,6 +594,7 @@ static int mq_attr_ok(struct mq_attr *attr) static struct file *do_create(struct dentry *dir, struct dentry *dentry, int oflag, mode_t mode, struct mq_attr __user *u_attr) { + const struct cred *cred = current_cred(); struct mq_attr attr; struct file *result; int ret; @@ -618,7 +619,7 @@ static struct file *do_create(struct dentry *dir, struct dentry *dentry, if (ret) goto out_drop_write; - result = dentry_open(dentry, mqueue_mnt, oflag); + result = dentry_open(dentry, mqueue_mnt, oflag, cred); /* * dentry_open() took a persistent mnt_want_write(), * so we can now drop this one. @@ -637,8 +638,10 @@ out: /* Opens existing queue */ static struct file *do_open(struct dentry *dentry, int oflag) { -static int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE, - MAY_READ | MAY_WRITE }; + const struct cred *cred = current_cred(); + + static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE, + MAY_READ | MAY_WRITE }; if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) { dput(dentry); @@ -652,7 +655,7 @@ static int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE, return ERR_PTR(-EACCES); } - return dentry_open(dentry, mqueue_mnt, oflag); + return dentry_open(dentry, mqueue_mnt, oflag, cred); } asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode, diff --git a/security/capability.c b/security/capability.c index 6c4b5137ca7b..fac2f61b69a9 100644 --- a/security/capability.c +++ b/security/capability.c @@ -330,7 +330,7 @@ static int cap_file_receive(struct file *file) return 0; } -static int cap_dentry_open(struct file *file) +static int cap_dentry_open(struct file *file, const struct cred *cred) { return 0; } diff --git a/security/security.c b/security/security.c index d058f7d5b10a..f40a0a04c3c2 100644 --- a/security/security.c +++ b/security/security.c @@ -606,9 +606,9 @@ int security_file_receive(struct file *file) return security_ops->file_receive(file); } -int security_dentry_open(struct file *file) +int security_dentry_open(struct file *file, const struct cred *cred) { - return security_ops->dentry_open(file); + return security_ops->dentry_open(file, cred); } int security_task_create(unsigned long clone_flags) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index cc6e5a3f10cc..f20cbd681ba6 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2150,9 +2150,9 @@ extern struct vfsmount *selinuxfs_mount; extern struct dentry *selinux_null; /* Derived from fs/exec.c:flush_old_files. */ -static inline void flush_unauthorized_files(struct files_struct *files) +static inline void flush_unauthorized_files(const struct cred *cred, + struct files_struct *files) { - const struct cred *cred = current_cred(); struct avc_audit_data ad; struct file *file, *devnull = NULL; struct tty_struct *tty; @@ -2222,7 +2222,10 @@ static inline void flush_unauthorized_files(struct files_struct *files) if (devnull) { get_file(devnull); } else { - devnull = dentry_open(dget(selinux_null), mntget(selinuxfs_mount), O_RDWR); + devnull = dentry_open( + dget(selinux_null), + mntget(selinuxfs_mount), + O_RDWR, cred); if (IS_ERR(devnull)) { devnull = NULL; put_unused_fd(fd); @@ -2302,6 +2305,7 @@ static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) */ static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm) { + const struct cred *cred = current_cred(); struct task_security_struct *tsec; struct rlimit *rlim, *initrlim; struct itimerval itimer; @@ -2321,7 +2325,7 @@ static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm) return; /* Close files for which the new task SID is not authorized. */ - flush_unauthorized_files(current->files); + flush_unauthorized_files(cred, current->files); /* Check whether the new SID can inherit signal state from the old SID. If not, clear itimers to avoid @@ -3202,9 +3206,8 @@ static int selinux_file_receive(struct file *file) return file_has_perm(cred, file, file_to_av(file)); } -static int selinux_dentry_open(struct file *file) +static int selinux_dentry_open(struct file *file, const struct cred *cred) { - const struct cred *cred = current_cred(); struct file_security_struct *fsec; struct inode *inode; struct inode_security_struct *isec; -- cgit v1.2.3 From d84f4f992cbd76e8f39c488cf0c5d123843923b1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:23 +1100 Subject: CRED: Inaugurate COW credentials Inaugurate copy-on-write credentials management. This uses RCU to manage the credentials pointer in the task_struct with respect to accesses by other tasks. A process may only modify its own credentials, and so does not need locking to access or modify its own credentials. A mutex (cred_replace_mutex) is added to the task_struct to control the effect of PTRACE_ATTACHED on credential calculations, particularly with respect to execve(). With this patch, the contents of an active credentials struct may not be changed directly; rather a new set of credentials must be prepared, modified and committed using something like the following sequence of events: struct cred *new = prepare_creds(); int ret = blah(new); if (ret < 0) { abort_creds(new); return ret; } return commit_creds(new); There are some exceptions to this rule: the keyrings pointed to by the active credentials may be instantiated - keyrings violate the COW rule as managing COW keyrings is tricky, given that it is possible for a task to directly alter the keys in a keyring in use by another task. To help enforce this, various pointers to sets of credentials, such as those in the task_struct, are declared const. The purpose of this is compile-time discouragement of altering credentials through those pointers. Once a set of credentials has been made public through one of these pointers, it may not be modified, except under special circumstances: (1) Its reference count may incremented and decremented. (2) The keyrings to which it points may be modified, but not replaced. The only safe way to modify anything else is to create a replacement and commit using the functions described in Documentation/credentials.txt (which will be added by a later patch). This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). This now prepares and commits credentials in various places in the security code rather than altering the current creds directly. (2) Temporary credential overrides. do_coredump() and sys_faccessat() now prepare their own credentials and temporarily override the ones currently on the acting thread, whilst preventing interference from other threads by holding cred_replace_mutex on the thread being dumped. This will be replaced in a future patch by something that hands down the credentials directly to the functions being called, rather than altering the task's objective credentials. (3) LSM interface. A number of functions have been changed, added or removed: (*) security_capset_check(), ->capset_check() (*) security_capset_set(), ->capset_set() Removed in favour of security_capset(). (*) security_capset(), ->capset() New. This is passed a pointer to the new creds, a pointer to the old creds and the proposed capability sets. It should fill in the new creds or return an error. All pointers, barring the pointer to the new creds, are now const. (*) security_bprm_apply_creds(), ->bprm_apply_creds() Changed; now returns a value, which will cause the process to be killed if it's an error. (*) security_task_alloc(), ->task_alloc_security() Removed in favour of security_prepare_creds(). (*) security_cred_free(), ->cred_free() New. Free security data attached to cred->security. (*) security_prepare_creds(), ->cred_prepare() New. Duplicate any security data attached to cred->security. (*) security_commit_creds(), ->cred_commit() New. Apply any security effects for the upcoming installation of new security by commit_creds(). (*) security_task_post_setuid(), ->task_post_setuid() Removed in favour of security_task_fix_setuid(). (*) security_task_fix_setuid(), ->task_fix_setuid() Fix up the proposed new credentials for setuid(). This is used by cap_set_fix_setuid() to implicitly adjust capabilities in line with setuid() changes. Changes are made to the new credentials, rather than the task itself as in security_task_post_setuid(). (*) security_task_reparent_to_init(), ->task_reparent_to_init() Removed. Instead the task being reparented to init is referred directly to init's credentials. NOTE! This results in the loss of some state: SELinux's osid no longer records the sid of the thread that forked it. (*) security_key_alloc(), ->key_alloc() (*) security_key_permission(), ->key_permission() Changed. These now take cred pointers rather than task pointers to refer to the security context. (4) sys_capset(). This has been simplified and uses less locking. The LSM functions it calls have been merged. (5) reparent_to_kthreadd(). This gives the current thread the same credentials as init by simply using commit_thread() to point that way. (6) __sigqueue_alloc() and switch_uid() __sigqueue_alloc() can't stop the target task from changing its creds beneath it, so this function gets a reference to the currently applicable user_struct which it then passes into the sigqueue struct it returns if successful. switch_uid() is now called from commit_creds(), and possibly should be folded into that. commit_creds() should take care of protecting __sigqueue_alloc(). (7) [sg]et[ug]id() and co and [sg]et_current_groups. The set functions now all use prepare_creds(), commit_creds() and abort_creds() to build and check a new set of credentials before applying it. security_task_set[ug]id() is called inside the prepared section. This guarantees that nothing else will affect the creds until we've finished. The calling of set_dumpable() has been moved into commit_creds(). Much of the functionality of set_user() has been moved into commit_creds(). The get functions all simply access the data directly. (8) security_task_prctl() and cap_task_prctl(). security_task_prctl() has been modified to return -ENOSYS if it doesn't want to handle a function, or otherwise return the return value directly rather than through an argument. Additionally, cap_task_prctl() now prepares a new set of credentials, even if it doesn't end up using it. (9) Keyrings. A number of changes have been made to the keyrings code: (a) switch_uid_keyring(), copy_keys(), exit_keys() and suid_keys() have all been dropped and built in to the credentials functions directly. They may want separating out again later. (b) key_alloc() and search_process_keyrings() now take a cred pointer rather than a task pointer to specify the security context. (c) copy_creds() gives a new thread within the same thread group a new thread keyring if its parent had one, otherwise it discards the thread keyring. (d) The authorisation key now points directly to the credentials to extend the search into rather pointing to the task that carries them. (e) Installing thread, process or session keyrings causes a new set of credentials to be created, even though it's not strictly necessary for process or session keyrings (they're shared). (10) Usermode helper. The usermode helper code now carries a cred struct pointer in its subprocess_info struct instead of a new session keyring pointer. This set of credentials is derived from init_cred and installed on the new process after it has been cloned. call_usermodehelper_setup() allocates the new credentials and call_usermodehelper_freeinfo() discards them if they haven't been used. A special cred function (prepare_usermodeinfo_creds()) is provided specifically for call_usermodehelper_setup() to call. call_usermodehelper_setkeys() adjusts the credentials to sport the supplied keyring as the new session keyring. (11) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) selinux_setprocattr() no longer does its check for whether the current ptracer can access processes with the new SID inside the lock that covers getting the ptracer's SID. Whilst this lock ensures that the check is done with the ptracer pinned, the result is only valid until the lock is released, so there's no point doing it inside the lock. (12) is_single_threaded(). This function has been extracted from selinux_setprocattr() and put into a file of its own in the lib/ directory as join_session_keyring() now wants to use it too. The code in SELinux just checked to see whether a task shared mm_structs with other tasks (CLONE_VM), but that isn't good enough. We really want to know if they're part of the same thread group (CLONE_THREAD). (13) nfsd. The NFS server daemon now has to use the COW credentials to set the credentials it is going to use. It really needs to pass the credentials down to the functions it calls, but it can't do that until other patches in this series have been applied. Signed-off-by: David Howells Acked-by: James Morris Signed-off-by: James Morris --- fs/exec.c | 31 ++- fs/nfsd/auth.c | 92 ++++---- fs/nfsd/nfs4recover.c | 68 +++--- fs/nfsd/nfsfh.c | 11 +- fs/open.c | 31 ++- include/linux/audit.h | 22 +- include/linux/capability.h | 2 - include/linux/cred.h | 44 +++- include/linux/init_task.h | 2 + include/linux/key.h | 22 +- include/linux/sched.h | 6 +- include/linux/security.h | 178 +++++++--------- init/main.c | 1 + kernel/auditsc.c | 42 ++-- kernel/capability.c | 78 +++---- kernel/cred-internals.h | 21 ++ kernel/cred.c | 321 ++++++++++++++++++++++++---- kernel/exit.c | 9 +- kernel/fork.c | 7 +- kernel/kmod.c | 30 ++- kernel/ptrace.c | 9 + kernel/signal.c | 10 +- kernel/sys.c | 450 +++++++++++++++++++++------------------ kernel/user.c | 37 +--- kernel/user_namespace.c | 12 +- lib/Makefile | 2 +- net/rxrpc/ar-key.c | 6 +- security/capability.c | 21 +- security/commoncap.c | 265 +++++++++++------------ security/keys/internal.h | 17 +- security/keys/key.c | 25 +-- security/keys/keyctl.c | 95 ++++++--- security/keys/keyring.c | 14 +- security/keys/permission.c | 24 ++- security/keys/proc.c | 8 +- security/keys/process_keys.c | 333 ++++++++++++++--------------- security/keys/request_key.c | 29 ++- security/keys/request_key_auth.c | 41 ++-- security/security.c | 58 +++-- security/selinux/hooks.c | 286 ++++++++++++------------- security/smack/smack_lsm.c | 82 ++++--- 41 files changed, 1603 insertions(+), 1239 deletions(-) create mode 100644 kernel/cred-internals.h (limited to 'include') diff --git a/fs/exec.c b/fs/exec.c index a5330e1a2216..9bd3559ddece 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1007,13 +1007,12 @@ int flush_old_exec(struct linux_binprm * bprm) */ current->mm->task_size = TASK_SIZE; - if (bprm->e_uid != current_euid() || bprm->e_gid != current_egid()) { - suid_keys(current); + if (bprm->e_uid != current_euid() || + bprm->e_gid != current_egid()) { set_dumpable(current->mm, suid_dumpable); current->pdeath_signal = 0; } else if (file_permission(bprm->file, MAY_READ) || (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) { - suid_keys(current); set_dumpable(current->mm, suid_dumpable); } @@ -1096,10 +1095,8 @@ void compute_creds(struct linux_binprm *bprm) { int unsafe; - if (bprm->e_uid != current_uid()) { - suid_keys(current); + if (bprm->e_uid != current_uid()) current->pdeath_signal = 0; - } exec_keys(current); task_lock(current); @@ -1709,8 +1706,9 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) struct linux_binfmt * binfmt; struct inode * inode; struct file * file; + const struct cred *old_cred; + struct cred *cred; int retval = 0; - int fsuid = current_fsuid(); int flag = 0; int ispipe = 0; unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur; @@ -1723,12 +1721,20 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) binfmt = current->binfmt; if (!binfmt || !binfmt->core_dump) goto fail; + + cred = prepare_creds(); + if (!cred) { + retval = -ENOMEM; + goto fail; + } + down_write(&mm->mmap_sem); /* * If another thread got here first, or we are not dumpable, bail out. */ if (mm->core_state || !get_dumpable(mm)) { up_write(&mm->mmap_sem); + put_cred(cred); goto fail; } @@ -1739,12 +1745,16 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) */ if (get_dumpable(mm) == 2) { /* Setuid core dump mode */ flag = O_EXCL; /* Stop rewrite attacks */ - current->cred->fsuid = 0; /* Dump root private */ + cred->fsuid = 0; /* Dump root private */ } retval = coredump_wait(exit_code, &core_state); - if (retval < 0) + if (retval < 0) { + put_cred(cred); goto fail; + } + + old_cred = override_creds(cred); /* * Clear any false indication of pending signals that might @@ -1835,7 +1845,8 @@ fail_unlock: if (helper_argv) argv_free(helper_argv); - current->cred->fsuid = fsuid; + revert_creds(old_cred); + put_cred(cred); coredump_finish(mm); fail: return retval; diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 808fc03a6fbd..836ffa1047d9 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -27,55 +27,67 @@ int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) { - struct cred *act_as = current->cred ; - struct svc_cred cred = rqstp->rq_cred; + struct group_info *rqgi; + struct group_info *gi; + struct cred *new; int i; int flags = nfsexp_flags(rqstp, exp); int ret; + new = prepare_creds(); + if (!new) + return -ENOMEM; + + new->fsuid = rqstp->rq_cred.cr_uid; + new->fsgid = rqstp->rq_cred.cr_gid; + + rqgi = rqstp->rq_cred.cr_group_info; + if (flags & NFSEXP_ALLSQUASH) { - cred.cr_uid = exp->ex_anon_uid; - cred.cr_gid = exp->ex_anon_gid; - cred.cr_group_info = groups_alloc(0); + new->fsuid = exp->ex_anon_uid; + new->fsgid = exp->ex_anon_gid; + gi = groups_alloc(0); } else if (flags & NFSEXP_ROOTSQUASH) { - struct group_info *gi; - if (!cred.cr_uid) - cred.cr_uid = exp->ex_anon_uid; - if (!cred.cr_gid) - cred.cr_gid = exp->ex_anon_gid; - gi = groups_alloc(cred.cr_group_info->ngroups); - if (gi) - for (i = 0; i < cred.cr_group_info->ngroups; i++) { - if (!GROUP_AT(cred.cr_group_info, i)) - GROUP_AT(gi, i) = exp->ex_anon_gid; - else - GROUP_AT(gi, i) = GROUP_AT(cred.cr_group_info, i); - } - cred.cr_group_info = gi; - } else - get_group_info(cred.cr_group_info); - - if (cred.cr_uid != (uid_t) -1) - act_as->fsuid = cred.cr_uid; - else - act_as->fsuid = exp->ex_anon_uid; - if (cred.cr_gid != (gid_t) -1) - act_as->fsgid = cred.cr_gid; - else - act_as->fsgid = exp->ex_anon_gid; + if (!new->fsuid) + new->fsuid = exp->ex_anon_uid; + if (!new->fsgid) + new->fsgid = exp->ex_anon_gid; - if (!cred.cr_group_info) - return -ENOMEM; - ret = set_groups(act_as, cred.cr_group_info); - put_group_info(cred.cr_group_info); - if ((cred.cr_uid)) { - act_as->cap_effective = - cap_drop_nfsd_set(act_as->cap_effective); + gi = groups_alloc(rqgi->ngroups); + if (!gi) + goto oom; + + for (i = 0; i < rqgi->ngroups; i++) { + if (!GROUP_AT(rqgi, i)) + GROUP_AT(gi, i) = exp->ex_anon_gid; + else + GROUP_AT(gi, i) = GROUP_AT(rqgi, i); + } } else { - act_as->cap_effective = - cap_raise_nfsd_set(act_as->cap_effective, - act_as->cap_permitted); + gi = get_group_info(rqgi); } + + if (new->fsuid == (uid_t) -1) + new->fsuid = exp->ex_anon_uid; + if (new->fsgid == (gid_t) -1) + new->fsgid = exp->ex_anon_gid; + + ret = set_groups(new, gi); + put_group_info(gi); + if (!ret) + goto error; + + if (new->uid) + new->cap_effective = cap_drop_nfsd_set(new->cap_effective); + else + new->cap_effective = cap_raise_nfsd_set(new->cap_effective, + new->cap_permitted); + return commit_creds(new); + +oom: + ret = -ENOMEM; +error: + abort_creds(new); return ret; } diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 632a50b4b371..9371ea12d7fa 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -54,20 +54,26 @@ static struct path rec_dir; static int rec_dir_init = 0; -static void -nfs4_save_user(uid_t *saveuid, gid_t *savegid) +static int +nfs4_save_creds(const struct cred **original_creds) { - *saveuid = current->cred->fsuid; - *savegid = current->cred->fsgid; - current->cred->fsuid = 0; - current->cred->fsgid = 0; + struct cred *new; + + new = prepare_creds(); + if (!new) + return -ENOMEM; + + new->fsuid = 0; + new->fsgid = 0; + *original_creds = override_creds(new); + put_cred(new); + return 0; } static void -nfs4_reset_user(uid_t saveuid, gid_t savegid) +nfs4_reset_creds(const struct cred *original) { - current->cred->fsuid = saveuid; - current->cred->fsgid = savegid; + revert_creds(original); } static void @@ -129,10 +135,9 @@ nfsd4_sync_rec_dir(void) int nfsd4_create_clid_dir(struct nfs4_client *clp) { + const struct cred *original_cred; char *dname = clp->cl_recdir; struct dentry *dentry; - uid_t uid; - gid_t gid; int status; dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); @@ -140,7 +145,9 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) if (!rec_dir_init || clp->cl_firststate) return 0; - nfs4_save_user(&uid, &gid); + status = nfs4_save_creds(&original_cred); + if (status < 0) + return status; /* lock the parent */ mutex_lock(&rec_dir.dentry->d_inode->i_mutex); @@ -168,7 +175,7 @@ out_unlock: clp->cl_firststate = 1; nfsd4_sync_rec_dir(); } - nfs4_reset_user(uid, gid); + nfs4_reset_creds(original_cred); dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status); return status; } @@ -211,20 +218,21 @@ nfsd4_build_dentrylist(void *arg, const char *name, int namlen, static int nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) { + const struct cred *original_cred; struct file *filp; struct dentry_list_arg dla = { .parent = dir, }; struct list_head *dentries = &dla.dentries; struct dentry_list *child; - uid_t uid; - gid_t gid; int status; if (!rec_dir_init) return 0; - nfs4_save_user(&uid, &gid); + status = nfs4_save_creds(&original_cred); + if (status < 0) + return status; filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY, current_cred()); @@ -250,7 +258,7 @@ out: dput(child->dentry); kfree(child); } - nfs4_reset_user(uid, gid); + nfs4_reset_creds(original_cred); return status; } @@ -312,8 +320,7 @@ out: void nfsd4_remove_clid_dir(struct nfs4_client *clp) { - uid_t uid; - gid_t gid; + const struct cred *original_cred; int status; if (!rec_dir_init || !clp->cl_firststate) @@ -323,9 +330,13 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) if (status) goto out; clp->cl_firststate = 0; - nfs4_save_user(&uid, &gid); + + status = nfs4_save_creds(&original_cred); + if (status < 0) + goto out; + status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1); - nfs4_reset_user(uid, gid); + nfs4_reset_creds(original_cred); if (status == 0) nfsd4_sync_rec_dir(); mnt_drop_write(rec_dir.mnt); @@ -402,16 +413,21 @@ nfsd4_recdir_load(void) { void nfsd4_init_recdir(char *rec_dirname) { - uid_t uid = 0; - gid_t gid = 0; - int status; + const struct cred *original_cred; + int status; printk("NFSD: Using %s as the NFSv4 state recovery directory\n", rec_dirname); BUG_ON(rec_dir_init); - nfs4_save_user(&uid, &gid); + status = nfs4_save_creds(&original_cred); + if (status < 0) { + printk("NFSD: Unable to change credentials to find recovery" + " directory: error %d\n", + status); + return; + } status = kern_path(rec_dirname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &rec_dir); @@ -421,7 +437,7 @@ nfsd4_init_recdir(char *rec_dirname) if (!status) rec_dir_init = 1; - nfs4_reset_user(uid, gid); + nfs4_reset_creds(original_cred); } void diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index e67cfaea0865..f0da7d9c3a92 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -186,9 +186,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) * access control settings being in effect, we cannot * fix that case easily. */ - current->cred->cap_effective = - cap_raise_nfsd_set(current->cred->cap_effective, - current->cred->cap_permitted); + struct cred *new = prepare_creds(); + if (!new) + return nfserrno(-ENOMEM); + new->cap_effective = + cap_raise_nfsd_set(new->cap_effective, + new->cap_permitted); + put_cred(override_creds(new)); + put_cred(new); } else { error = nfsd_setuser_and_check_port(rqstp, exp); if (error) diff --git a/fs/open.c b/fs/open.c index f96eaab280a3..c0a426d5766c 100644 --- a/fs/open.c +++ b/fs/open.c @@ -425,30 +425,33 @@ out: */ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) { - struct cred *cred = current->cred; + const struct cred *old_cred; + struct cred *override_cred; struct path path; struct inode *inode; - int old_fsuid, old_fsgid; - kernel_cap_t uninitialized_var(old_cap); /* !SECURE_NO_SETUID_FIXUP */ int res; if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ return -EINVAL; - old_fsuid = cred->fsuid; - old_fsgid = cred->fsgid; + override_cred = prepare_creds(); + if (!override_cred) + return -ENOMEM; - cred->fsuid = cred->uid; - cred->fsgid = cred->gid; + override_cred->fsuid = override_cred->uid; + override_cred->fsgid = override_cred->gid; if (!issecure(SECURE_NO_SETUID_FIXUP)) { /* Clear the capabilities if we switch to a non-root user */ - if (current->cred->uid) - old_cap = cap_set_effective(__cap_empty_set); + if (override_cred->uid) + cap_clear(override_cred->cap_effective); else - old_cap = cap_set_effective(cred->cap_permitted); + override_cred->cap_effective = + override_cred->cap_permitted; } + old_cred = override_creds(override_cred); + res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path); if (res) goto out; @@ -485,12 +488,8 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) out_path_release: path_put(&path); out: - cred->fsuid = old_fsuid; - cred->fsgid = old_fsgid; - - if (!issecure(SECURE_NO_SETUID_FIXUP)) - cap_set_effective(old_cap); - + revert_creds(old_cred); + put_cred(override_cred); return res; } diff --git a/include/linux/audit.h b/include/linux/audit.h index 6fbebac7b1bf..0b2fcb698a63 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -454,8 +454,10 @@ extern int __audit_mq_timedsend(mqd_t mqdes, size_t msg_len, unsigned int msg_pr extern int __audit_mq_timedreceive(mqd_t mqdes, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout); extern int __audit_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification); extern int __audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat); -extern void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE); -extern int __audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm); +extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm, + const struct cred *new, + const struct cred *old); +extern int __audit_log_capset(pid_t pid, const struct cred *new, const struct cred *old); static inline int audit_ipc_obj(struct kern_ipc_perm *ipcp) { @@ -522,16 +524,20 @@ static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) * * -Eric */ -static inline void audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE) +static inline int audit_log_bprm_fcaps(struct linux_binprm *bprm, + const struct cred *new, + const struct cred *old) { if (unlikely(!audit_dummy_context())) - __audit_log_bprm_fcaps(bprm, pP, pE); + return __audit_log_bprm_fcaps(bprm, new, old); + return 0; } -static inline int audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm) +static inline int audit_log_capset(pid_t pid, const struct cred *new, + const struct cred *old) { if (unlikely(!audit_dummy_context())) - return __audit_log_capset(pid, eff, inh, perm); + return __audit_log_capset(pid, new, old); return 0; } @@ -566,8 +572,8 @@ extern int audit_signals; #define audit_mq_timedreceive(d,l,p,t) ({ 0; }) #define audit_mq_notify(d,n) ({ 0; }) #define audit_mq_getsetattr(d,s) ({ 0; }) -#define audit_log_bprm_fcaps(b, p, e) do { ; } while (0) -#define audit_log_capset(pid, e, i, p) ({ 0; }) +#define audit_log_bprm_fcaps(b, ncr, ocr) ({ 0; }) +#define audit_log_capset(pid, ncr, ocr) ({ 0; }) #define audit_ptrace(t) ((void)0) #define audit_n_rules 0 #define audit_signals 0 diff --git a/include/linux/capability.h b/include/linux/capability.h index 7f26580a5a4d..e22f48c2a46f 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -519,8 +519,6 @@ extern const kernel_cap_t __cap_empty_set; extern const kernel_cap_t __cap_full_set; extern const kernel_cap_t __cap_init_eff_set; -kernel_cap_t cap_set_effective(const kernel_cap_t pE_new); - /** * has_capability - Determine if a task has a superior capability available * @t: The task in question diff --git a/include/linux/cred.h b/include/linux/cred.h index 62b9e532422d..eaf6fa695a04 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -84,6 +84,8 @@ struct thread_group_cred { struct key *process_keyring; /* keyring private to this process */ struct rcu_head rcu; /* RCU deletion hook */ }; + +extern void release_tgcred(struct cred *cred); #endif /* @@ -137,11 +139,30 @@ struct cred { struct user_struct *user; /* real user ID subscription */ struct group_info *group_info; /* supplementary groups for euid/fsgid */ struct rcu_head rcu; /* RCU deletion hook */ - spinlock_t lock; /* lock for pointer changes */ }; extern void __put_cred(struct cred *); extern int copy_creds(struct task_struct *, unsigned long); +extern struct cred *prepare_creds(void); +extern struct cred *prepare_usermodehelper_creds(void); +extern int commit_creds(struct cred *); +extern void abort_creds(struct cred *); +extern const struct cred *override_creds(const struct cred *) __deprecated; +extern void revert_creds(const struct cred *) __deprecated; +extern void __init cred_init(void); + +/** + * get_new_cred - Get a reference on a new set of credentials + * @cred: The new credentials to reference + * + * Get a reference on the specified set of new credentials. The caller must + * release the reference. + */ +static inline struct cred *get_new_cred(struct cred *cred) +{ + atomic_inc(&cred->usage); + return cred; +} /** * get_cred - Get a reference on a set of credentials @@ -150,10 +171,9 @@ extern int copy_creds(struct task_struct *, unsigned long); * Get a reference on the specified set of credentials. The caller must * release the reference. */ -static inline struct cred *get_cred(struct cred *cred) +static inline const struct cred *get_cred(const struct cred *cred) { - atomic_inc(&cred->usage); - return cred; + return get_new_cred((struct cred *) cred); } /** @@ -166,6 +186,8 @@ static inline struct cred *get_cred(struct cred *cred) static inline void put_cred(const struct cred *_cred) { struct cred *cred = (struct cred *) _cred; + + BUG_ON(atomic_read(&(cred)->usage) <= 0); if (atomic_dec_and_test(&(cred)->usage)) __put_cred(cred); } @@ -250,13 +272,13 @@ static inline void put_cred(const struct cred *_cred) __groups; \ }) -#define task_cred_xxx(task, xxx) \ -({ \ - __typeof__(task->cred->xxx) ___val; \ - rcu_read_lock(); \ - ___val = __task_cred((task))->xxx; \ - rcu_read_unlock(); \ - ___val; \ +#define task_cred_xxx(task, xxx) \ +({ \ + __typeof__(((struct cred *)NULL)->xxx) ___val; \ + rcu_read_lock(); \ + ___val = __task_cred((task))->xxx; \ + rcu_read_unlock(); \ + ___val; \ }) #define task_uid(task) (task_cred_xxx((task), uid)) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 5e24c54b6dfd..08c3b24ad9a8 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -150,6 +150,8 @@ extern struct cred init_cred; .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ .cred = &init_cred, \ + .cred_exec_mutex = \ + __MUTEX_INITIALIZER(tsk.cred_exec_mutex), \ .comm = "swapper", \ .thread = INIT_THREAD, \ .fs = &init_fs, \ diff --git a/include/linux/key.h b/include/linux/key.h index 0836cc838b0c..69ecf0934b02 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -73,6 +73,7 @@ struct key; struct seq_file; struct user_struct; struct signal_struct; +struct cred; struct key_type; struct key_owner; @@ -181,7 +182,7 @@ struct key { extern struct key *key_alloc(struct key_type *type, const char *desc, uid_t uid, gid_t gid, - struct task_struct *ctx, + const struct cred *cred, key_perm_t perm, unsigned long flags); @@ -249,7 +250,7 @@ extern int key_unlink(struct key *keyring, struct key *key); extern struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, - struct task_struct *ctx, + const struct cred *cred, unsigned long flags, struct key *dest); @@ -276,22 +277,12 @@ extern ctl_table key_sysctls[]; /* * the userspace interface */ -extern void switch_uid_keyring(struct user_struct *new_user); -extern int copy_keys(unsigned long clone_flags, struct task_struct *tsk); -extern void exit_keys(struct task_struct *tsk); -extern int suid_keys(struct task_struct *tsk); +extern int install_thread_keyring_to_cred(struct cred *cred); extern int exec_keys(struct task_struct *tsk); extern void key_fsuid_changed(struct task_struct *tsk); extern void key_fsgid_changed(struct task_struct *tsk); extern void key_init(void); -#define __install_session_keyring(keyring) \ -({ \ - struct key *old_session = current->cred->tgcred->session_keyring; \ - current->cred->tgcred->session_keyring = keyring; \ - old_session; \ -}) - #else /* CONFIG_KEYS */ #define key_validate(k) 0 @@ -303,11 +294,6 @@ extern void key_init(void); #define make_key_ref(k, p) NULL #define key_ref_to_ptr(k) NULL #define is_key_possessed(k) 0 -#define switch_uid_keyring(u) do { } while(0) -#define __install_session_keyring(k) ({ NULL; }) -#define copy_keys(f,t) 0 -#define exit_keys(t) do { } while(0) -#define suid_keys(t) do { } while(0) #define exec_keys(t) do { } while(0) #define key_fsuid_changed(t) do { } while(0) #define key_fsgid_changed(t) do { } while(0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 2913252989b3..121d655e460d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1145,7 +1145,8 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - struct cred *cred; /* actual/objective task credentials */ + const struct cred *cred; /* actual/objective task credentials (COW) */ + struct mutex cred_exec_mutex; /* execve vs ptrace cred calculation mutex */ char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock @@ -1720,7 +1721,6 @@ static inline struct user_struct *get_uid(struct user_struct *u) return u; } extern void free_uid(struct user_struct *); -extern void switch_uid(struct user_struct *); extern void release_uids(struct user_namespace *ns); #include @@ -1870,6 +1870,8 @@ static inline unsigned long wait_task_inactive(struct task_struct *p, #define for_each_process(p) \ for (p = &init_task ; (p = next_task(p)) != &init_task ; ) +extern bool is_single_threaded(struct task_struct *); + /* * Careful: do_each_thread/while_each_thread is a double loop so * 'break' will not work as expected - use goto instead. diff --git a/include/linux/security.h b/include/linux/security.h index 7e9fe046a0d1..68be11251447 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -53,24 +53,21 @@ extern int cap_settime(struct timespec *ts, struct timezone *tz); extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -extern int cap_capset_check(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); -extern void cap_capset_set(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); +extern int cap_capset(struct cred *new, const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); extern int cap_bprm_set_security(struct linux_binprm *bprm); -extern void cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); +extern int cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); extern int cap_bprm_secureexec(struct linux_binprm *bprm); extern int cap_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); -extern int cap_task_post_setuid(uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags); -extern void cap_task_reparent_to_init(struct task_struct *p); +extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5, long *rc_p); + unsigned long arg4, unsigned long arg5); extern int cap_task_setscheduler(struct task_struct *p, int policy, struct sched_param *lp); extern int cap_task_setioprio(struct task_struct *p, int ioprio); extern int cap_task_setnice(struct task_struct *p, int nice); @@ -170,8 +167,8 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Compute and set the security attributes of a process being transformed * by an execve operation based on the old attributes (current->security) * and the information saved in @bprm->security by the set_security hook. - * Since this hook function (and its caller) are void, this hook can not - * return an error. However, it can leave the security attributes of the + * Since this function may return an error, in which case the process will + * be killed. However, it can leave the security attributes of the * process unchanged if an access failure occurs at this point. * bprm_apply_creds is called under task_lock. @unsafe indicates various * reasons why it may be unsafe to change security state. @@ -593,15 +590,18 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * manual page for definitions of the @clone_flags. * @clone_flags contains the flags indicating what should be shared. * Return 0 if permission is granted. - * @cred_alloc_security: - * @cred contains the cred struct for child process. - * Allocate and attach a security structure to the cred->security field. - * The security field is initialized to NULL when the task structure is - * allocated. - * Return 0 if operation was successful. * @cred_free: * @cred points to the credentials. * Deallocate and clear the cred->security field in a set of credentials. + * @cred_prepare: + * @new points to the new credentials. + * @old points to the original credentials. + * @gfp indicates the atomicity of any memory allocations. + * Prepare a new set of credentials by copying the data from the old set. + * @cred_commit: + * @new points to the new credentials. + * @old points to the original credentials. + * Install a new set of credentials. * @task_setuid: * Check permission before setting one or more of the user identity * attributes of the current process. The @flags parameter indicates @@ -614,15 +614,13 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @id2 contains a uid. * @flags contains one of the LSM_SETID_* values. * Return 0 if permission is granted. - * @task_post_setuid: + * @task_fix_setuid: * Update the module's state after setting one or more of the user * identity attributes of the current process. The @flags parameter * indicates which of the set*uid system calls invoked this hook. If - * @flags is LSM_SETID_FS, then @old_ruid is the old fs uid and the other - * parameters are not used. - * @old_ruid contains the old real uid (or fs uid if LSM_SETID_FS). - * @old_euid contains the old effective uid (or -1 if LSM_SETID_FS). - * @old_suid contains the old saved uid (or -1 if LSM_SETID_FS). + * @new is the set of credentials that will be installed. Modifications + * should be made to this rather than to @current->cred. + * @old is the set of credentials that are being replaces * @flags contains one of the LSM_SETID_* values. * Return 0 on success. * @task_setgid: @@ -725,13 +723,8 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @arg3 contains a argument. * @arg4 contains a argument. * @arg5 contains a argument. - * @rc_p contains a pointer to communicate back the forced return code - * Return 0 if permission is granted, and non-zero if the security module - * has taken responsibility (setting *rc_p) for the prctl call. - * @task_reparent_to_init: - * Set the security attributes in @p->security for a kernel thread that - * is being reparented to the init task. - * @p contains the task_struct for the kernel thread. + * Return -ENOSYS if no-one wanted to handle this op, any other value to + * cause prctl() to return immediately with that value. * @task_to_inode: * Set the security attributes for an inode based on an associated task's * security attributes, e.g. for /proc/pid inodes. @@ -1008,7 +1001,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * See whether a specific operational right is granted to a process on a * key. * @key_ref refers to the key (key pointer + possession attribute bit). - * @context points to the process to provide the context against which to + * @cred points to the credentials to provide the context against which to * evaluate the security data on the key. * @perm describes the combination of permissions required of this key. * Return 1 if permission granted, 0 if permission denied and -ve it the @@ -1170,6 +1163,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @child process. * Security modules may also want to perform a process tracing check * during an execve in the set_security or apply_creds hooks of + * tracing check during an execve in the bprm_set_creds hook of * binprm_security_ops if the process is being traced and its security * attributes would be changed by the execve. * @child contains the task_struct structure for the target process. @@ -1193,19 +1187,15 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @inheritable contains the inheritable capability set. * @permitted contains the permitted capability set. * Return 0 if the capability sets were successfully obtained. - * @capset_check: - * Check permission before setting the @effective, @inheritable, and - * @permitted capability sets for the current process. - * @effective contains the effective capability set. - * @inheritable contains the inheritable capability set. - * @permitted contains the permitted capability set. - * Return 0 if permission is granted. - * @capset_set: + * @capset: * Set the @effective, @inheritable, and @permitted capability sets for * the current process. + * @new contains the new credentials structure for target process. + * @old contains the current credentials structure for target process. * @effective contains the effective capability set. * @inheritable contains the inheritable capability set. * @permitted contains the permitted capability set. + * Return 0 and update @new if permission is granted. * @capable: * Check whether the @tsk process has the @cap capability. * @tsk contains the task_struct for the process. @@ -1297,12 +1287,11 @@ struct security_operations { int (*capget) (struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); - int (*capset_check) (const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); - void (*capset_set) (const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); + int (*capset) (struct cred *new, + const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); int (*capable) (struct task_struct *tsk, int cap, int audit); int (*acct) (struct file *file); int (*sysctl) (struct ctl_table *table, int op); @@ -1314,7 +1303,7 @@ struct security_operations { int (*bprm_alloc_security) (struct linux_binprm *bprm); void (*bprm_free_security) (struct linux_binprm *bprm); - void (*bprm_apply_creds) (struct linux_binprm *bprm, int unsafe); + int (*bprm_apply_creds) (struct linux_binprm *bprm, int unsafe); void (*bprm_post_apply_creds) (struct linux_binprm *bprm); int (*bprm_set_security) (struct linux_binprm *bprm); int (*bprm_check_security) (struct linux_binprm *bprm); @@ -1405,11 +1394,13 @@ struct security_operations { int (*dentry_open) (struct file *file, const struct cred *cred); int (*task_create) (unsigned long clone_flags); - int (*cred_alloc_security) (struct cred *cred); void (*cred_free) (struct cred *cred); + int (*cred_prepare)(struct cred *new, const struct cred *old, + gfp_t gfp); + void (*cred_commit)(struct cred *new, const struct cred *old); int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags); - int (*task_post_setuid) (uid_t old_ruid /* or fsuid */ , - uid_t old_euid, uid_t old_suid, int flags); + int (*task_fix_setuid) (struct cred *new, const struct cred *old, + int flags); int (*task_setgid) (gid_t id0, gid_t id1, gid_t id2, int flags); int (*task_setpgid) (struct task_struct *p, pid_t pgid); int (*task_getpgid) (struct task_struct *p); @@ -1429,8 +1420,7 @@ struct security_operations { int (*task_wait) (struct task_struct *p); int (*task_prctl) (int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, - unsigned long arg5, long *rc_p); - void (*task_reparent_to_init) (struct task_struct *p); + unsigned long arg5); void (*task_to_inode) (struct task_struct *p, struct inode *inode); int (*ipc_permission) (struct kern_ipc_perm *ipcp, short flag); @@ -1535,10 +1525,10 @@ struct security_operations { /* key management security hooks */ #ifdef CONFIG_KEYS - int (*key_alloc) (struct key *key, struct task_struct *tsk, unsigned long flags); + int (*key_alloc) (struct key *key, const struct cred *cred, unsigned long flags); void (*key_free) (struct key *key); int (*key_permission) (key_ref_t key_ref, - struct task_struct *context, + const struct cred *cred, key_perm_t perm); int (*key_getsecurity)(struct key *key, char **_buffer); #endif /* CONFIG_KEYS */ @@ -1564,12 +1554,10 @@ int security_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); -int security_capset_check(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); -void security_capset_set(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted); +int security_capset(struct cred *new, const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted); int security_capable(struct task_struct *tsk, int cap); int security_capable_noaudit(struct task_struct *tsk, int cap); int security_acct(struct file *file); @@ -1583,7 +1571,7 @@ int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); int security_vm_enough_memory_kern(long pages); int security_bprm_alloc(struct linux_binprm *bprm); void security_bprm_free(struct linux_binprm *bprm); -void security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); +int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); void security_bprm_post_apply_creds(struct linux_binprm *bprm); int security_bprm_set(struct linux_binprm *bprm); int security_bprm_check(struct linux_binprm *bprm); @@ -1660,11 +1648,12 @@ int security_file_send_sigiotask(struct task_struct *tsk, int security_file_receive(struct file *file); int security_dentry_open(struct file *file, const struct cred *cred); int security_task_create(unsigned long clone_flags); -int security_cred_alloc(struct cred *cred); void security_cred_free(struct cred *cred); +int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp); +void security_commit_creds(struct cred *new, const struct cred *old); int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags); -int security_task_post_setuid(uid_t old_ruid, uid_t old_euid, - uid_t old_suid, int flags); +int security_task_fix_setuid(struct cred *new, const struct cred *old, + int flags); int security_task_setgid(gid_t id0, gid_t id1, gid_t id2, int flags); int security_task_setpgid(struct task_struct *p, pid_t pgid); int security_task_getpgid(struct task_struct *p); @@ -1683,8 +1672,7 @@ int security_task_kill(struct task_struct *p, struct siginfo *info, int sig, u32 secid); int security_task_wait(struct task_struct *p); int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5, long *rc_p); -void security_task_reparent_to_init(struct task_struct *p); + unsigned long arg4, unsigned long arg5); void security_task_to_inode(struct task_struct *p, struct inode *inode); int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag); void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid); @@ -1759,18 +1747,13 @@ static inline int security_capget(struct task_struct *target, return cap_capget(target, effective, inheritable, permitted); } -static inline int security_capset_check(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) +static inline int security_capset(struct cred *new, + const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) { - return cap_capset_check(effective, inheritable, permitted); -} - -static inline void security_capset_set(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) -{ - cap_capset_set(effective, inheritable, permitted); + return cap_capset(new, old, effective, inheritable, permitted); } static inline int security_capable(struct task_struct *tsk, int cap) @@ -1837,9 +1820,9 @@ static inline int security_bprm_alloc(struct linux_binprm *bprm) static inline void security_bprm_free(struct linux_binprm *bprm) { } -static inline void security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) +static inline int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) { - cap_bprm_apply_creds(bprm, unsafe); + return cap_bprm_apply_creds(bprm, unsafe); } static inline void security_bprm_post_apply_creds(struct linux_binprm *bprm) @@ -2182,13 +2165,20 @@ static inline int security_task_create(unsigned long clone_flags) return 0; } -static inline int security_cred_alloc(struct cred *cred) +static inline void security_cred_free(struct cred *cred) +{ } + +static inline int security_prepare_creds(struct cred *new, + const struct cred *old, + gfp_t gfp) { return 0; } -static inline void security_cred_free(struct cred *cred) -{ } +static inline void security_commit_creds(struct cred *new, + const struct cred *old) +{ +} static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) @@ -2196,10 +2186,11 @@ static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, return 0; } -static inline int security_task_post_setuid(uid_t old_ruid, uid_t old_euid, - uid_t old_suid, int flags) +static inline int security_task_fix_setuid(struct cred *new, + const struct cred *old, + int flags) { - return cap_task_post_setuid(old_ruid, old_euid, old_suid, flags); + return cap_task_fix_setuid(new, old, flags); } static inline int security_task_setgid(gid_t id0, gid_t id1, gid_t id2, @@ -2286,14 +2277,9 @@ static inline int security_task_wait(struct task_struct *p) static inline int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, - unsigned long arg5, long *rc_p) -{ - return cap_task_prctl(option, arg2, arg3, arg3, arg5, rc_p); -} - -static inline void security_task_reparent_to_init(struct task_struct *p) + unsigned long arg5) { - cap_task_reparent_to_init(p); + return cap_task_prctl(option, arg2, arg3, arg3, arg5); } static inline void security_task_to_inode(struct task_struct *p, struct inode *inode) @@ -2719,16 +2705,16 @@ static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi #ifdef CONFIG_KEYS #ifdef CONFIG_SECURITY -int security_key_alloc(struct key *key, struct task_struct *tsk, unsigned long flags); +int security_key_alloc(struct key *key, const struct cred *cred, unsigned long flags); void security_key_free(struct key *key); int security_key_permission(key_ref_t key_ref, - struct task_struct *context, key_perm_t perm); + const struct cred *cred, key_perm_t perm); int security_key_getsecurity(struct key *key, char **_buffer); #else static inline int security_key_alloc(struct key *key, - struct task_struct *tsk, + const struct cred *cred, unsigned long flags) { return 0; @@ -2739,7 +2725,7 @@ static inline void security_key_free(struct key *key) } static inline int security_key_permission(key_ref_t key_ref, - struct task_struct *context, + const struct cred *cred, key_perm_t perm) { return 0; diff --git a/init/main.c b/init/main.c index 7e117a231af1..db843bff5732 100644 --- a/init/main.c +++ b/init/main.c @@ -669,6 +669,7 @@ asmlinkage void __init start_kernel(void) efi_enter_virtual_mode(); #endif thread_info_cache_init(); + cred_init(); fork_init(num_physpages); proc_caches_init(); buffer_init(); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index ae8ef88ade3f..bc1e2d854bf6 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2546,18 +2546,17 @@ int __audit_signal_info(int sig, struct task_struct *t) /** * __audit_log_bprm_fcaps - store information about a loading bprm and relevant fcaps - * @bprm pointer to the bprm being processed - * @caps the caps read from the disk + * @bprm: pointer to the bprm being processed + * @new: the proposed new credentials + * @old: the old credentials * * Simply check if the proc already has the caps given by the file and if not * store the priv escalation info for later auditing at the end of the syscall * - * this can fail and we don't care. See the note in audit.h for - * audit_log_bprm_fcaps() for my explaination.... - * * -Eric */ -void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_cap_t *pE) +int __audit_log_bprm_fcaps(struct linux_binprm *bprm, + const struct cred *new, const struct cred *old) { struct audit_aux_data_bprm_fcaps *ax; struct audit_context *context = current->audit_context; @@ -2566,7 +2565,7 @@ void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_ ax = kmalloc(sizeof(*ax), GFP_KERNEL); if (!ax) - return; + return -ENOMEM; ax->d.type = AUDIT_BPRM_FCAPS; ax->d.next = context->aux; @@ -2581,26 +2580,27 @@ void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_ ax->fcap.fE = !!(vcaps.magic_etc & VFS_CAP_FLAGS_EFFECTIVE); ax->fcap_ver = (vcaps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT; - ax->old_pcap.permitted = *pP; - ax->old_pcap.inheritable = current->cred->cap_inheritable; - ax->old_pcap.effective = *pE; + ax->old_pcap.permitted = old->cap_permitted; + ax->old_pcap.inheritable = old->cap_inheritable; + ax->old_pcap.effective = old->cap_effective; - ax->new_pcap.permitted = current->cred->cap_permitted; - ax->new_pcap.inheritable = current->cred->cap_inheritable; - ax->new_pcap.effective = current->cred->cap_effective; + ax->new_pcap.permitted = new->cap_permitted; + ax->new_pcap.inheritable = new->cap_inheritable; + ax->new_pcap.effective = new->cap_effective; + return 0; } /** * __audit_log_capset - store information about the arguments to the capset syscall - * @pid target pid of the capset call - * @eff effective cap set - * @inh inheritible cap set - * @perm permited cap set + * @pid: target pid of the capset call + * @new: the new credentials + * @old: the old (current) credentials * * Record the aguments userspace sent to sys_capset for later printing by the * audit system if applicable */ -int __audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_cap_t *perm) +int __audit_log_capset(pid_t pid, + const struct cred *new, const struct cred *old) { struct audit_aux_data_capset *ax; struct audit_context *context = current->audit_context; @@ -2617,9 +2617,9 @@ int __audit_log_capset(pid_t pid, kernel_cap_t *eff, kernel_cap_t *inh, kernel_c context->aux = (void *)ax; ax->pid = pid; - ax->cap.effective = *eff; - ax->cap.inheritable = *eff; - ax->cap.permitted = *perm; + ax->cap.effective = new->cap_effective; + ax->cap.inheritable = new->cap_effective; + ax->cap.permitted = new->cap_permitted; return 0; } diff --git a/kernel/capability.c b/kernel/capability.c index a404b980b1bd..36b4b4daebec 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -15,12 +15,7 @@ #include #include #include - -/* - * This lock protects task->cap_* for all tasks including current. - * Locking rule: acquire this prior to tasklist_lock. - */ -static DEFINE_SPINLOCK(task_capability_lock); +#include "cred-internals.h" /* * Leveraged for setting/resetting capabilities @@ -128,12 +123,11 @@ static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy) } /* - * If we have configured with filesystem capability support, then the - * only thing that can change the capabilities of the current process - * is the current process. As such, we can't be in this code at the - * same time as we are in the process of setting capabilities in this - * process. The net result is that we can limit our use of locks to - * when we are reading the caps of another process. + * The only thing that can change the capabilities of the current + * process is the current process. As such, we can't be in this code + * at the same time as we are in the process of setting capabilities + * in this process. The net result is that we can limit our use of + * locks to when we are reading the caps of another process. */ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp, kernel_cap_t *pIp, kernel_cap_t *pPp) @@ -143,7 +137,6 @@ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp, if (pid && (pid != task_pid_vnr(current))) { struct task_struct *target; - spin_lock(&task_capability_lock); read_lock(&tasklist_lock); target = find_task_by_vpid(pid); @@ -153,34 +146,12 @@ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp, ret = security_capget(target, pEp, pIp, pPp); read_unlock(&tasklist_lock); - spin_unlock(&task_capability_lock); } else ret = security_capget(current, pEp, pIp, pPp); return ret; } -/* - * Atomically modify the effective capabilities returning the original - * value. No permission check is performed here - it is assumed that the - * caller is permitted to set the desired effective capabilities. - */ -kernel_cap_t cap_set_effective(const kernel_cap_t pE_new) -{ - kernel_cap_t pE_old; - - spin_lock(&task_capability_lock); - - pE_old = current->cred->cap_effective; - current->cred->cap_effective = pE_new; - - spin_unlock(&task_capability_lock); - - return pE_old; -} - -EXPORT_SYMBOL(cap_set_effective); - /** * sys_capget - get the capabilities of a given process. * @header: pointer to struct that contains capability version and @@ -208,7 +179,6 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) return -EINVAL; ret = cap_get_target_pid(pid, &pE, &pI, &pP); - if (!ret) { struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; unsigned i; @@ -270,6 +240,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; unsigned i, tocopy; kernel_cap_t inheritable, permitted, effective; + struct cred *new; int ret; pid_t pid; @@ -284,8 +255,8 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) if (pid != 0 && pid != task_pid_vnr(current)) return -EPERM; - if (copy_from_user(&kdata, data, tocopy - * sizeof(struct __user_cap_data_struct))) + if (copy_from_user(&kdata, data, + tocopy * sizeof(struct __user_cap_data_struct))) return -EFAULT; for (i = 0; i < tocopy; i++) { @@ -300,24 +271,23 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) i++; } - ret = audit_log_capset(pid, &effective, &inheritable, &permitted); - if (ret) + new = prepare_creds(); + if (!new) + return -ENOMEM; + + ret = security_capset(new, current_cred(), + &effective, &inheritable, &permitted); + if (ret < 0) + goto error; + + ret = audit_log_capset(pid, new, current_cred()); + if (ret < 0) return ret; - /* This lock is required even when filesystem capability support is - * configured - it protects the sys_capget() call from returning - * incorrect data in the case that the targeted process is not the - * current one. - */ - spin_lock(&task_capability_lock); - - ret = security_capset_check(&effective, &inheritable, &permitted); - /* Having verified that the proposed changes are legal, we now put them - * into effect. - */ - if (!ret) - security_capset_set(&effective, &inheritable, &permitted); - spin_unlock(&task_capability_lock); + return commit_creds(new); + +error: + abort_creds(new); return ret; } diff --git a/kernel/cred-internals.h b/kernel/cred-internals.h new file mode 100644 index 000000000000..2dc4fc2d0bf1 --- /dev/null +++ b/kernel/cred-internals.h @@ -0,0 +1,21 @@ +/* Internal credentials stuff + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +/* + * user.c + */ +static inline void sched_switch_user(struct task_struct *p) +{ +#ifdef CONFIG_USER_SCHED + sched_move_task(p); +#endif /* CONFIG_USER_SCHED */ +} + diff --git a/kernel/cred.c b/kernel/cred.c index ac73e3617684..cb6b5eda978d 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -15,6 +15,10 @@ #include #include #include +#include +#include "cred-internals.h" + +static struct kmem_cache *cred_jar; /* * The common credentials for the initial task's thread group @@ -64,7 +68,7 @@ static void release_tgcred_rcu(struct rcu_head *rcu) /* * Release a set of thread group credentials. */ -static void release_tgcred(struct cred *cred) +void release_tgcred(struct cred *cred) { #ifdef CONFIG_KEYS struct thread_group_cred *tgcred = cred->tgcred; @@ -81,79 +85,322 @@ static void put_cred_rcu(struct rcu_head *rcu) { struct cred *cred = container_of(rcu, struct cred, rcu); - BUG_ON(atomic_read(&cred->usage) != 0); + if (atomic_read(&cred->usage) != 0) + panic("CRED: put_cred_rcu() sees %p with usage %d\n", + cred, atomic_read(&cred->usage)); + security_cred_free(cred); key_put(cred->thread_keyring); key_put(cred->request_key_auth); release_tgcred(cred); put_group_info(cred->group_info); free_uid(cred->user); - security_cred_free(cred); - kfree(cred); + kmem_cache_free(cred_jar, cred); } /** * __put_cred - Destroy a set of credentials - * @sec: The record to release + * @cred: The record to release * * Destroy a set of credentials on which no references remain. */ void __put_cred(struct cred *cred) { + BUG_ON(atomic_read(&cred->usage) != 0); + call_rcu(&cred->rcu, put_cred_rcu); } EXPORT_SYMBOL(__put_cred); +/** + * prepare_creds - Prepare a new set of credentials for modification + * + * Prepare a new set of task credentials for modification. A task's creds + * shouldn't generally be modified directly, therefore this function is used to + * prepare a new copy, which the caller then modifies and then commits by + * calling commit_creds(). + * + * Returns a pointer to the new creds-to-be if successful, NULL otherwise. + * + * Call commit_creds() or abort_creds() to clean up. + */ +struct cred *prepare_creds(void) +{ + struct task_struct *task = current; + const struct cred *old; + struct cred *new; + + BUG_ON(atomic_read(&task->cred->usage) < 1); + + new = kmem_cache_alloc(cred_jar, GFP_KERNEL); + if (!new) + return NULL; + + old = task->cred; + memcpy(new, old, sizeof(struct cred)); + + atomic_set(&new->usage, 1); + get_group_info(new->group_info); + get_uid(new->user); + +#ifdef CONFIG_KEYS + key_get(new->thread_keyring); + key_get(new->request_key_auth); + atomic_inc(&new->tgcred->usage); +#endif + +#ifdef CONFIG_SECURITY + new->security = NULL; +#endif + + if (security_prepare_creds(new, old, GFP_KERNEL) < 0) + goto error; + return new; + +error: + abort_creds(new); + return NULL; +} +EXPORT_SYMBOL(prepare_creds); + +/* + * prepare new credentials for the usermode helper dispatcher + */ +struct cred *prepare_usermodehelper_creds(void) +{ +#ifdef CONFIG_KEYS + struct thread_group_cred *tgcred = NULL; +#endif + struct cred *new; + +#ifdef CONFIG_KEYS + tgcred = kzalloc(sizeof(*new->tgcred), GFP_ATOMIC); + if (!tgcred) + return NULL; +#endif + + new = kmem_cache_alloc(cred_jar, GFP_ATOMIC); + if (!new) + return NULL; + + memcpy(new, &init_cred, sizeof(struct cred)); + + atomic_set(&new->usage, 1); + get_group_info(new->group_info); + get_uid(new->user); + +#ifdef CONFIG_KEYS + new->thread_keyring = NULL; + new->request_key_auth = NULL; + new->jit_keyring = KEY_REQKEY_DEFL_DEFAULT; + + atomic_set(&tgcred->usage, 1); + spin_lock_init(&tgcred->lock); + new->tgcred = tgcred; +#endif + +#ifdef CONFIG_SECURITY + new->security = NULL; +#endif + if (security_prepare_creds(new, &init_cred, GFP_ATOMIC) < 0) + goto error; + + BUG_ON(atomic_read(&new->usage) != 1); + return new; + +error: + put_cred(new); + return NULL; +} + /* * Copy credentials for the new process created by fork() + * + * We share if we can, but under some circumstances we have to generate a new + * set. */ int copy_creds(struct task_struct *p, unsigned long clone_flags) { - struct cred *pcred; - int ret; +#ifdef CONFIG_KEYS + struct thread_group_cred *tgcred; +#endif + struct cred *new; + + mutex_init(&p->cred_exec_mutex); - pcred = kmemdup(p->cred, sizeof(*p->cred), GFP_KERNEL); - if (!pcred) + if ( +#ifdef CONFIG_KEYS + !p->cred->thread_keyring && +#endif + clone_flags & CLONE_THREAD + ) { + get_cred(p->cred); + atomic_inc(&p->cred->user->processes); + return 0; + } + + new = prepare_creds(); + if (!new) return -ENOMEM; #ifdef CONFIG_KEYS - if (clone_flags & CLONE_THREAD) { - atomic_inc(&pcred->tgcred->usage); - } else { - pcred->tgcred = kmalloc(sizeof(struct cred), GFP_KERNEL); - if (!pcred->tgcred) { - kfree(pcred); + /* new threads get their own thread keyrings if their parent already + * had one */ + if (new->thread_keyring) { + key_put(new->thread_keyring); + new->thread_keyring = NULL; + if (clone_flags & CLONE_THREAD) + install_thread_keyring_to_cred(new); + } + + /* we share the process and session keyrings between all the threads in + * a process - this is slightly icky as we violate COW credentials a + * bit */ + if (!(clone_flags & CLONE_THREAD)) { + tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); + if (!tgcred) { + put_cred(new); return -ENOMEM; } - atomic_set(&pcred->tgcred->usage, 1); - spin_lock_init(&pcred->tgcred->lock); - pcred->tgcred->process_keyring = NULL; - pcred->tgcred->session_keyring = - key_get(p->cred->tgcred->session_keyring); + atomic_set(&tgcred->usage, 1); + spin_lock_init(&tgcred->lock); + tgcred->process_keyring = NULL; + tgcred->session_keyring = key_get(new->tgcred->session_keyring); + + release_tgcred(new); + new->tgcred = tgcred; } #endif -#ifdef CONFIG_SECURITY - pcred->security = NULL; -#endif + atomic_inc(&new->user->processes); + p->cred = new; + return 0; +} - ret = security_cred_alloc(pcred); - if (ret < 0) { - release_tgcred(pcred); - kfree(pcred); - return ret; +/** + * commit_creds - Install new credentials upon the current task + * @new: The credentials to be assigned + * + * Install a new set of credentials to the current task, using RCU to replace + * the old set. + * + * This function eats the caller's reference to the new credentials. + * + * Always returns 0 thus allowing this function to be tail-called at the end + * of, say, sys_setgid(). + */ +int commit_creds(struct cred *new) +{ + struct task_struct *task = current; + const struct cred *old; + + BUG_ON(atomic_read(&new->usage) < 1); + BUG_ON(atomic_read(&task->cred->usage) < 1); + + old = task->cred; + security_commit_creds(new, old); + + /* dumpability changes */ + if (old->euid != new->euid || + old->egid != new->egid || + old->fsuid != new->fsuid || + old->fsgid != new->fsgid || + !cap_issubset(new->cap_permitted, old->cap_permitted)) { + set_dumpable(task->mm, suid_dumpable); + task->pdeath_signal = 0; + smp_wmb(); } - atomic_set(&pcred->usage, 1); - get_group_info(pcred->group_info); - get_uid(pcred->user); - key_get(pcred->thread_keyring); - key_get(pcred->request_key_auth); + /* alter the thread keyring */ + if (new->fsuid != old->fsuid) + key_fsuid_changed(task); + if (new->fsgid != old->fsgid) + key_fsgid_changed(task); + + /* do it + * - What if a process setreuid()'s and this brings the + * new uid over his NPROC rlimit? We can check this now + * cheaply with the new uid cache, so if it matters + * we should be checking for it. -DaveM + */ + if (new->user != old->user) + atomic_inc(&new->user->processes); + rcu_assign_pointer(task->cred, new); + if (new->user != old->user) + atomic_dec(&old->user->processes); + + sched_switch_user(task); + + /* send notifications */ + if (new->uid != old->uid || + new->euid != old->euid || + new->suid != old->suid || + new->fsuid != old->fsuid) + proc_id_connector(task, PROC_EVENT_UID); - atomic_inc(&pcred->user->processes); + if (new->gid != old->gid || + new->egid != old->egid || + new->sgid != old->sgid || + new->fsgid != old->fsgid) + proc_id_connector(task, PROC_EVENT_GID); - /* RCU assignment is unneeded here as no-one can have accessed this - * pointer yet, barring us */ - p->cred = pcred; + put_cred(old); return 0; } +EXPORT_SYMBOL(commit_creds); + +/** + * abort_creds - Discard a set of credentials and unlock the current task + * @new: The credentials that were going to be applied + * + * Discard a set of credentials that were under construction and unlock the + * current task. + */ +void abort_creds(struct cred *new) +{ + BUG_ON(atomic_read(&new->usage) < 1); + put_cred(new); +} +EXPORT_SYMBOL(abort_creds); + +/** + * override_creds - Temporarily override the current process's credentials + * @new: The credentials to be assigned + * + * Install a set of temporary override credentials on the current process, + * returning the old set for later reversion. + */ +const struct cred *override_creds(const struct cred *new) +{ + const struct cred *old = current->cred; + + rcu_assign_pointer(current->cred, get_cred(new)); + return old; +} +EXPORT_SYMBOL(override_creds); + +/** + * revert_creds - Revert a temporary credentials override + * @old: The credentials to be restored + * + * Revert a temporary set of override credentials to an old set, discarding the + * override set. + */ +void revert_creds(const struct cred *old) +{ + const struct cred *override = current->cred; + + rcu_assign_pointer(current->cred, old); + put_cred(override); +} +EXPORT_SYMBOL(revert_creds); + +/* + * initialise the credentials stuff + */ +void __init cred_init(void) +{ + /* allocate a slab in which we can store credentials */ + cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred), + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); +} diff --git a/kernel/exit.c b/kernel/exit.c index bbc22530f2c1..c0711da15486 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -47,12 +47,14 @@ #include #include #include +#include #include #include #include #include #include +#include "cred-internals.h" static void exit_mm(struct task_struct * tsk); @@ -338,12 +340,12 @@ static void reparent_to_kthreadd(void) /* cpus_allowed? */ /* rt_priority? */ /* signals? */ - security_task_reparent_to_init(current); memcpy(current->signal->rlim, init_task.signal->rlim, sizeof(current->signal->rlim)); - atomic_inc(&(INIT_USER->__count)); + + atomic_inc(&init_cred.usage); + commit_creds(&init_cred); write_unlock_irq(&tasklist_lock); - switch_uid(INIT_USER); } void __set_special_pids(struct pid *pid) @@ -1085,7 +1087,6 @@ NORET_TYPE void do_exit(long code) check_stack_usage(); exit_thread(); cgroup_exit(tsk, 1); - exit_keys(tsk); if (group_dead && tsk->signal->leader) disassociate_ctty(1); diff --git a/kernel/fork.c b/kernel/fork.c index ded1972672a3..82a7948a664e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1084,10 +1084,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto bad_fork_cleanup_sighand; if ((retval = copy_mm(clone_flags, p))) goto bad_fork_cleanup_signal; - if ((retval = copy_keys(clone_flags, p))) - goto bad_fork_cleanup_mm; if ((retval = copy_namespaces(clone_flags, p))) - goto bad_fork_cleanup_keys; + goto bad_fork_cleanup_mm; if ((retval = copy_io(clone_flags, p))) goto bad_fork_cleanup_namespaces; retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); @@ -1252,8 +1250,6 @@ bad_fork_cleanup_io: put_io_context(p->io_context); bad_fork_cleanup_namespaces: exit_task_namespaces(p); -bad_fork_cleanup_keys: - exit_keys(p); bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); @@ -1281,6 +1277,7 @@ bad_fork_cleanup_cgroup: bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: + atomic_dec(&p->cred->user->processes); put_cred(p->cred); bad_fork_free: free_task(p); diff --git a/kernel/kmod.c b/kernel/kmod.c index f044f8f57703..b46dbb908669 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -118,10 +118,10 @@ EXPORT_SYMBOL(request_module); struct subprocess_info { struct work_struct work; struct completion *complete; + struct cred *cred; char *path; char **argv; char **envp; - struct key *ring; enum umh_wait wait; int retval; struct file *stdin; @@ -134,19 +134,20 @@ struct subprocess_info { static int ____call_usermodehelper(void *data) { struct subprocess_info *sub_info = data; - struct key *new_session, *old_session; int retval; - /* Unblock all signals and set the session keyring. */ - new_session = key_get(sub_info->ring); + BUG_ON(atomic_read(&sub_info->cred->usage) != 1); + + /* Unblock all signals */ spin_lock_irq(¤t->sighand->siglock); - old_session = __install_session_keyring(new_session); flush_signal_handlers(current, 1); sigemptyset(¤t->blocked); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - key_put(old_session); + /* Install the credentials */ + commit_creds(sub_info->cred); + sub_info->cred = NULL; /* Install input pipe when needed */ if (sub_info->stdin) { @@ -185,6 +186,8 @@ void call_usermodehelper_freeinfo(struct subprocess_info *info) { if (info->cleanup) (*info->cleanup)(info->argv, info->envp); + if (info->cred) + put_cred(info->cred); kfree(info); } EXPORT_SYMBOL(call_usermodehelper_freeinfo); @@ -240,6 +243,8 @@ static void __call_usermodehelper(struct work_struct *work) pid_t pid; enum umh_wait wait = sub_info->wait; + BUG_ON(atomic_read(&sub_info->cred->usage) != 1); + /* CLONE_VFORK: wait until the usermode helper has execve'd * successfully We need the data structures to stay around * until that is done. */ @@ -362,6 +367,9 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, sub_info->path = path; sub_info->argv = argv; sub_info->envp = envp; + sub_info->cred = prepare_usermodehelper_creds(); + if (!sub_info->cred) + return NULL; out: return sub_info; @@ -376,7 +384,13 @@ EXPORT_SYMBOL(call_usermodehelper_setup); void call_usermodehelper_setkeys(struct subprocess_info *info, struct key *session_keyring) { - info->ring = session_keyring; +#ifdef CONFIG_KEYS + struct thread_group_cred *tgcred = info->cred->tgcred; + key_put(tgcred->session_keyring); + tgcred->session_keyring = key_get(session_keyring); +#else + BUG(); +#endif } EXPORT_SYMBOL(call_usermodehelper_setkeys); @@ -444,6 +458,8 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, DECLARE_COMPLETION_ONSTACK(done); int retval = 0; + BUG_ON(atomic_read(&sub_info->cred->usage) != 1); + helper_lock(); if (sub_info->path[0] == '\0') goto out; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index b9d5f4e4f6a4..f764b8806955 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -171,6 +171,14 @@ int ptrace_attach(struct task_struct *task) if (same_thread_group(task, current)) goto out; + /* Protect exec's credential calculations against our interference; + * SUID, SGID and LSM creds get determined differently under ptrace. + */ + retval = mutex_lock_interruptible(¤t->cred_exec_mutex); + if (retval < 0) + goto out; + + retval = -EPERM; repeat: /* * Nasty, nasty. @@ -210,6 +218,7 @@ repeat: bad: write_unlock_irqrestore(&tasklist_lock, flags); task_unlock(task); + mutex_unlock(¤t->cred_exec_mutex); out: return retval; } diff --git a/kernel/signal.c b/kernel/signal.c index 84989124bafb..2a64304ed54b 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -180,7 +180,7 @@ int next_signal(struct sigpending *pending, sigset_t *mask) /* * allocate a new signal queue record * - this may be called without locks if and only if t == current, otherwise an - * appopriate lock must be held to protect t's user_struct + * appopriate lock must be held to stop the target task from exiting */ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, int override_rlimit) @@ -194,7 +194,7 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, * caller must be holding the RCU readlock (by way of a spinlock) and * we use RCU protection here */ - user = __task_cred(t)->user; + user = get_uid(__task_cred(t)->user); atomic_inc(&user->sigpending); if (override_rlimit || atomic_read(&user->sigpending) <= @@ -202,12 +202,14 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, q = kmem_cache_alloc(sigqueue_cachep, flags); if (unlikely(q == NULL)) { atomic_dec(&user->sigpending); + free_uid(user); } else { INIT_LIST_HEAD(&q->list); q->flags = 0; - q->user = get_uid(user); + q->user = user; } - return(q); + + return q; } static void __sigqueue_free(struct sigqueue *q) diff --git a/kernel/sys.c b/kernel/sys.c index ccc9eb736d35..ab735040468a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -180,7 +180,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case PRIO_USER: - user = cred->user; + user = (struct user_struct *) cred->user; if (!who) who = cred->uid; else if ((who != cred->uid) && @@ -479,47 +479,48 @@ void ctrl_alt_del(void) */ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) { - struct cred *cred = current->cred; - int old_rgid = cred->gid; - int old_egid = cred->egid; - int new_rgid = old_rgid; - int new_egid = old_egid; + const struct cred *old; + struct cred *new; int retval; + new = prepare_creds(); + if (!new) + return -ENOMEM; + old = current_cred(); + retval = security_task_setgid(rgid, egid, (gid_t)-1, LSM_SETID_RE); if (retval) - return retval; + goto error; + retval = -EPERM; if (rgid != (gid_t) -1) { - if ((old_rgid == rgid) || - (cred->egid == rgid) || + if (old->gid == rgid || + old->egid == rgid || capable(CAP_SETGID)) - new_rgid = rgid; + new->gid = rgid; else - return -EPERM; + goto error; } if (egid != (gid_t) -1) { - if ((old_rgid == egid) || - (cred->egid == egid) || - (cred->sgid == egid) || + if (old->gid == egid || + old->egid == egid || + old->sgid == egid || capable(CAP_SETGID)) - new_egid = egid; + new->egid = egid; else - return -EPERM; - } - if (new_egid != old_egid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); + goto error; } + if (rgid != (gid_t) -1 || - (egid != (gid_t) -1 && egid != old_rgid)) - cred->sgid = new_egid; - cred->fsgid = new_egid; - cred->egid = new_egid; - cred->gid = new_rgid; - key_fsgid_changed(current); - proc_id_connector(current, PROC_EVENT_GID); - return 0; + (egid != (gid_t) -1 && egid != old->gid)) + new->sgid = new->egid; + new->fsgid = new->egid; + + return commit_creds(new); + +error: + abort_creds(new); + return retval; } /* @@ -529,40 +530,42 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) */ asmlinkage long sys_setgid(gid_t gid) { - struct cred *cred = current->cred; - int old_egid = cred->egid; + const struct cred *old; + struct cred *new; int retval; + new = prepare_creds(); + if (!new) + return -ENOMEM; + old = current_cred(); + retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID); if (retval) - return retval; + goto error; - if (capable(CAP_SETGID)) { - if (old_egid != gid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); - } - cred->gid = cred->egid = cred->sgid = cred->fsgid = gid; - } else if ((gid == cred->gid) || (gid == cred->sgid)) { - if (old_egid != gid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); - } - cred->egid = cred->fsgid = gid; - } + retval = -EPERM; + if (capable(CAP_SETGID)) + new->gid = new->egid = new->sgid = new->fsgid = gid; + else if (gid == old->gid || gid == old->sgid) + new->egid = new->fsgid = gid; else - return -EPERM; + goto error; - key_fsgid_changed(current); - proc_id_connector(current, PROC_EVENT_GID); - return 0; + return commit_creds(new); + +error: + abort_creds(new); + return retval; } -static int set_user(uid_t new_ruid, int dumpclear) +/* + * change the user struct in a credentials set to match the new UID + */ +static int set_user(struct cred *new) { struct user_struct *new_user; - new_user = alloc_uid(current->nsproxy->user_ns, new_ruid); + new_user = alloc_uid(current->nsproxy->user_ns, new->uid); if (!new_user) return -EAGAIN; @@ -573,13 +576,8 @@ static int set_user(uid_t new_ruid, int dumpclear) return -EAGAIN; } - switch_uid(new_user); - - if (dumpclear) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); - } - current->cred->uid = new_ruid; + free_uid(new->user); + new->user = new_user; return 0; } @@ -600,55 +598,56 @@ static int set_user(uid_t new_ruid, int dumpclear) */ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) { - struct cred *cred = current->cred; - int old_ruid, old_euid, old_suid, new_ruid, new_euid; + const struct cred *old; + struct cred *new; int retval; + new = prepare_creds(); + if (!new) + return -ENOMEM; + old = current_cred(); + retval = security_task_setuid(ruid, euid, (uid_t)-1, LSM_SETID_RE); if (retval) - return retval; - - new_ruid = old_ruid = cred->uid; - new_euid = old_euid = cred->euid; - old_suid = cred->suid; + goto error; + retval = -EPERM; if (ruid != (uid_t) -1) { - new_ruid = ruid; - if ((old_ruid != ruid) && - (cred->euid != ruid) && + new->uid = ruid; + if (old->uid != ruid && + old->euid != ruid && !capable(CAP_SETUID)) - return -EPERM; + goto error; } if (euid != (uid_t) -1) { - new_euid = euid; - if ((old_ruid != euid) && - (cred->euid != euid) && - (cred->suid != euid) && + new->euid = euid; + if (old->uid != euid && + old->euid != euid && + old->suid != euid && !capable(CAP_SETUID)) - return -EPERM; + goto error; } - if (new_ruid != old_ruid && set_user(new_ruid, new_euid != old_euid) < 0) - return -EAGAIN; + retval = -EAGAIN; + if (new->uid != old->uid && set_user(new) < 0) + goto error; - if (new_euid != old_euid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); - } - cred->fsuid = cred->euid = new_euid; if (ruid != (uid_t) -1 || - (euid != (uid_t) -1 && euid != old_ruid)) - cred->suid = cred->euid; - cred->fsuid = cred->euid; - - key_fsuid_changed(current); - proc_id_connector(current, PROC_EVENT_UID); + (euid != (uid_t) -1 && euid != old->uid)) + new->suid = new->euid; + new->fsuid = new->euid; - return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE); -} + retval = security_task_fix_setuid(new, old, LSM_SETID_RE); + if (retval < 0) + goto error; + return commit_creds(new); +error: + abort_creds(new); + return retval; +} /* * setuid() is implemented like SysV with SAVED_IDS @@ -663,37 +662,41 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) */ asmlinkage long sys_setuid(uid_t uid) { - struct cred *cred = current->cred; - int old_euid = cred->euid; - int old_ruid, old_suid, new_suid; + const struct cred *old; + struct cred *new; int retval; + new = prepare_creds(); + if (!new) + return -ENOMEM; + old = current_cred(); + retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID); if (retval) - return retval; + goto error; - old_ruid = cred->uid; - old_suid = cred->suid; - new_suid = old_suid; - + retval = -EPERM; if (capable(CAP_SETUID)) { - if (uid != old_ruid && set_user(uid, old_euid != uid) < 0) - return -EAGAIN; - new_suid = uid; - } else if ((uid != cred->uid) && (uid != new_suid)) - return -EPERM; - - if (old_euid != uid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); + new->suid = new->uid = uid; + if (uid != old->uid && set_user(new) < 0) { + retval = -EAGAIN; + goto error; + } + } else if (uid != old->uid && uid != new->suid) { + goto error; } - cred->fsuid = cred->euid = uid; - cred->suid = new_suid; - key_fsuid_changed(current); - proc_id_connector(current, PROC_EVENT_UID); + new->fsuid = new->euid = uid; + + retval = security_task_fix_setuid(new, old, LSM_SETID_ID); + if (retval < 0) + goto error; + + return commit_creds(new); - return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID); +error: + abort_creds(new); + return retval; } @@ -703,47 +706,53 @@ asmlinkage long sys_setuid(uid_t uid) */ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) { - struct cred *cred = current->cred; - int old_ruid = cred->uid; - int old_euid = cred->euid; - int old_suid = cred->suid; + const struct cred *old; + struct cred *new; int retval; + new = prepare_creds(); + if (!new) + return -ENOMEM; + retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES); if (retval) - return retval; + goto error; + old = current_cred(); + retval = -EPERM; if (!capable(CAP_SETUID)) { - if ((ruid != (uid_t) -1) && (ruid != cred->uid) && - (ruid != cred->euid) && (ruid != cred->suid)) - return -EPERM; - if ((euid != (uid_t) -1) && (euid != cred->uid) && - (euid != cred->euid) && (euid != cred->suid)) - return -EPERM; - if ((suid != (uid_t) -1) && (suid != cred->uid) && - (suid != cred->euid) && (suid != cred->suid)) - return -EPERM; + if (ruid != (uid_t) -1 && ruid != old->uid && + ruid != old->euid && ruid != old->suid) + goto error; + if (euid != (uid_t) -1 && euid != old->uid && + euid != old->euid && euid != old->suid) + goto error; + if (suid != (uid_t) -1 && suid != old->uid && + suid != old->euid && suid != old->suid) + goto error; } + + retval = -EAGAIN; if (ruid != (uid_t) -1) { - if (ruid != cred->uid && - set_user(ruid, euid != cred->euid) < 0) - return -EAGAIN; + new->uid = ruid; + if (ruid != old->uid && set_user(new) < 0) + goto error; } - if (euid != (uid_t) -1) { - if (euid != cred->euid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); - } - cred->euid = euid; - } - cred->fsuid = cred->euid; + if (euid != (uid_t) -1) + new->euid = euid; if (suid != (uid_t) -1) - cred->suid = suid; + new->suid = suid; + new->fsuid = new->euid; - key_fsuid_changed(current); - proc_id_connector(current, PROC_EVENT_UID); + retval = security_task_fix_setuid(new, old, LSM_SETID_RES); + if (retval < 0) + goto error; - return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES); + return commit_creds(new); + +error: + abort_creds(new); + return retval; } asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid) @@ -763,40 +772,45 @@ asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __us */ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) { - struct cred *cred = current->cred; + const struct cred *old; + struct cred *new; int retval; + new = prepare_creds(); + if (!new) + return -ENOMEM; + old = current_cred(); + retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES); if (retval) - return retval; + goto error; + retval = -EPERM; if (!capable(CAP_SETGID)) { - if ((rgid != (gid_t) -1) && (rgid != cred->gid) && - (rgid != cred->egid) && (rgid != cred->sgid)) - return -EPERM; - if ((egid != (gid_t) -1) && (egid != cred->gid) && - (egid != cred->egid) && (egid != cred->sgid)) - return -EPERM; - if ((sgid != (gid_t) -1) && (sgid != cred->gid) && - (sgid != cred->egid) && (sgid != cred->sgid)) - return -EPERM; + if (rgid != (gid_t) -1 && rgid != old->gid && + rgid != old->egid && rgid != old->sgid) + goto error; + if (egid != (gid_t) -1 && egid != old->gid && + egid != old->egid && egid != old->sgid) + goto error; + if (sgid != (gid_t) -1 && sgid != old->gid && + sgid != old->egid && sgid != old->sgid) + goto error; } - if (egid != (gid_t) -1) { - if (egid != cred->egid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); - } - cred->egid = egid; - } - cred->fsgid = cred->egid; + if (rgid != (gid_t) -1) - cred->gid = rgid; + new->gid = rgid; + if (egid != (gid_t) -1) + new->egid = egid; if (sgid != (gid_t) -1) - cred->sgid = sgid; + new->sgid = sgid; + new->fsgid = new->egid; - key_fsgid_changed(current); - proc_id_connector(current, PROC_EVENT_GID); - return 0; + return commit_creds(new); + +error: + abort_creds(new); + return retval; } asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid) @@ -820,28 +834,35 @@ asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __us */ asmlinkage long sys_setfsuid(uid_t uid) { - struct cred *cred = current->cred; - int old_fsuid; + const struct cred *old; + struct cred *new; + uid_t old_fsuid; + + new = prepare_creds(); + if (!new) + return current_fsuid(); + old = current_cred(); + old_fsuid = old->fsuid; - old_fsuid = cred->fsuid; - if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS)) - return old_fsuid; + if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS) < 0) + goto error; - if (uid == cred->uid || uid == cred->euid || - uid == cred->suid || uid == cred->fsuid || + if (uid == old->uid || uid == old->euid || + uid == old->suid || uid == old->fsuid || capable(CAP_SETUID)) { if (uid != old_fsuid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); + new->fsuid = uid; + if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) + goto change_okay; } - cred->fsuid = uid; } - key_fsuid_changed(current); - proc_id_connector(current, PROC_EVENT_UID); - - security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS); +error: + abort_creds(new); + return old_fsuid; +change_okay: + commit_creds(new); return old_fsuid; } @@ -850,24 +871,34 @@ asmlinkage long sys_setfsuid(uid_t uid) */ asmlinkage long sys_setfsgid(gid_t gid) { - struct cred *cred = current->cred; - int old_fsgid; + const struct cred *old; + struct cred *new; + gid_t old_fsgid; + + new = prepare_creds(); + if (!new) + return current_fsgid(); + old = current_cred(); + old_fsgid = old->fsgid; - old_fsgid = cred->fsgid; if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS)) - return old_fsgid; + goto error; - if (gid == cred->gid || gid == cred->egid || - gid == cred->sgid || gid == cred->fsgid || + if (gid == old->gid || gid == old->egid || + gid == old->sgid || gid == old->fsgid || capable(CAP_SETGID)) { if (gid != old_fsgid) { - set_dumpable(current->mm, suid_dumpable); - smp_wmb(); + new->fsgid = gid; + goto change_okay; } - cred->fsgid = gid; - key_fsgid_changed(current); - proc_id_connector(current, PROC_EVENT_GID); } + +error: + abort_creds(new); + return old_fsgid; + +change_okay: + commit_creds(new); return old_fsgid; } @@ -1136,7 +1167,7 @@ EXPORT_SYMBOL(groups_free); /* export the group_info to a user-space array */ static int groups_to_user(gid_t __user *grouplist, - struct group_info *group_info) + const struct group_info *group_info) { int i; unsigned int count = group_info->ngroups; @@ -1227,31 +1258,25 @@ int groups_search(const struct group_info *group_info, gid_t grp) } /** - * set_groups - Change a group subscription in a security record - * @sec: The security record to alter - * @group_info: The group list to impose + * set_groups - Change a group subscription in a set of credentials + * @new: The newly prepared set of credentials to alter + * @group_info: The group list to install * - * Validate a group subscription and, if valid, impose it upon a task security - * record. + * Validate a group subscription and, if valid, insert it into a set + * of credentials. */ -int set_groups(struct cred *cred, struct group_info *group_info) +int set_groups(struct cred *new, struct group_info *group_info) { int retval; - struct group_info *old_info; retval = security_task_setgroups(group_info); if (retval) return retval; + put_group_info(new->group_info); groups_sort(group_info); get_group_info(group_info); - - spin_lock(&cred->lock); - old_info = cred->group_info; - cred->group_info = group_info; - spin_unlock(&cred->lock); - - put_group_info(old_info); + new->group_info = group_info; return 0; } @@ -1266,7 +1291,20 @@ EXPORT_SYMBOL(set_groups); */ int set_current_groups(struct group_info *group_info) { - return set_groups(current->cred, group_info); + struct cred *new; + int ret; + + new = prepare_creds(); + if (!new) + return -ENOMEM; + + ret = set_groups(new, group_info); + if (ret < 0) { + abort_creds(new); + return ret; + } + + return commit_creds(new); } EXPORT_SYMBOL(set_current_groups); @@ -1666,9 +1704,11 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned char comm[sizeof(me->comm)]; long error; - if (security_task_prctl(option, arg2, arg3, arg4, arg5, &error)) + error = security_task_prctl(option, arg2, arg3, arg4, arg5); + if (error != -ENOSYS) return error; + error = 0; switch (option) { case PR_SET_PDEATHSIG: if (!valid_signal(arg2)) { diff --git a/kernel/user.c b/kernel/user.c index 104d22ac84d5..d476307dd4b0 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -16,6 +16,7 @@ #include #include #include +#include "cred-internals.h" struct user_namespace init_user_ns = { .kref = { @@ -104,16 +105,10 @@ static int sched_create_user(struct user_struct *up) return rc; } -static void sched_switch_user(struct task_struct *p) -{ - sched_move_task(p); -} - #else /* CONFIG_USER_SCHED */ static void sched_destroy_user(struct user_struct *up) { } static int sched_create_user(struct user_struct *up) { return 0; } -static void sched_switch_user(struct task_struct *p) { } #endif /* CONFIG_USER_SCHED */ @@ -448,36 +443,6 @@ out_unlock: return NULL; } -void switch_uid(struct user_struct *new_user) -{ - struct user_struct *old_user; - - /* What if a process setreuid()'s and this brings the - * new uid over his NPROC rlimit? We can check this now - * cheaply with the new uid cache, so if it matters - * we should be checking for it. -DaveM - */ - old_user = current->cred->user; - atomic_inc(&new_user->processes); - atomic_dec(&old_user->processes); - switch_uid_keyring(new_user); - current->cred->user = new_user; - sched_switch_user(current); - - /* - * We need to synchronize with __sigqueue_alloc() - * doing a get_uid(p->user).. If that saw the old - * user value, we need to wait until it has exited - * its critical region before we can free the old - * structure. - */ - smp_mb(); - spin_unlock_wait(¤t->sighand->siglock); - - free_uid(old_user); - suid_keys(current); -} - #ifdef CONFIG_USER_NS void release_uids(struct user_namespace *ns) { diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index f82730adea00..0d9c51d67333 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -19,6 +19,7 @@ static struct user_namespace *clone_user_ns(struct user_namespace *old_ns) { struct user_namespace *ns; struct user_struct *new_user; + struct cred *new; int n; ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL); @@ -45,7 +46,16 @@ static struct user_namespace *clone_user_ns(struct user_namespace *old_ns) return ERR_PTR(-ENOMEM); } - switch_uid(new_user); + /* Install the new user */ + new = prepare_creds(); + if (!new) { + free_uid(new_user); + free_uid(ns->root_user); + kfree(ns); + } + free_uid(new->user); + new->user = new_user; + commit_creds(new); return ns; } diff --git a/lib/Makefile b/lib/Makefile index 7cb65d85aeb0..80fe8a3ec12a 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -11,7 +11,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o irq_regs.o reciprocal_div.o argv_split.o \ - proportions.o prio_heap.o ratelimit.o show_mem.o + proportions.o prio_heap.o ratelimit.o show_mem.o is_single_threaded.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index 9a8ff684da79..ad8c7a782da1 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -287,6 +287,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn, time_t expiry, u32 kvno) { + const struct cred *cred = current_cred(); struct key *key; int ret; @@ -297,7 +298,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn, _enter(""); - key = key_alloc(&key_type_rxrpc, "x", 0, 0, current, 0, + key = key_alloc(&key_type_rxrpc, "x", 0, 0, cred, 0, KEY_ALLOC_NOT_IN_QUOTA); if (IS_ERR(key)) { _leave(" = -ENOMEM [alloc %ld]", PTR_ERR(key)); @@ -340,10 +341,11 @@ EXPORT_SYMBOL(rxrpc_get_server_data_key); */ struct key *rxrpc_get_null_key(const char *keyname) { + const struct cred *cred = current_cred(); struct key *key; int ret; - key = key_alloc(&key_type_rxrpc, keyname, 0, 0, current, + key = key_alloc(&key_type_rxrpc, keyname, 0, 0, cred, KEY_POS_SEARCH, KEY_ALLOC_NOT_IN_QUOTA); if (IS_ERR(key)) return key; diff --git a/security/capability.c b/security/capability.c index fac2f61b69a9..efeb6d9e0e6a 100644 --- a/security/capability.c +++ b/security/capability.c @@ -340,12 +340,16 @@ static int cap_task_create(unsigned long clone_flags) return 0; } -static int cap_cred_alloc_security(struct cred *cred) +static void cap_cred_free(struct cred *cred) +{ +} + +static int cap_cred_prepare(struct cred *new, const struct cred *old, gfp_t gfp) { return 0; } -static void cap_cred_free(struct cred *cred) +static void cap_cred_commit(struct cred *new, const struct cred *old) { } @@ -750,7 +754,7 @@ static void cap_release_secctx(char *secdata, u32 seclen) } #ifdef CONFIG_KEYS -static int cap_key_alloc(struct key *key, struct task_struct *ctx, +static int cap_key_alloc(struct key *key, const struct cred *cred, unsigned long flags) { return 0; @@ -760,7 +764,7 @@ static void cap_key_free(struct key *key) { } -static int cap_key_permission(key_ref_t key_ref, struct task_struct *context, +static int cap_key_permission(key_ref_t key_ref, const struct cred *cred, key_perm_t perm) { return 0; @@ -814,8 +818,7 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, ptrace_may_access); set_to_cap_if_null(ops, ptrace_traceme); set_to_cap_if_null(ops, capget); - set_to_cap_if_null(ops, capset_check); - set_to_cap_if_null(ops, capset_set); + set_to_cap_if_null(ops, capset); set_to_cap_if_null(ops, acct); set_to_cap_if_null(ops, capable); set_to_cap_if_null(ops, quotactl); @@ -890,10 +893,11 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, file_receive); set_to_cap_if_null(ops, dentry_open); set_to_cap_if_null(ops, task_create); - set_to_cap_if_null(ops, cred_alloc_security); set_to_cap_if_null(ops, cred_free); + set_to_cap_if_null(ops, cred_prepare); + set_to_cap_if_null(ops, cred_commit); set_to_cap_if_null(ops, task_setuid); - set_to_cap_if_null(ops, task_post_setuid); + set_to_cap_if_null(ops, task_fix_setuid); set_to_cap_if_null(ops, task_setgid); set_to_cap_if_null(ops, task_setpgid); set_to_cap_if_null(ops, task_getpgid); @@ -910,7 +914,6 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, task_wait); set_to_cap_if_null(ops, task_kill); set_to_cap_if_null(ops, task_prctl); - set_to_cap_if_null(ops, task_reparent_to_init); set_to_cap_if_null(ops, task_to_inode); set_to_cap_if_null(ops, ipc_permission); set_to_cap_if_null(ops, ipc_getsecid); diff --git a/security/commoncap.c b/security/commoncap.c index 0384bf95db68..b5419273f92d 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -72,8 +72,8 @@ int cap_ptrace_may_access(struct task_struct *child, unsigned int mode) int ret = 0; rcu_read_lock(); - if (!cap_issubset(child->cred->cap_permitted, - current->cred->cap_permitted) && + if (!cap_issubset(__task_cred(child)->cap_permitted, + current_cred()->cap_permitted) && !capable(CAP_SYS_PTRACE)) ret = -EPERM; rcu_read_unlock(); @@ -85,8 +85,8 @@ int cap_ptrace_traceme(struct task_struct *parent) int ret = 0; rcu_read_lock(); - if (!cap_issubset(current->cred->cap_permitted, - parent->cred->cap_permitted) && + if (!cap_issubset(current_cred()->cap_permitted, + __task_cred(parent)->cap_permitted) && !has_capability(parent, CAP_SYS_PTRACE)) ret = -EPERM; rcu_read_unlock(); @@ -117,7 +117,7 @@ static inline int cap_inh_is_capped(void) * to the old permitted set. That is, if the current task * does *not* possess the CAP_SETPCAP capability. */ - return (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0); + return cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0; } static inline int cap_limit_ptraced_target(void) { return 1; } @@ -132,52 +132,39 @@ static inline int cap_limit_ptraced_target(void) #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ -int cap_capset_check(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) +int cap_capset(struct cred *new, + const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) { - const struct cred *cred = current->cred; - - if (cap_inh_is_capped() - && !cap_issubset(*inheritable, - cap_combine(cred->cap_inheritable, - cred->cap_permitted))) { + if (cap_inh_is_capped() && + !cap_issubset(*inheritable, + cap_combine(old->cap_inheritable, + old->cap_permitted))) /* incapable of using this inheritable set */ return -EPERM; - } + if (!cap_issubset(*inheritable, - cap_combine(cred->cap_inheritable, - cred->cap_bset))) { + cap_combine(old->cap_inheritable, + old->cap_bset))) /* no new pI capabilities outside bounding set */ return -EPERM; - } /* verify restrictions on target's new Permitted set */ - if (!cap_issubset (*permitted, - cap_combine (cred->cap_permitted, - cred->cap_permitted))) { + if (!cap_issubset(*permitted, old->cap_permitted)) return -EPERM; - } /* verify the _new_Effective_ is a subset of the _new_Permitted_ */ - if (!cap_issubset (*effective, *permitted)) { + if (!cap_issubset(*effective, *permitted)) return -EPERM; - } + new->cap_effective = *effective; + new->cap_inheritable = *inheritable; + new->cap_permitted = *permitted; return 0; } -void cap_capset_set(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) -{ - struct cred *cred = current->cred; - - cred->cap_effective = *effective; - cred->cap_inheritable = *inheritable; - cred->cap_permitted = *permitted; -} - static inline void bprm_clear_caps(struct linux_binprm *bprm) { cap_clear(bprm->cap_post_exec_permitted); @@ -382,41 +369,46 @@ int cap_bprm_set_security (struct linux_binprm *bprm) return ret; } -void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) +int cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) { - struct cred *cred = current->cred; + const struct cred *old = current_cred(); + struct cred *new; + + new = prepare_creds(); + if (!new) + return -ENOMEM; - if (bprm->e_uid != cred->uid || bprm->e_gid != cred->gid || + if (bprm->e_uid != old->uid || bprm->e_gid != old->gid || !cap_issubset(bprm->cap_post_exec_permitted, - cred->cap_permitted)) { + old->cap_permitted)) { set_dumpable(current->mm, suid_dumpable); current->pdeath_signal = 0; if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) { if (!capable(CAP_SETUID)) { - bprm->e_uid = cred->uid; - bprm->e_gid = cred->gid; + bprm->e_uid = old->uid; + bprm->e_gid = old->gid; } if (cap_limit_ptraced_target()) { bprm->cap_post_exec_permitted = cap_intersect( bprm->cap_post_exec_permitted, - cred->cap_permitted); + new->cap_permitted); } } } - cred->suid = cred->euid = cred->fsuid = bprm->e_uid; - cred->sgid = cred->egid = cred->fsgid = bprm->e_gid; + new->suid = new->euid = new->fsuid = bprm->e_uid; + new->sgid = new->egid = new->fsgid = bprm->e_gid; /* For init, we want to retain the capabilities set * in the init_task struct. Thus we skip the usual * capability rules */ if (!is_global_init(current)) { - cred->cap_permitted = bprm->cap_post_exec_permitted; + new->cap_permitted = bprm->cap_post_exec_permitted; if (bprm->cap_effective) - cred->cap_effective = bprm->cap_post_exec_permitted; + new->cap_effective = bprm->cap_post_exec_permitted; else - cap_clear(cred->cap_effective); + cap_clear(new->cap_effective); } /* @@ -431,15 +423,15 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) * Number 1 above might fail if you don't have a full bset, but I think * that is interesting information to audit. */ - if (!cap_isclear(cred->cap_effective)) { - if (!cap_issubset(CAP_FULL_SET, cred->cap_effective) || - (bprm->e_uid != 0) || (cred->uid != 0) || + if (!cap_isclear(new->cap_effective)) { + if (!cap_issubset(CAP_FULL_SET, new->cap_effective) || + bprm->e_uid != 0 || new->uid != 0 || issecure(SECURE_NOROOT)) - audit_log_bprm_fcaps(bprm, &cred->cap_permitted, - &cred->cap_effective); + audit_log_bprm_fcaps(bprm, new, old); } - cred->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); + new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); + return commit_creds(new); } int cap_bprm_secureexec (struct linux_binprm *bprm) @@ -514,65 +506,49 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name) * files.. * Thanks to Olaf Kirch and Peter Benie for spotting this. */ -static inline void cap_emulate_setxuid (int old_ruid, int old_euid, - int old_suid) +static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old) { - struct cred *cred = current->cred; - - if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) && - (cred->uid != 0 && cred->euid != 0 && cred->suid != 0) && + if ((old->uid == 0 || old->euid == 0 || old->suid == 0) && + (new->uid != 0 && new->euid != 0 && new->suid != 0) && !issecure(SECURE_KEEP_CAPS)) { - cap_clear(cred->cap_permitted); - cap_clear(cred->cap_effective); - } - if (old_euid == 0 && cred->euid != 0) { - cap_clear(cred->cap_effective); - } - if (old_euid != 0 && cred->euid == 0) { - cred->cap_effective = cred->cap_permitted; + cap_clear(new->cap_permitted); + cap_clear(new->cap_effective); } + if (old->euid == 0 && new->euid != 0) + cap_clear(new->cap_effective); + if (old->euid != 0 && new->euid == 0) + new->cap_effective = new->cap_permitted; } -int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, - int flags) +int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags) { - struct cred *cred = current->cred; - switch (flags) { case LSM_SETID_RE: case LSM_SETID_ID: case LSM_SETID_RES: /* Copied from kernel/sys.c:setreuid/setuid/setresuid. */ - if (!issecure (SECURE_NO_SETUID_FIXUP)) { - cap_emulate_setxuid (old_ruid, old_euid, old_suid); - } + if (!issecure(SECURE_NO_SETUID_FIXUP)) + cap_emulate_setxuid(new, old); break; case LSM_SETID_FS: - { - uid_t old_fsuid = old_ruid; - - /* Copied from kernel/sys.c:setfsuid. */ + /* Copied from kernel/sys.c:setfsuid. */ - /* - * FIXME - is fsuser used for all CAP_FS_MASK capabilities? - * if not, we might be a bit too harsh here. - */ - - if (!issecure (SECURE_NO_SETUID_FIXUP)) { - if (old_fsuid == 0 && cred->fsuid != 0) { - cred->cap_effective = - cap_drop_fs_set( - cred->cap_effective); - } - if (old_fsuid != 0 && cred->fsuid == 0) { - cred->cap_effective = - cap_raise_fs_set( - cred->cap_effective, - cred->cap_permitted); - } + /* + * FIXME - is fsuser used for all CAP_FS_MASK capabilities? + * if not, we might be a bit too harsh here. + */ + if (!issecure(SECURE_NO_SETUID_FIXUP)) { + if (old->fsuid == 0 && new->fsuid != 0) { + new->cap_effective = + cap_drop_fs_set(new->cap_effective); + } + if (old->fsuid != 0 && new->fsuid == 0) { + new->cap_effective = + cap_raise_fs_set(new->cap_effective, + new->cap_permitted); } - break; } + break; default: return -EINVAL; } @@ -628,13 +604,14 @@ int cap_task_setnice (struct task_struct *p, int nice) * this task could get inconsistent info. There can be no * racing writer bc a task can only change its own caps. */ -static long cap_prctl_drop(unsigned long cap) +static long cap_prctl_drop(struct cred *new, unsigned long cap) { if (!capable(CAP_SETPCAP)) return -EPERM; if (!cap_valid(cap)) return -EINVAL; - cap_lower(current->cred->cap_bset, cap); + + cap_lower(new->cap_bset, cap); return 0; } @@ -655,22 +632,29 @@ int cap_task_setnice (struct task_struct *p, int nice) #endif int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5, long *rc_p) + unsigned long arg4, unsigned long arg5) { - struct cred *cred = current_cred(); + struct cred *new; long error = 0; + new = prepare_creds(); + if (!new) + return -ENOMEM; + switch (option) { case PR_CAPBSET_READ: + error = -EINVAL; if (!cap_valid(arg2)) - error = -EINVAL; - else - error = !!cap_raised(cred->cap_bset, arg2); - break; + goto error; + error = !!cap_raised(new->cap_bset, arg2); + goto no_change; + #ifdef CONFIG_SECURITY_FILE_CAPABILITIES case PR_CAPBSET_DROP: - error = cap_prctl_drop(arg2); - break; + error = cap_prctl_drop(new, arg2); + if (error < 0) + goto error; + goto changed; /* * The next four prctl's remain to assist with transitioning a @@ -692,12 +676,12 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, * capability-based-privilege environment. */ case PR_SET_SECUREBITS: - if ((((cred->securebits & SECURE_ALL_LOCKS) >> 1) - & (cred->securebits ^ arg2)) /*[1]*/ - || ((cred->securebits & SECURE_ALL_LOCKS - & ~arg2)) /*[2]*/ - || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ - || (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0)) { /*[4]*/ + error = -EPERM; + if ((((new->securebits & SECURE_ALL_LOCKS) >> 1) + & (new->securebits ^ arg2)) /*[1]*/ + || ((new->securebits & SECURE_ALL_LOCKS & ~arg2)) /*[2]*/ + || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ + || (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0) /*[4]*/ /* * [1] no changing of bits that are locked * [2] no unlocking of locks @@ -705,50 +689,51 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, * [4] doing anything requires privilege (go read about * the "sendmail capabilities bug") */ - error = -EPERM; /* cannot change a locked bit */ - } else { - cred->securebits = arg2; - } - break; + ) + /* cannot change a locked bit */ + goto error; + new->securebits = arg2; + goto changed; + case PR_GET_SECUREBITS: - error = cred->securebits; - break; + error = new->securebits; + goto no_change; #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ case PR_GET_KEEPCAPS: if (issecure(SECURE_KEEP_CAPS)) error = 1; - break; + goto no_change; + case PR_SET_KEEPCAPS: + error = -EINVAL; if (arg2 > 1) /* Note, we rely on arg2 being unsigned here */ - error = -EINVAL; - else if (issecure(SECURE_KEEP_CAPS_LOCKED)) - error = -EPERM; - else if (arg2) - cred->securebits |= issecure_mask(SECURE_KEEP_CAPS); + goto error; + error = -EPERM; + if (issecure(SECURE_KEEP_CAPS_LOCKED)) + goto error; + if (arg2) + new->securebits |= issecure_mask(SECURE_KEEP_CAPS); else - cred->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); - break; + new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); + goto changed; default: /* No functionality available - continue with default */ - return 0; + error = -ENOSYS; + goto error; } /* Functionality provided */ - *rc_p = error; - return 1; -} - -void cap_task_reparent_to_init (struct task_struct *p) -{ - struct cred *cred = p->cred; - - cap_set_init_eff(cred->cap_effective); - cap_clear(cred->cap_inheritable); - cap_set_full(cred->cap_permitted); - p->cred->securebits = SECUREBITS_DEFAULT; +changed: + return commit_creds(new); + +no_change: + error = 0; +error: + abort_creds(new); + return error; } int cap_syslog (int type) diff --git a/security/keys/internal.h b/security/keys/internal.h index d1586c629788..81932abefe7b 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -12,6 +12,7 @@ #ifndef _INTERNAL_H #define _INTERNAL_H +#include #include static inline __attribute__((format(printf, 1, 2))) @@ -25,7 +26,7 @@ void no_printk(const char *fmt, ...) #define kleave(FMT, ...) \ printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__) #define kdebug(FMT, ...) \ - printk(KERN_DEBUG "xxx" FMT"yyy\n", ##__VA_ARGS__) + printk(KERN_DEBUG " "FMT"\n", ##__VA_ARGS__) #else #define kenter(FMT, ...) \ no_printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__) @@ -97,7 +98,7 @@ extern struct key *keyring_search_instkey(struct key *keyring, typedef int (*key_match_func_t)(const struct key *, const void *); extern key_ref_t keyring_search_aux(key_ref_t keyring_ref, - struct task_struct *tsk, + const struct cred *cred, struct key_type *type, const void *description, key_match_func_t match); @@ -105,13 +106,13 @@ extern key_ref_t keyring_search_aux(key_ref_t keyring_ref, extern key_ref_t search_process_keyrings(struct key_type *type, const void *description, key_match_func_t match, - struct task_struct *tsk); + const struct cred *cred); extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check); extern int install_user_keyrings(void); -extern int install_thread_keyring(void); -extern int install_process_keyring(void); +extern int install_thread_keyring_to_cred(struct cred *); +extern int install_process_keyring_to_cred(struct cred *); extern struct key *request_key_and_link(struct key_type *type, const char *description, @@ -130,12 +131,12 @@ extern long join_session_keyring(const char *name); * check to see whether permission is granted to use a key in the desired way */ extern int key_task_permission(const key_ref_t key_ref, - struct task_struct *context, + const struct cred *cred, key_perm_t perm); static inline int key_permission(const key_ref_t key_ref, key_perm_t perm) { - return key_task_permission(key_ref, current, perm); + return key_task_permission(key_ref, current_cred(), perm); } /* required permissions */ @@ -153,7 +154,7 @@ static inline int key_permission(const key_ref_t key_ref, key_perm_t perm) struct request_key_auth { struct key *target_key; struct key *dest_keyring; - struct task_struct *context; + const struct cred *cred; void *callout_info; size_t callout_len; pid_t pid; diff --git a/security/keys/key.c b/security/keys/key.c index a6ca39ed3b0e..f76c8a546fd3 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -218,7 +218,7 @@ serial_exists: * instantiate the key or discard it before returning */ struct key *key_alloc(struct key_type *type, const char *desc, - uid_t uid, gid_t gid, struct task_struct *ctx, + uid_t uid, gid_t gid, const struct cred *cred, key_perm_t perm, unsigned long flags) { struct key_user *user = NULL; @@ -294,7 +294,7 @@ struct key *key_alloc(struct key_type *type, const char *desc, #endif /* let the security module know about the key */ - ret = security_key_alloc(key, ctx, flags); + ret = security_key_alloc(key, cred, flags); if (ret < 0) goto security_error; @@ -391,7 +391,7 @@ static int __key_instantiate_and_link(struct key *key, const void *data, size_t datalen, struct key *keyring, - struct key *instkey) + struct key *authkey) { int ret, awaken; @@ -421,8 +421,8 @@ static int __key_instantiate_and_link(struct key *key, ret = __key_link(keyring, key); /* disable the authorisation key */ - if (instkey) - key_revoke(instkey); + if (authkey) + key_revoke(authkey); } } @@ -444,14 +444,14 @@ int key_instantiate_and_link(struct key *key, const void *data, size_t datalen, struct key *keyring, - struct key *instkey) + struct key *authkey) { int ret; if (keyring) down_write(&keyring->sem); - ret = __key_instantiate_and_link(key, data, datalen, keyring, instkey); + ret = __key_instantiate_and_link(key, data, datalen, keyring, authkey); if (keyring) up_write(&keyring->sem); @@ -469,7 +469,7 @@ EXPORT_SYMBOL(key_instantiate_and_link); int key_negate_and_link(struct key *key, unsigned timeout, struct key *keyring, - struct key *instkey) + struct key *authkey) { struct timespec now; int ret, awaken; @@ -504,8 +504,8 @@ int key_negate_and_link(struct key *key, ret = __key_link(keyring, key); /* disable the authorisation key */ - if (instkey) - key_revoke(instkey); + if (authkey) + key_revoke(authkey); } mutex_unlock(&key_construction_mutex); @@ -743,6 +743,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, key_perm_t perm, unsigned long flags) { + const struct cred *cred = current_cred(); struct key_type *ktype; struct key *keyring, *key = NULL; key_ref_t key_ref; @@ -802,8 +803,8 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, } /* allocate a new key */ - key = key_alloc(ktype, description, current_fsuid(), current_fsgid(), - current, perm, flags); + key = key_alloc(ktype, description, cred->fsuid, cred->fsgid, cred, + perm, flags); if (IS_ERR(key)) { key_ref = ERR_CAST(key); goto error_3; diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 8833b447adef..7c72baa02f2e 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -866,6 +866,23 @@ static long get_instantiation_keyring(key_serial_t ringid, return -ENOKEY; } +/* + * change the request_key authorisation key on the current process + */ +static int keyctl_change_reqkey_auth(struct key *key) +{ + struct cred *new; + + new = prepare_creds(); + if (!new) + return -ENOMEM; + + key_put(new->request_key_auth); + new->request_key_auth = key_get(key); + + return commit_creds(new); +} + /*****************************************************************************/ /* * instantiate the key with the specified payload, and, if one is given, link @@ -876,12 +893,15 @@ long keyctl_instantiate_key(key_serial_t id, size_t plen, key_serial_t ringid) { + const struct cred *cred = current_cred(); struct request_key_auth *rka; struct key *instkey, *dest_keyring; void *payload; long ret; bool vm = false; + kenter("%d,,%zu,%d", id, plen, ringid); + ret = -EINVAL; if (plen > 1024 * 1024 - 1) goto error; @@ -889,7 +909,7 @@ long keyctl_instantiate_key(key_serial_t id, /* the appropriate instantiation authorisation key must have been * assumed before calling this */ ret = -EPERM; - instkey = current->cred->request_key_auth; + instkey = cred->request_key_auth; if (!instkey) goto error; @@ -931,10 +951,8 @@ long keyctl_instantiate_key(key_serial_t id, /* discard the assumed authority if it's just been disabled by * instantiation of the key */ - if (ret == 0) { - key_put(current->cred->request_key_auth); - current->cred->request_key_auth = NULL; - } + if (ret == 0) + keyctl_change_reqkey_auth(NULL); error2: if (!vm) @@ -953,14 +971,17 @@ error: */ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) { + const struct cred *cred = current_cred(); struct request_key_auth *rka; struct key *instkey, *dest_keyring; long ret; + kenter("%d,%u,%d", id, timeout, ringid); + /* the appropriate instantiation authorisation key must have been * assumed before calling this */ ret = -EPERM; - instkey = current->cred->request_key_auth; + instkey = cred->request_key_auth; if (!instkey) goto error; @@ -982,10 +1003,8 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) /* discard the assumed authority if it's just been disabled by * instantiation of the key */ - if (ret == 0) { - key_put(current->cred->request_key_auth); - current->cred->request_key_auth = NULL; - } + if (ret == 0) + keyctl_change_reqkey_auth(NULL); error: return ret; @@ -999,36 +1018,56 @@ error: */ long keyctl_set_reqkey_keyring(int reqkey_defl) { - struct cred *cred = current->cred; - int ret; + struct cred *new; + int ret, old_setting; + + old_setting = current_cred_xxx(jit_keyring); + + if (reqkey_defl == KEY_REQKEY_DEFL_NO_CHANGE) + return old_setting; + + new = prepare_creds(); + if (!new) + return -ENOMEM; switch (reqkey_defl) { case KEY_REQKEY_DEFL_THREAD_KEYRING: - ret = install_thread_keyring(); + ret = install_thread_keyring_to_cred(new); if (ret < 0) - return ret; + goto error; goto set; case KEY_REQKEY_DEFL_PROCESS_KEYRING: - ret = install_process_keyring(); - if (ret < 0) - return ret; + ret = install_process_keyring_to_cred(new); + if (ret < 0) { + if (ret != -EEXIST) + goto error; + ret = 0; + } + goto set; case KEY_REQKEY_DEFL_DEFAULT: case KEY_REQKEY_DEFL_SESSION_KEYRING: case KEY_REQKEY_DEFL_USER_KEYRING: case KEY_REQKEY_DEFL_USER_SESSION_KEYRING: - set: - cred->jit_keyring = reqkey_defl; + case KEY_REQKEY_DEFL_REQUESTOR_KEYRING: + goto set; case KEY_REQKEY_DEFL_NO_CHANGE: - return cred->jit_keyring; - case KEY_REQKEY_DEFL_GROUP_KEYRING: default: - return -EINVAL; + ret = -EINVAL; + goto error; } +set: + new->jit_keyring = reqkey_defl; + commit_creds(new); + return old_setting; +error: + abort_creds(new); + return -EINVAL; + } /* end keyctl_set_reqkey_keyring() */ /*****************************************************************************/ @@ -1087,9 +1126,7 @@ long keyctl_assume_authority(key_serial_t id) /* we divest ourselves of authority if given an ID of 0 */ if (id == 0) { - key_put(current->cred->request_key_auth); - current->cred->request_key_auth = NULL; - ret = 0; + ret = keyctl_change_reqkey_auth(NULL); goto error; } @@ -1104,10 +1141,12 @@ long keyctl_assume_authority(key_serial_t id) goto error; } - key_put(current->cred->request_key_auth); - current->cred->request_key_auth = authkey; - ret = authkey->serial; + ret = keyctl_change_reqkey_auth(authkey); + if (ret < 0) + goto error; + key_put(authkey); + ret = authkey->serial; error: return ret; diff --git a/security/keys/keyring.c b/security/keys/keyring.c index fdf75f901991..ed851574d073 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -245,14 +245,14 @@ static long keyring_read(const struct key *keyring, * allocate a keyring and link into the destination keyring */ struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, - struct task_struct *ctx, unsigned long flags, + const struct cred *cred, unsigned long flags, struct key *dest) { struct key *keyring; int ret; keyring = key_alloc(&key_type_keyring, description, - uid, gid, ctx, + uid, gid, cred, (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL, flags); @@ -281,7 +281,7 @@ struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, * - we propagate the possession attribute from the keyring ref to the key ref */ key_ref_t keyring_search_aux(key_ref_t keyring_ref, - struct task_struct *context, + const struct cred *cred, struct key_type *type, const void *description, key_match_func_t match) @@ -304,7 +304,7 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref, key_check(keyring); /* top keyring must have search permission to begin the search */ - err = key_task_permission(keyring_ref, context, KEY_SEARCH); + err = key_task_permission(keyring_ref, cred, KEY_SEARCH); if (err < 0) { key_ref = ERR_PTR(err); goto error; @@ -377,7 +377,7 @@ descend: /* key must have search permissions */ if (key_task_permission(make_key_ref(key, possessed), - context, KEY_SEARCH) < 0) + cred, KEY_SEARCH) < 0) continue; /* we set a different error code if we pass a negative key */ @@ -404,7 +404,7 @@ ascend: continue; if (key_task_permission(make_key_ref(key, possessed), - context, KEY_SEARCH) < 0) + cred, KEY_SEARCH) < 0) continue; /* stack the current position */ @@ -459,7 +459,7 @@ key_ref_t keyring_search(key_ref_t keyring, if (!type->match) return ERR_PTR(-ENOKEY); - return keyring_search_aux(keyring, current, + return keyring_search_aux(keyring, current->cred, type, description, type->match); } /* end keyring_search() */ diff --git a/security/keys/permission.c b/security/keys/permission.c index 13c36164f284..5d9fc7b93f2e 100644 --- a/security/keys/permission.c +++ b/security/keys/permission.c @@ -14,24 +14,27 @@ #include "internal.h" /*****************************************************************************/ -/* - * check to see whether permission is granted to use a key in the desired way, - * but permit the security modules to override +/** + * key_task_permission - Check a key can be used + * @key_ref: The key to check + * @cred: The credentials to use + * @perm: The permissions to check for + * + * Check to see whether permission is granted to use a key in the desired way, + * but permit the security modules to override. + * + * The caller must hold either a ref on cred or must hold the RCU readlock or a + * spinlock. */ -int key_task_permission(const key_ref_t key_ref, - struct task_struct *context, +int key_task_permission(const key_ref_t key_ref, const struct cred *cred, key_perm_t perm) { - const struct cred *cred; struct key *key; key_perm_t kperm; int ret; key = key_ref_to_ptr(key_ref); - rcu_read_lock(); - cred = __task_cred(context); - /* use the second 8-bits of permissions for keys the caller owns */ if (key->uid == cred->fsuid) { kperm = key->perm >> 16; @@ -57,7 +60,6 @@ int key_task_permission(const key_ref_t key_ref, kperm = key->perm; use_these_perms: - rcu_read_lock(); /* use the top 8-bits of permissions for keys the caller possesses * - possessor permissions are additive with other permissions @@ -71,7 +73,7 @@ use_these_perms: return -EACCES; /* let LSM be the final arbiter */ - return security_key_permission(key_ref, context, perm); + return security_key_permission(key_ref, cred, perm); } /* end key_task_permission() */ diff --git a/security/keys/proc.c b/security/keys/proc.c index f619170da760..7f508def50e3 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -136,8 +136,12 @@ static int proc_keys_show(struct seq_file *m, void *v) int rc; /* check whether the current task is allowed to view the key (assuming - * non-possession) */ - rc = key_task_permission(make_key_ref(key, 0), current, KEY_VIEW); + * non-possession) + * - the caller holds a spinlock, and thus the RCU read lock, making our + * access to __current_cred() safe + */ + rc = key_task_permission(make_key_ref(key, 0), current_cred(), + KEY_VIEW); if (rc < 0) return 0; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 70ee93406f30..df329f684a65 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -42,11 +42,15 @@ struct key_user root_key_user = { */ int install_user_keyrings(void) { - struct user_struct *user = current->cred->user; + struct user_struct *user; + const struct cred *cred; struct key *uid_keyring, *session_keyring; char buf[20]; int ret; + cred = current_cred(); + user = cred->user; + kenter("%p{%u}", user, user->uid); if (user->uid_keyring) { @@ -67,7 +71,7 @@ int install_user_keyrings(void) uid_keyring = find_keyring_by_name(buf, true); if (IS_ERR(uid_keyring)) { uid_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, - current, KEY_ALLOC_IN_QUOTA, + cred, KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(uid_keyring)) { ret = PTR_ERR(uid_keyring); @@ -83,8 +87,7 @@ int install_user_keyrings(void) if (IS_ERR(session_keyring)) { session_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, - current, KEY_ALLOC_IN_QUOTA, - NULL); + cred, KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(session_keyring)) { ret = PTR_ERR(session_keyring); goto error_release; @@ -116,142 +119,128 @@ error: return ret; } -/*****************************************************************************/ /* - * deal with the UID changing + * install a fresh thread keyring directly to new credentials */ -void switch_uid_keyring(struct user_struct *new_user) +int install_thread_keyring_to_cred(struct cred *new) { -#if 0 /* do nothing for now */ - struct key *old; - - /* switch to the new user's session keyring if we were running under - * root's default session keyring */ - if (new_user->uid != 0 && - current->session_keyring == &root_session_keyring - ) { - atomic_inc(&new_user->session_keyring->usage); - - task_lock(current); - old = current->session_keyring; - current->session_keyring = new_user->session_keyring; - task_unlock(current); + struct key *keyring; - key_put(old); - } -#endif + keyring = keyring_alloc("_tid", new->uid, new->gid, new, + KEY_ALLOC_QUOTA_OVERRUN, NULL); + if (IS_ERR(keyring)) + return PTR_ERR(keyring); -} /* end switch_uid_keyring() */ + new->thread_keyring = keyring; + return 0; +} -/*****************************************************************************/ /* * install a fresh thread keyring, discarding the old one */ -int install_thread_keyring(void) +static int install_thread_keyring(void) { - struct task_struct *tsk = current; - struct key *keyring, *old; - char buf[20]; + struct cred *new; int ret; - sprintf(buf, "_tid.%u", tsk->pid); + new = prepare_creds(); + if (!new) + return -ENOMEM; - keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, - KEY_ALLOC_QUOTA_OVERRUN, NULL); - if (IS_ERR(keyring)) { - ret = PTR_ERR(keyring); - goto error; + BUG_ON(new->thread_keyring); + + ret = install_thread_keyring_to_cred(new); + if (ret < 0) { + abort_creds(new); + return ret; } - task_lock(tsk); - old = tsk->cred->thread_keyring; - tsk->cred->thread_keyring = keyring; - task_unlock(tsk); + return commit_creds(new); +} - ret = 0; +/* + * install a process keyring directly to a credentials struct + * - returns -EEXIST if there was already a process keyring, 0 if one installed, + * and other -ve on any other error + */ +int install_process_keyring_to_cred(struct cred *new) +{ + struct key *keyring; + int ret; - key_put(old); -error: + if (new->tgcred->process_keyring) + return -EEXIST; + + keyring = keyring_alloc("_pid", new->uid, new->gid, + new, KEY_ALLOC_QUOTA_OVERRUN, NULL); + if (IS_ERR(keyring)) + return PTR_ERR(keyring); + + spin_lock_irq(&new->tgcred->lock); + if (!new->tgcred->process_keyring) { + new->tgcred->process_keyring = keyring; + keyring = NULL; + ret = 0; + } else { + ret = -EEXIST; + } + spin_unlock_irq(&new->tgcred->lock); + key_put(keyring); return ret; +} -} /* end install_thread_keyring() */ - -/*****************************************************************************/ /* * make sure a process keyring is installed + * - we */ -int install_process_keyring(void) +static int install_process_keyring(void) { - struct task_struct *tsk = current; - struct key *keyring; - char buf[20]; + struct cred *new; int ret; - might_sleep(); - - if (!tsk->cred->tgcred->process_keyring) { - sprintf(buf, "_pid.%u", tsk->tgid); - - keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, - KEY_ALLOC_QUOTA_OVERRUN, NULL); - if (IS_ERR(keyring)) { - ret = PTR_ERR(keyring); - goto error; - } - - /* attach keyring */ - spin_lock_irq(&tsk->cred->tgcred->lock); - if (!tsk->cred->tgcred->process_keyring) { - tsk->cred->tgcred->process_keyring = keyring; - keyring = NULL; - } - spin_unlock_irq(&tsk->cred->tgcred->lock); + new = prepare_creds(); + if (!new) + return -ENOMEM; - key_put(keyring); + ret = install_process_keyring_to_cred(new); + if (ret < 0) { + abort_creds(new); + return ret != -EEXIST ?: 0; } - ret = 0; -error: - return ret; - -} /* end install_process_keyring() */ + return commit_creds(new); +} -/*****************************************************************************/ /* - * install a session keyring, discarding the old one - * - if a keyring is not supplied, an empty one is invented + * install a session keyring directly to a credentials struct */ -static int install_session_keyring(struct key *keyring) +static int install_session_keyring_to_cred(struct cred *cred, + struct key *keyring) { - struct task_struct *tsk = current; unsigned long flags; struct key *old; - char buf[20]; might_sleep(); /* create an empty session keyring */ if (!keyring) { - sprintf(buf, "_ses.%u", tsk->tgid); - flags = KEY_ALLOC_QUOTA_OVERRUN; - if (tsk->cred->tgcred->session_keyring) + if (cred->tgcred->session_keyring) flags = KEY_ALLOC_IN_QUOTA; - keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, - tsk, flags, NULL); + keyring = keyring_alloc("_ses", cred->uid, cred->gid, + cred, flags, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); - } - else { + } else { atomic_inc(&keyring->usage); } /* install the keyring */ - spin_lock_irq(&tsk->cred->tgcred->lock); - old = tsk->cred->tgcred->session_keyring; - rcu_assign_pointer(tsk->cred->tgcred->session_keyring, keyring); - spin_unlock_irq(&tsk->cred->tgcred->lock); + spin_lock_irq(&cred->tgcred->lock); + old = cred->tgcred->session_keyring; + rcu_assign_pointer(cred->tgcred->session_keyring, keyring); + spin_unlock_irq(&cred->tgcred->lock); /* we're using RCU on the pointer, but there's no point synchronising * on it if it didn't previously point to anything */ @@ -261,38 +250,29 @@ static int install_session_keyring(struct key *keyring) } return 0; +} -} /* end install_session_keyring() */ - -/*****************************************************************************/ /* - * copy the keys for fork + * install a session keyring, discarding the old one + * - if a keyring is not supplied, an empty one is invented */ -int copy_keys(unsigned long clone_flags, struct task_struct *tsk) +static int install_session_keyring(struct key *keyring) { - key_check(tsk->cred->thread_keyring); - key_check(tsk->cred->request_key_auth); - - /* no thread keyring yet */ - tsk->cred->thread_keyring = NULL; - - /* copy the request_key() authorisation for this thread */ - key_get(tsk->cred->request_key_auth); - - return 0; + struct cred *new; + int ret; -} /* end copy_keys() */ + new = prepare_creds(); + if (!new) + return -ENOMEM; -/*****************************************************************************/ -/* - * dispose of per-thread keys upon thread exit - */ -void exit_keys(struct task_struct *tsk) -{ - key_put(tsk->cred->thread_keyring); - key_put(tsk->cred->request_key_auth); + ret = install_session_keyring_to_cred(new, NULL); + if (ret < 0) { + abort_creds(new); + return ret; + } -} /* end exit_keys() */ + return commit_creds(new); +} /*****************************************************************************/ /* @@ -300,38 +280,41 @@ void exit_keys(struct task_struct *tsk) */ int exec_keys(struct task_struct *tsk) { - struct key *old; + struct thread_group_cred *tgcred = NULL; + struct cred *new; - /* newly exec'd tasks don't get a thread keyring */ - task_lock(tsk); - old = tsk->cred->thread_keyring; - tsk->cred->thread_keyring = NULL; - task_unlock(tsk); +#ifdef CONFIG_KEYS + tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); + if (!tgcred) + return -ENOMEM; +#endif - key_put(old); + new = prepare_creds(); + if (new < 0) + return -ENOMEM; - /* discard the process keyring from a newly exec'd task */ - spin_lock_irq(&tsk->cred->tgcred->lock); - old = tsk->cred->tgcred->process_keyring; - tsk->cred->tgcred->process_keyring = NULL; - spin_unlock_irq(&tsk->cred->tgcred->lock); + /* newly exec'd tasks don't get a thread keyring */ + key_put(new->thread_keyring); + new->thread_keyring = NULL; - key_put(old); + /* create a new per-thread-group creds for all this set of threads to + * share */ + memcpy(tgcred, new->tgcred, sizeof(struct thread_group_cred)); - return 0; + atomic_set(&tgcred->usage, 1); + spin_lock_init(&tgcred->lock); -} /* end exec_keys() */ + /* inherit the session keyring; new process keyring */ + key_get(tgcred->session_keyring); + tgcred->process_keyring = NULL; -/*****************************************************************************/ -/* - * deal with SUID programs - * - we might want to make this invent a new session keyring - */ -int suid_keys(struct task_struct *tsk) -{ + release_tgcred(new); + new->tgcred = tgcred; + + commit_creds(new); return 0; -} /* end suid_keys() */ +} /* end exec_keys() */ /*****************************************************************************/ /* @@ -376,16 +359,13 @@ void key_fsgid_changed(struct task_struct *tsk) key_ref_t search_process_keyrings(struct key_type *type, const void *description, key_match_func_t match, - struct task_struct *context) + const struct cred *cred) { struct request_key_auth *rka; - struct cred *cred; key_ref_t key_ref, ret, err; might_sleep(); - cred = get_task_cred(context); - /* we want to return -EAGAIN or -ENOKEY if any of the keyrings were * searchable, but we failed to find a key or we found a negative key; * otherwise we want to return a sample error (probably -EACCES) if @@ -401,7 +381,7 @@ key_ref_t search_process_keyrings(struct key_type *type, if (cred->thread_keyring) { key_ref = keyring_search_aux( make_key_ref(cred->thread_keyring, 1), - context, type, description, match); + cred, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -422,7 +402,7 @@ key_ref_t search_process_keyrings(struct key_type *type, if (cred->tgcred->process_keyring) { key_ref = keyring_search_aux( make_key_ref(cred->tgcred->process_keyring, 1), - context, type, description, match); + cred, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -446,7 +426,7 @@ key_ref_t search_process_keyrings(struct key_type *type, make_key_ref(rcu_dereference( cred->tgcred->session_keyring), 1), - context, type, description, match); + cred, type, description, match); rcu_read_unlock(); if (!IS_ERR(key_ref)) @@ -468,7 +448,7 @@ key_ref_t search_process_keyrings(struct key_type *type, else if (cred->user->session_keyring) { key_ref = keyring_search_aux( make_key_ref(cred->user->session_keyring, 1), - context, type, description, match); + cred, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -490,7 +470,7 @@ key_ref_t search_process_keyrings(struct key_type *type, * - we don't permit access to request_key auth keys via this method */ if (cred->request_key_auth && - context == current && + cred == current_cred() && type != &key_type_request_key_auth ) { /* defend against the auth key being revoked */ @@ -500,7 +480,7 @@ key_ref_t search_process_keyrings(struct key_type *type, rka = cred->request_key_auth->payload.data; key_ref = search_process_keyrings(type, description, - match, rka->context); + match, rka->cred); up_read(&cred->request_key_auth->sem); @@ -527,7 +507,6 @@ key_ref_t search_process_keyrings(struct key_type *type, key_ref = ret ? ret : err; found: - put_cred(cred); return key_ref; } /* end search_process_keyrings() */ @@ -552,8 +531,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, key_perm_t perm) { struct request_key_auth *rka; - struct task_struct *t = current; - struct cred *cred; + const struct cred *cred; struct key *key; key_ref_t key_ref, skey_ref; int ret; @@ -608,6 +586,7 @@ try_again: goto error; ret = install_session_keyring( cred->user->session_keyring); + if (ret < 0) goto error; goto reget_creds; @@ -693,7 +672,7 @@ try_again: /* check to see if we possess the key */ skey_ref = search_process_keyrings(key->type, key, lookup_user_key_possessed, - current); + cred); if (!IS_ERR(skey_ref)) { key_put(key); @@ -725,7 +704,7 @@ try_again: goto invalid_key; /* check the permissions */ - ret = key_task_permission(key_ref, t, perm); + ret = key_task_permission(key_ref, cred, perm); if (ret < 0) goto invalid_key; @@ -755,21 +734,33 @@ reget_creds: */ long join_session_keyring(const char *name) { - struct task_struct *tsk = current; - struct cred *cred = current->cred; + const struct cred *old; + struct cred *new; struct key *keyring; - long ret; + long ret, serial; + + /* only permit this if there's a single thread in the thread group - + * this avoids us having to adjust the creds on all threads and risking + * ENOMEM */ + if (!is_single_threaded(current)) + return -EMLINK; + + new = prepare_creds(); + if (!new) + return -ENOMEM; + old = current_cred(); /* if no name is provided, install an anonymous keyring */ if (!name) { - ret = install_session_keyring(NULL); + ret = install_session_keyring_to_cred(new, NULL); if (ret < 0) goto error; - rcu_read_lock(); - ret = rcu_dereference(cred->tgcred->session_keyring)->serial; - rcu_read_unlock(); - goto error; + serial = new->tgcred->session_keyring->serial; + ret = commit_creds(new); + if (ret == 0) + ret = serial; + goto okay; } /* allow the user to join or create a named keyring */ @@ -779,29 +770,33 @@ long join_session_keyring(const char *name) keyring = find_keyring_by_name(name, false); if (PTR_ERR(keyring) == -ENOKEY) { /* not found - try and create a new one */ - keyring = keyring_alloc(name, cred->uid, cred->gid, tsk, + keyring = keyring_alloc(name, old->uid, old->gid, old, KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto error2; } - } - else if (IS_ERR(keyring)) { + } else if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto error2; } /* we've got a keyring - now to install it */ - ret = install_session_keyring(keyring); + ret = install_session_keyring_to_cred(new, keyring); if (ret < 0) goto error2; + commit_creds(new); + mutex_unlock(&key_session_mutex); + ret = keyring->serial; key_put(keyring); +okay: + return ret; error2: mutex_unlock(&key_session_mutex); error: + abort_creds(new); return ret; - -} /* end join_session_keyring() */ +} diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 3d12558362df..0e04f72ef2d4 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -83,8 +83,10 @@ static int call_sbin_request_key(struct key_construction *cons, /* allocate a new session keyring */ sprintf(desc, "_req.%u", key->serial); - keyring = keyring_alloc(desc, current_fsuid(), current_fsgid(), current, + cred = get_current_cred(); + keyring = keyring_alloc(desc, cred->fsuid, cred->fsgid, cred, KEY_ALLOC_QUOTA_OVERRUN, NULL); + put_cred(cred); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto error_alloc; @@ -104,8 +106,7 @@ static int call_sbin_request_key(struct key_construction *cons, /* we specify the process's default keyrings */ sprintf(keyring_str[0], "%d", - cred->thread_keyring ? - cred->thread_keyring->serial : 0); + cred->thread_keyring ? cred->thread_keyring->serial : 0); prkey = 0; if (cred->tgcred->process_keyring) @@ -155,8 +156,8 @@ error_link: key_put(keyring); error_alloc: - kleave(" = %d", ret); complete_request_key(cons, ret); + kleave(" = %d", ret); return ret; } @@ -295,6 +296,7 @@ static int construct_alloc_key(struct key_type *type, struct key_user *user, struct key **_key) { + const struct cred *cred = current_cred(); struct key *key; key_ref_t key_ref; @@ -302,9 +304,8 @@ static int construct_alloc_key(struct key_type *type, mutex_lock(&user->cons_lock); - key = key_alloc(type, description, - current_fsuid(), current_fsgid(), current, KEY_POS_ALL, - flags); + key = key_alloc(type, description, cred->fsuid, cred->fsgid, cred, + KEY_POS_ALL, flags); if (IS_ERR(key)) goto alloc_failed; @@ -317,8 +318,7 @@ static int construct_alloc_key(struct key_type *type, * waited for locks */ mutex_lock(&key_construction_mutex); - key_ref = search_process_keyrings(type, description, type->match, - current); + key_ref = search_process_keyrings(type, description, type->match, cred); if (!IS_ERR(key_ref)) goto key_already_present; @@ -363,6 +363,8 @@ static struct key *construct_key_and_link(struct key_type *type, struct key *key; int ret; + kenter(""); + user = key_user_lookup(current_fsuid()); if (!user) return ERR_PTR(-ENOMEM); @@ -376,17 +378,21 @@ static struct key *construct_key_and_link(struct key_type *type, if (ret == 0) { ret = construct_key(key, callout_info, callout_len, aux, dest_keyring); - if (ret < 0) + if (ret < 0) { + kdebug("cons failed"); goto construction_failed; + } } key_put(dest_keyring); + kleave(" = key %d", key_serial(key)); return key; construction_failed: key_negate_and_link(key, key_negative_timeout, NULL, NULL); key_put(key); key_put(dest_keyring); + kleave(" = %d", ret); return ERR_PTR(ret); } @@ -405,6 +411,7 @@ struct key *request_key_and_link(struct key_type *type, struct key *dest_keyring, unsigned long flags) { + const struct cred *cred = current_cred(); struct key *key; key_ref_t key_ref; @@ -414,7 +421,7 @@ struct key *request_key_and_link(struct key_type *type, /* search all the process keyrings for a key */ key_ref = search_process_keyrings(type, description, type->match, - current); + cred); if (!IS_ERR(key_ref)) { key = key_ref_to_ptr(key_ref); diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 2125579d5d73..86747151ee5b 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -105,9 +105,9 @@ static void request_key_auth_revoke(struct key *key) kenter("{%d}", key->serial); - if (rka->context) { - put_task_struct(rka->context); - rka->context = NULL; + if (rka->cred) { + put_cred(rka->cred); + rka->cred = NULL; } } /* end request_key_auth_revoke() */ @@ -122,9 +122,9 @@ static void request_key_auth_destroy(struct key *key) kenter("{%d}", key->serial); - if (rka->context) { - put_task_struct(rka->context); - rka->context = NULL; + if (rka->cred) { + put_cred(rka->cred); + rka->cred = NULL; } key_put(rka->target_key); @@ -143,6 +143,7 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, size_t callout_len, struct key *dest_keyring) { struct request_key_auth *rka, *irka; + const struct cred *cred = current->cred; struct key *authkey = NULL; char desc[20]; int ret; @@ -164,28 +165,25 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, /* see if the calling process is already servicing the key request of * another process */ - if (current->cred->request_key_auth) { + if (cred->request_key_auth) { /* it is - use that instantiation context here too */ - down_read(¤t->cred->request_key_auth->sem); + down_read(&cred->request_key_auth->sem); /* if the auth key has been revoked, then the key we're * servicing is already instantiated */ - if (test_bit(KEY_FLAG_REVOKED, - ¤t->cred->request_key_auth->flags)) + if (test_bit(KEY_FLAG_REVOKED, &cred->request_key_auth->flags)) goto auth_key_revoked; - irka = current->cred->request_key_auth->payload.data; - rka->context = irka->context; + irka = cred->request_key_auth->payload.data; + rka->cred = get_cred(irka->cred); rka->pid = irka->pid; - get_task_struct(rka->context); - up_read(¤t->cred->request_key_auth->sem); + up_read(&cred->request_key_auth->sem); } else { /* it isn't - use this process as the context */ - rka->context = current; + rka->cred = get_cred(cred); rka->pid = current->pid; - get_task_struct(rka->context); } rka->target_key = key_get(target); @@ -197,7 +195,7 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, sprintf(desc, "%x", target->serial); authkey = key_alloc(&key_type_request_key_auth, desc, - current_fsuid(), current_fsgid(), current, + cred->fsuid, cred->fsgid, cred, KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH | KEY_USR_VIEW, KEY_ALLOC_NOT_IN_QUOTA); if (IS_ERR(authkey)) { @@ -205,16 +203,16 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, goto error_alloc; } - /* construct and attach to the keyring */ + /* construct the auth key */ ret = key_instantiate_and_link(authkey, rka, 0, NULL, NULL); if (ret < 0) goto error_inst; - kleave(" = {%d}", authkey->serial); + kleave(" = {%d,%d}", authkey->serial, atomic_read(&authkey->usage)); return authkey; auth_key_revoked: - up_read(¤t->cred->request_key_auth->sem); + up_read(&cred->request_key_auth->sem); kfree(rka->callout_info); kfree(rka); kleave("= -EKEYREVOKED"); @@ -257,6 +255,7 @@ static int key_get_instantiation_authkey_match(const struct key *key, */ struct key *key_get_instantiation_authkey(key_serial_t target_id) { + const struct cred *cred = current_cred(); struct key *authkey; key_ref_t authkey_ref; @@ -264,7 +263,7 @@ struct key *key_get_instantiation_authkey(key_serial_t target_id) &key_type_request_key_auth, (void *) (unsigned long) target_id, key_get_instantiation_authkey_match, - current); + cred); if (IS_ERR(authkey_ref)) { authkey = ERR_CAST(authkey_ref); diff --git a/security/security.c b/security/security.c index f40a0a04c3c2..a55d739c6864 100644 --- a/security/security.c +++ b/security/security.c @@ -145,18 +145,13 @@ int security_capget(struct task_struct *target, return security_ops->capget(target, effective, inheritable, permitted); } -int security_capset_check(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) +int security_capset(struct cred *new, const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) { - return security_ops->capset_check(effective, inheritable, permitted); -} - -void security_capset_set(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) -{ - security_ops->capset_set(effective, inheritable, permitted); + return security_ops->capset(new, old, + effective, inheritable, permitted); } int security_capable(struct task_struct *tsk, int cap) @@ -228,9 +223,9 @@ void security_bprm_free(struct linux_binprm *bprm) security_ops->bprm_free_security(bprm); } -void security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) +int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) { - security_ops->bprm_apply_creds(bprm, unsafe); + return security_ops->bprm_apply_creds(bprm, unsafe); } void security_bprm_post_apply_creds(struct linux_binprm *bprm) @@ -616,14 +611,19 @@ int security_task_create(unsigned long clone_flags) return security_ops->task_create(clone_flags); } -int security_cred_alloc(struct cred *cred) +void security_cred_free(struct cred *cred) { - return security_ops->cred_alloc_security(cred); + security_ops->cred_free(cred); } -void security_cred_free(struct cred *cred) +int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp) { - security_ops->cred_free(cred); + return security_ops->cred_prepare(new, old, gfp); +} + +void security_commit_creds(struct cred *new, const struct cred *old) +{ + return security_ops->cred_commit(new, old); } int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) @@ -631,10 +631,10 @@ int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) return security_ops->task_setuid(id0, id1, id2, flags); } -int security_task_post_setuid(uid_t old_ruid, uid_t old_euid, - uid_t old_suid, int flags) +int security_task_fix_setuid(struct cred *new, const struct cred *old, + int flags) { - return security_ops->task_post_setuid(old_ruid, old_euid, old_suid, flags); + return security_ops->task_fix_setuid(new, old, flags); } int security_task_setgid(gid_t id0, gid_t id1, gid_t id2, int flags) @@ -716,14 +716,9 @@ int security_task_wait(struct task_struct *p) } int security_task_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5, long *rc_p) -{ - return security_ops->task_prctl(option, arg2, arg3, arg4, arg5, rc_p); -} - -void security_task_reparent_to_init(struct task_struct *p) + unsigned long arg4, unsigned long arg5) { - security_ops->task_reparent_to_init(p); + return security_ops->task_prctl(option, arg2, arg3, arg4, arg5); } void security_task_to_inode(struct task_struct *p, struct inode *inode) @@ -1123,9 +1118,10 @@ EXPORT_SYMBOL(security_skb_classify_flow); #ifdef CONFIG_KEYS -int security_key_alloc(struct key *key, struct task_struct *tsk, unsigned long flags) +int security_key_alloc(struct key *key, const struct cred *cred, + unsigned long flags) { - return security_ops->key_alloc(key, tsk, flags); + return security_ops->key_alloc(key, cred, flags); } void security_key_free(struct key *key) @@ -1134,9 +1130,9 @@ void security_key_free(struct key *key) } int security_key_permission(key_ref_t key_ref, - struct task_struct *context, key_perm_t perm) + const struct cred *cred, key_perm_t perm) { - return security_ops->key_permission(key_ref, context, perm); + return security_ops->key_permission(key_ref, cred, perm); } int security_key_getsecurity(struct key *key, char **_buffer) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index f20cbd681ba6..c71bba78872f 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -156,20 +156,20 @@ static int selinux_secmark_enabled(void) return (atomic_read(&selinux_secmark_refcount) > 0); } -/* Allocate and free functions for each kind of security blob. */ - -static int cred_alloc_security(struct cred *cred) +/* + * initialise the security for the init task + */ +static void cred_init_security(void) { + struct cred *cred = (struct cred *) current->cred; struct task_security_struct *tsec; tsec = kzalloc(sizeof(struct task_security_struct), GFP_KERNEL); if (!tsec) - return -ENOMEM; + panic("SELinux: Failed to initialize initial task.\n"); - tsec->osid = tsec->sid = SECINITSID_UNLABELED; + tsec->osid = tsec->sid = SECINITSID_KERNEL; cred->security = tsec; - - return 0; } /* @@ -1378,6 +1378,19 @@ static inline u32 signal_to_av(int sig) return perm; } +/* + * Check permission between a pair of credentials + * fork check, ptrace check, etc. + */ +static int cred_has_perm(const struct cred *actor, + const struct cred *target, + u32 perms) +{ + u32 asid = cred_sid(actor), tsid = cred_sid(target); + + return avc_has_perm(asid, tsid, SECCLASS_PROCESS, perms, NULL); +} + /* * Check permission between a pair of tasks, e.g. signal checks, * fork check, ptrace check, etc. @@ -1820,24 +1833,19 @@ static int selinux_capget(struct task_struct *target, kernel_cap_t *effective, return secondary_ops->capget(target, effective, inheritable, permitted); } -static int selinux_capset_check(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) +static int selinux_capset(struct cred *new, const struct cred *old, + const kernel_cap_t *effective, + const kernel_cap_t *inheritable, + const kernel_cap_t *permitted) { int error; - error = secondary_ops->capset_check(effective, inheritable, permitted); + error = secondary_ops->capset(new, old, + effective, inheritable, permitted); if (error) return error; - return task_has_perm(current, current, PROCESS__SETCAP); -} - -static void selinux_capset_set(const kernel_cap_t *effective, - const kernel_cap_t *inheritable, - const kernel_cap_t *permitted) -{ - secondary_ops->capset_set(effective, inheritable, permitted); + return cred_has_perm(old, new, PROCESS__SETCAP); } static int selinux_capable(struct task_struct *tsk, int cap, int audit) @@ -2244,16 +2252,23 @@ static inline void flush_unauthorized_files(const struct cred *cred, spin_unlock(&files->file_lock); } -static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) +static int selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) { struct task_security_struct *tsec; struct bprm_security_struct *bsec; + struct cred *new; u32 sid; int rc; - secondary_ops->bprm_apply_creds(bprm, unsafe); + rc = secondary_ops->bprm_apply_creds(bprm, unsafe); + if (rc < 0) + return rc; - tsec = current_security(); + new = prepare_creds(); + if (!new) + return -ENOMEM; + + tsec = new->security; bsec = bprm->security; sid = bsec->sid; @@ -2268,7 +2283,7 @@ static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) PROCESS__SHARE, NULL); if (rc) { bsec->unsafe = 1; - return; + goto out; } } @@ -2292,12 +2307,16 @@ static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) PROCESS__PTRACE, NULL); if (rc) { bsec->unsafe = 1; - return; + goto out; } } } tsec->sid = sid; } + +out: + commit_creds(new); + return 0; } /* @@ -3021,6 +3040,7 @@ static int selinux_file_ioctl(struct file *file, unsigned int cmd, static int file_map_prot_check(struct file *file, unsigned long prot, int shared) { const struct cred *cred = current_cred(); + int rc = 0; #ifndef CONFIG_PPC32 if ((prot & PROT_EXEC) && (!file || (!shared && (prot & PROT_WRITE)))) { @@ -3029,9 +3049,9 @@ static int file_map_prot_check(struct file *file, unsigned long prot, int shared * private file mapping that will also be writable. * This has an additional check. */ - int rc = task_has_perm(current, current, PROCESS__EXECMEM); + rc = cred_has_perm(cred, cred, PROCESS__EXECMEM); if (rc) - return rc; + goto error; } #endif @@ -3048,7 +3068,9 @@ static int file_map_prot_check(struct file *file, unsigned long prot, int shared return file_has_perm(cred, file, av); } - return 0; + +error: + return rc; } static int selinux_file_mmap(struct file *file, unsigned long reqprot, @@ -3090,8 +3112,7 @@ static int selinux_file_mprotect(struct vm_area_struct *vma, rc = 0; if (vma->vm_start >= vma->vm_mm->start_brk && vma->vm_end <= vma->vm_mm->brk) { - rc = task_has_perm(current, current, - PROCESS__EXECHEAP); + rc = cred_has_perm(cred, cred, PROCESS__EXECHEAP); } else if (!vma->vm_file && vma->vm_start <= vma->vm_mm->start_stack && vma->vm_end >= vma->vm_mm->start_stack) { @@ -3104,8 +3125,7 @@ static int selinux_file_mprotect(struct vm_area_struct *vma, * modified content. This typically should only * occur for text relocations. */ - rc = file_has_perm(cred, vma->vm_file, - FILE__EXECMOD); + rc = file_has_perm(cred, vma->vm_file, FILE__EXECMOD); } if (rc) return rc; @@ -3211,6 +3231,7 @@ static int selinux_dentry_open(struct file *file, const struct cred *cred) struct file_security_struct *fsec; struct inode *inode; struct inode_security_struct *isec; + inode = file->f_path.dentry->d_inode; fsec = file->f_security; isec = inode->i_security; @@ -3247,38 +3268,41 @@ static int selinux_task_create(unsigned long clone_flags) return task_has_perm(current, current, PROCESS__FORK); } -static int selinux_cred_alloc_security(struct cred *cred) +/* + * detach and free the LSM part of a set of credentials + */ +static void selinux_cred_free(struct cred *cred) { - struct task_security_struct *tsec1, *tsec2; - int rc; - - tsec1 = current_security(); + struct task_security_struct *tsec = cred->security; + cred->security = NULL; + kfree(tsec); +} - rc = cred_alloc_security(cred); - if (rc) - return rc; - tsec2 = cred->security; +/* + * prepare a new set of credentials for modification + */ +static int selinux_cred_prepare(struct cred *new, const struct cred *old, + gfp_t gfp) +{ + const struct task_security_struct *old_tsec; + struct task_security_struct *tsec; - tsec2->osid = tsec1->osid; - tsec2->sid = tsec1->sid; + old_tsec = old->security; - /* Retain the exec, fs, key, and sock SIDs across fork */ - tsec2->exec_sid = tsec1->exec_sid; - tsec2->create_sid = tsec1->create_sid; - tsec2->keycreate_sid = tsec1->keycreate_sid; - tsec2->sockcreate_sid = tsec1->sockcreate_sid; + tsec = kmemdup(old_tsec, sizeof(struct task_security_struct), gfp); + if (!tsec) + return -ENOMEM; + new->security = tsec; return 0; } /* - * detach and free the LSM part of a set of credentials + * commit new credentials */ -static void selinux_cred_free(struct cred *cred) +static void selinux_cred_commit(struct cred *new, const struct cred *old) { - struct task_security_struct *tsec = cred->security; - cred->security = NULL; - kfree(tsec); + secondary_ops->cred_commit(new, old); } static int selinux_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) @@ -3292,9 +3316,10 @@ static int selinux_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) return 0; } -static int selinux_task_post_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) +static int selinux_task_fix_setuid(struct cred *new, const struct cred *old, + int flags) { - return secondary_ops->task_post_setuid(id0, id1, id2, flags); + return secondary_ops->task_fix_setuid(new, old, flags); } static int selinux_task_setgid(gid_t id0, gid_t id1, gid_t id2, int flags) @@ -3368,7 +3393,7 @@ static int selinux_task_setrlimit(unsigned int resource, struct rlimit *new_rlim /* Control the ability to change the hard limit (whether lowering or raising it), so that the hard limit can later be used as a safe reset point for the soft limit - upon context transitions. See selinux_bprm_apply_creds. */ + upon context transitions. See selinux_bprm_committing_creds. */ if (old_rlim->rlim_max != new_rlim->rlim_max) return task_has_perm(current, current, PROCESS__SETRLIMIT); @@ -3422,13 +3447,12 @@ static int selinux_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, - unsigned long arg5, - long *rc_p) + unsigned long arg5) { /* The current prctl operations do not appear to require any SELinux controls since they merely observe or modify the state of the current process. */ - return secondary_ops->task_prctl(option, arg2, arg3, arg4, arg5, rc_p); + return secondary_ops->task_prctl(option, arg2, arg3, arg4, arg5); } static int selinux_task_wait(struct task_struct *p) @@ -3436,18 +3460,6 @@ static int selinux_task_wait(struct task_struct *p) return task_has_perm(p, current, PROCESS__SIGCHLD); } -static void selinux_task_reparent_to_init(struct task_struct *p) -{ - struct task_security_struct *tsec; - - secondary_ops->task_reparent_to_init(p); - - tsec = p->cred->security; - tsec->osid = tsec->sid; - tsec->sid = SECINITSID_KERNEL; - return; -} - static void selinux_task_to_inode(struct task_struct *p, struct inode *inode) { @@ -5325,7 +5337,8 @@ static int selinux_setprocattr(struct task_struct *p, { struct task_security_struct *tsec; struct task_struct *tracer; - u32 sid = 0; + struct cred *new; + u32 sid = 0, ptsid; int error; char *str = value; @@ -5372,86 +5385,75 @@ static int selinux_setprocattr(struct task_struct *p, return error; } + new = prepare_creds(); + if (!new) + return -ENOMEM; + /* Permission checking based on the specified context is performed during the actual operation (execve, open/mkdir/...), when we know the full context of the - operation. See selinux_bprm_set_security for the execve + operation. See selinux_bprm_set_creds for the execve checks and may_create for the file creation checks. The operation will then fail if the context is not permitted. */ - tsec = p->cred->security; - if (!strcmp(name, "exec")) + tsec = new->security; + if (!strcmp(name, "exec")) { tsec->exec_sid = sid; - else if (!strcmp(name, "fscreate")) + } else if (!strcmp(name, "fscreate")) { tsec->create_sid = sid; - else if (!strcmp(name, "keycreate")) { + } else if (!strcmp(name, "keycreate")) { error = may_create_key(sid, p); if (error) - return error; + goto abort_change; tsec->keycreate_sid = sid; - } else if (!strcmp(name, "sockcreate")) + } else if (!strcmp(name, "sockcreate")) { tsec->sockcreate_sid = sid; - else if (!strcmp(name, "current")) { - struct av_decision avd; - + } else if (!strcmp(name, "current")) { + error = -EINVAL; if (sid == 0) - return -EINVAL; - /* - * SELinux allows to change context in the following case only. - * - Single threaded processes. - * - Multi threaded processes intend to change its context into - * more restricted domain (defined by TYPEBOUNDS statement). - */ - if (atomic_read(&p->mm->mm_users) != 1) { - struct task_struct *g, *t; - struct mm_struct *mm = p->mm; - read_lock(&tasklist_lock); - do_each_thread(g, t) { - if (t->mm == mm && t != p) { - read_unlock(&tasklist_lock); - error = security_bounded_transition(tsec->sid, sid); - if (!error) - goto boundary_ok; - - return error; - } - } while_each_thread(g, t); - read_unlock(&tasklist_lock); + goto abort_change; + + /* Only allow single threaded processes to change context */ + error = -EPERM; + if (!is_single_threaded(p)) { + error = security_bounded_transition(tsec->sid, sid); + if (error) + goto abort_change; } -boundary_ok: /* Check permissions for the transition. */ error = avc_has_perm(tsec->sid, sid, SECCLASS_PROCESS, PROCESS__DYNTRANSITION, NULL); if (error) - return error; + goto abort_change; /* Check for ptracing, and update the task SID if ok. Otherwise, leave SID unchanged and fail. */ + ptsid = 0; task_lock(p); - rcu_read_lock(); tracer = tracehook_tracer_task(p); - if (tracer != NULL) { - u32 ptsid = task_sid(tracer); - rcu_read_unlock(); - error = avc_has_perm_noaudit(ptsid, sid, - SECCLASS_PROCESS, - PROCESS__PTRACE, 0, &avd); - if (!error) - tsec->sid = sid; - task_unlock(p); - avc_audit(ptsid, sid, SECCLASS_PROCESS, - PROCESS__PTRACE, &avd, error, NULL); + if (tracer) + ptsid = task_sid(tracer); + task_unlock(p); + + if (tracer) { + error = avc_has_perm(ptsid, sid, SECCLASS_PROCESS, + PROCESS__PTRACE, NULL); if (error) - return error; - } else { - rcu_read_unlock(); - tsec->sid = sid; - task_unlock(p); + goto abort_change; } - } else - return -EINVAL; + tsec->sid = sid; + } else { + error = -EINVAL; + goto abort_change; + } + + commit_creds(new); return size; + +abort_change: + abort_creds(new); + return error; } static int selinux_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) @@ -5471,23 +5473,21 @@ static void selinux_release_secctx(char *secdata, u32 seclen) #ifdef CONFIG_KEYS -static int selinux_key_alloc(struct key *k, struct task_struct *tsk, +static int selinux_key_alloc(struct key *k, const struct cred *cred, unsigned long flags) { - const struct task_security_struct *__tsec; + const struct task_security_struct *tsec; struct key_security_struct *ksec; ksec = kzalloc(sizeof(struct key_security_struct), GFP_KERNEL); if (!ksec) return -ENOMEM; - rcu_read_lock(); - __tsec = __task_cred(tsk)->security; - if (__tsec->keycreate_sid) - ksec->sid = __tsec->keycreate_sid; + tsec = cred->security; + if (tsec->keycreate_sid) + ksec->sid = tsec->keycreate_sid; else - ksec->sid = __tsec->sid; - rcu_read_unlock(); + ksec->sid = tsec->sid; k->security = ksec; return 0; @@ -5502,8 +5502,8 @@ static void selinux_key_free(struct key *k) } static int selinux_key_permission(key_ref_t key_ref, - struct task_struct *ctx, - key_perm_t perm) + const struct cred *cred, + key_perm_t perm) { struct key *key; struct key_security_struct *ksec; @@ -5515,7 +5515,7 @@ static int selinux_key_permission(key_ref_t key_ref, if (perm == 0) return 0; - sid = task_sid(ctx); + sid = cred_sid(cred); key = key_ref_to_ptr(key_ref); ksec = key->security; @@ -5545,8 +5545,7 @@ static struct security_operations selinux_ops = { .ptrace_may_access = selinux_ptrace_may_access, .ptrace_traceme = selinux_ptrace_traceme, .capget = selinux_capget, - .capset_check = selinux_capset_check, - .capset_set = selinux_capset_set, + .capset = selinux_capset, .sysctl = selinux_sysctl, .capable = selinux_capable, .quotactl = selinux_quotactl, @@ -5621,10 +5620,11 @@ static struct security_operations selinux_ops = { .dentry_open = selinux_dentry_open, .task_create = selinux_task_create, - .cred_alloc_security = selinux_cred_alloc_security, .cred_free = selinux_cred_free, + .cred_prepare = selinux_cred_prepare, + .cred_commit = selinux_cred_commit, .task_setuid = selinux_task_setuid, - .task_post_setuid = selinux_task_post_setuid, + .task_fix_setuid = selinux_task_fix_setuid, .task_setgid = selinux_task_setgid, .task_setpgid = selinux_task_setpgid, .task_getpgid = selinux_task_getpgid, @@ -5641,7 +5641,6 @@ static struct security_operations selinux_ops = { .task_kill = selinux_task_kill, .task_wait = selinux_task_wait, .task_prctl = selinux_task_prctl, - .task_reparent_to_init = selinux_task_reparent_to_init, .task_to_inode = selinux_task_to_inode, .ipc_permission = selinux_ipc_permission, @@ -5737,8 +5736,6 @@ static struct security_operations selinux_ops = { static __init int selinux_init(void) { - struct task_security_struct *tsec; - if (!security_module_enable(&selinux_ops)) { selinux_enabled = 0; return 0; @@ -5752,10 +5749,7 @@ static __init int selinux_init(void) printk(KERN_INFO "SELinux: Initializing.\n"); /* Set the security state for the initial task. */ - if (cred_alloc_security(current->cred)) - panic("SELinux: Failed to initialize initial task.\n"); - tsec = current->cred->security; - tsec->osid = tsec->sid = SECINITSID_KERNEL; + cred_init_security(); sel_inode_cache = kmem_cache_create("selinux_inode_security", sizeof(struct inode_security_struct), diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 11167fd567b9..e952b397153d 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -104,8 +104,7 @@ static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode) if (rc != 0) return rc; - rc = smk_access(current->cred->security, ctp->cred->security, - MAY_READWRITE); + rc = smk_access(current_security(), task_security(ctp), MAY_READWRITE); if (rc != 0 && capable(CAP_MAC_OVERRIDE)) return 0; return rc; @@ -127,8 +126,7 @@ static int smack_ptrace_traceme(struct task_struct *ptp) if (rc != 0) return rc; - rc = smk_access(ptp->cred->security, current->cred->security, - MAY_READWRITE); + rc = smk_access(task_security(ptp), current_security(), MAY_READWRITE); if (rc != 0 && has_capability(ptp, CAP_MAC_OVERRIDE)) return 0; return rc; @@ -976,22 +974,6 @@ static int smack_file_receive(struct file *file) * Task hooks */ -/** - * smack_cred_alloc_security - "allocate" a task cred blob - * @cred: the task creds in need of a blob - * - * Smack isn't using copies of blobs. Everyone - * points to an immutable list. No alloc required. - * No data copy required. - * - * Always returns 0 - */ -static int smack_cred_alloc_security(struct cred *cred) -{ - cred->security = current_security(); - return 0; -} - /** * smack_cred_free - "free" task-level security credentials * @cred: the credentials in question @@ -1005,6 +987,30 @@ static void smack_cred_free(struct cred *cred) cred->security = NULL; } +/** + * smack_cred_prepare - prepare new set of credentials for modification + * @new: the new credentials + * @old: the original credentials + * @gfp: the atomicity of any memory allocations + * + * Prepare a new set of credentials for modification. + */ +static int smack_cred_prepare(struct cred *new, const struct cred *old, + gfp_t gfp) +{ + new->security = old->security; + return 0; +} + +/* + * commit new credentials + * @new: the new credentials + * @old: the original credentials + */ +static void smack_cred_commit(struct cred *new, const struct cred *old) +{ +} + /** * smack_task_setpgid - Smack check on setting pgid * @p: the task object @@ -2036,6 +2042,7 @@ static int smack_getprocattr(struct task_struct *p, char *name, char **value) static int smack_setprocattr(struct task_struct *p, char *name, void *value, size_t size) { + struct cred *new; char *newsmack; /* @@ -2058,7 +2065,11 @@ static int smack_setprocattr(struct task_struct *p, char *name, if (newsmack == NULL) return -EINVAL; - p->cred->security = newsmack; + new = prepare_creds(); + if (!new) + return -ENOMEM; + new->security = newsmack; + commit_creds(new); return size; } @@ -2354,17 +2365,17 @@ static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb, /** * smack_key_alloc - Set the key security blob * @key: object - * @tsk: the task associated with the key + * @cred: the credentials to use * @flags: unused * * No allocation required * * Returns 0 */ -static int smack_key_alloc(struct key *key, struct task_struct *tsk, +static int smack_key_alloc(struct key *key, const struct cred *cred, unsigned long flags) { - key->security = tsk->cred->security; + key->security = cred->security; return 0; } @@ -2382,14 +2393,14 @@ static void smack_key_free(struct key *key) /* * smack_key_permission - Smack access on a key * @key_ref: gets to the object - * @context: task involved + * @cred: the credentials to use * @perm: unused * * Return 0 if the task has read and write to the object, * an error code otherwise */ static int smack_key_permission(key_ref_t key_ref, - struct task_struct *context, key_perm_t perm) + const struct cred *cred, key_perm_t perm) { struct key *keyp; @@ -2405,11 +2416,10 @@ static int smack_key_permission(key_ref_t key_ref, /* * This should not occur */ - if (context->cred->security == NULL) + if (cred->security == NULL) return -EACCES; - return smk_access(context->cred->security, keyp->security, - MAY_READWRITE); + return smk_access(cred->security, keyp->security, MAY_READWRITE); } #endif /* CONFIG_KEYS */ @@ -2580,8 +2590,7 @@ struct security_operations smack_ops = { .ptrace_may_access = smack_ptrace_may_access, .ptrace_traceme = smack_ptrace_traceme, .capget = cap_capget, - .capset_check = cap_capset_check, - .capset_set = cap_capset_set, + .capset = cap_capset, .capable = cap_capable, .syslog = smack_syslog, .settime = cap_settime, @@ -2630,9 +2639,10 @@ struct security_operations smack_ops = { .file_send_sigiotask = smack_file_send_sigiotask, .file_receive = smack_file_receive, - .cred_alloc_security = smack_cred_alloc_security, .cred_free = smack_cred_free, - .task_post_setuid = cap_task_post_setuid, + .cred_prepare = smack_cred_prepare, + .cred_commit = smack_cred_commit, + .task_fix_setuid = cap_task_fix_setuid, .task_setpgid = smack_task_setpgid, .task_getpgid = smack_task_getpgid, .task_getsid = smack_task_getsid, @@ -2645,7 +2655,6 @@ struct security_operations smack_ops = { .task_movememory = smack_task_movememory, .task_kill = smack_task_kill, .task_wait = smack_task_wait, - .task_reparent_to_init = cap_task_reparent_to_init, .task_to_inode = smack_task_to_inode, .task_prctl = cap_task_prctl, @@ -2721,6 +2730,8 @@ struct security_operations smack_ops = { */ static __init int smack_init(void) { + struct cred *cred; + if (!security_module_enable(&smack_ops)) return 0; @@ -2729,7 +2740,8 @@ static __init int smack_init(void) /* * Set the security state for the initial task. */ - current->cred->security = &smack_known_floor.smk_known; + cred = (struct cred *) current->cred; + cred->security = &smack_known_floor.smk_known; /* * Initialize locks -- cgit v1.2.3 From a6f76f23d297f70e2a6b3ec607f7aeeea9e37e8d Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:24 +1100 Subject: CRED: Make execve() take advantage of copy-on-write credentials Make execve() take advantage of copy-on-write credentials, allowing it to set up the credentials in advance, and then commit the whole lot after the point of no return. This patch and the preceding patches have been tested with the LTP SELinux testsuite. This patch makes several logical sets of alteration: (1) execve(). The credential bits from struct linux_binprm are, for the most part, replaced with a single credentials pointer (bprm->cred). This means that all the creds can be calculated in advance and then applied at the point of no return with no possibility of failure. I would like to replace bprm->cap_effective with: cap_isclear(bprm->cap_effective) but this seems impossible due to special behaviour for processes of pid 1 (they always retain their parent's capability masks where normally they'd be changed - see cap_bprm_set_creds()). The following sequence of events now happens: (a) At the start of do_execve, the current task's cred_exec_mutex is locked to prevent PTRACE_ATTACH from obsoleting the calculation of creds that we make. (a) prepare_exec_creds() is then called to make a copy of the current task's credentials and prepare it. This copy is then assigned to bprm->cred. This renders security_bprm_alloc() and security_bprm_free() unnecessary, and so they've been removed. (b) The determination of unsafe execution is now performed immediately after (a) rather than later on in the code. The result is stored in bprm->unsafe for future reference. (c) prepare_binprm() is called, possibly multiple times. (i) This applies the result of set[ug]id binaries to the new creds attached to bprm->cred. Personality bit clearance is recorded, but now deferred on the basis that the exec procedure may yet fail. (ii) This then calls the new security_bprm_set_creds(). This should calculate the new LSM and capability credentials into *bprm->cred. This folds together security_bprm_set() and parts of security_bprm_apply_creds() (these two have been removed). Anything that might fail must be done at this point. (iii) bprm->cred_prepared is set to 1. bprm->cred_prepared is 0 on the first pass of the security calculations, and 1 on all subsequent passes. This allows SELinux in (ii) to base its calculations only on the initial script and not on the interpreter. (d) flush_old_exec() is called to commit the task to execution. This performs the following steps with regard to credentials: (i) Clear pdeath_signal and set dumpable on certain circumstances that may not be covered by commit_creds(). (ii) Clear any bits in current->personality that were deferred from (c.i). (e) install_exec_creds() [compute_creds() as was] is called to install the new credentials. This performs the following steps with regard to credentials: (i) Calls security_bprm_committing_creds() to apply any security requirements, such as flushing unauthorised files in SELinux, that must be done before the credentials are changed. This is made up of bits of security_bprm_apply_creds() and security_bprm_post_apply_creds(), both of which have been removed. This function is not allowed to fail; anything that might fail must have been done in (c.ii). (ii) Calls commit_creds() to apply the new credentials in a single assignment (more or less). Possibly pdeath_signal and dumpable should be part of struct creds. (iii) Unlocks the task's cred_replace_mutex, thus allowing PTRACE_ATTACH to take place. (iv) Clears The bprm->cred pointer as the credentials it was holding are now immutable. (v) Calls security_bprm_committed_creds() to apply any security alterations that must be done after the creds have been changed. SELinux uses this to flush signals and signal handlers. (f) If an error occurs before (d.i), bprm_free() will call abort_creds() to destroy the proposed new credentials and will then unlock cred_replace_mutex. No changes to the credentials will have been made. (2) LSM interface. A number of functions have been changed, added or removed: (*) security_bprm_alloc(), ->bprm_alloc_security() (*) security_bprm_free(), ->bprm_free_security() Removed in favour of preparing new credentials and modifying those. (*) security_bprm_apply_creds(), ->bprm_apply_creds() (*) security_bprm_post_apply_creds(), ->bprm_post_apply_creds() Removed; split between security_bprm_set_creds(), security_bprm_committing_creds() and security_bprm_committed_creds(). (*) security_bprm_set(), ->bprm_set_security() Removed; folded into security_bprm_set_creds(). (*) security_bprm_set_creds(), ->bprm_set_creds() New. The new credentials in bprm->creds should be checked and set up as appropriate. bprm->cred_prepared is 0 on the first call, 1 on the second and subsequent calls. (*) security_bprm_committing_creds(), ->bprm_committing_creds() (*) security_bprm_committed_creds(), ->bprm_committed_creds() New. Apply the security effects of the new credentials. This includes closing unauthorised files in SELinux. This function may not fail. When the former is called, the creds haven't yet been applied to the process; when the latter is called, they have. The former may access bprm->cred, the latter may not. (3) SELinux. SELinux has a number of changes, in addition to those to support the LSM interface changes mentioned above: (a) The bprm_security_struct struct has been removed in favour of using the credentials-under-construction approach. (c) flush_unauthorized_files() now takes a cred pointer and passes it on to inode_has_perm(), file_has_perm() and dentry_open(). Signed-off-by: David Howells Acked-by: James Morris Acked-by: Serge Hallyn Signed-off-by: James Morris --- arch/x86/ia32/ia32_aout.c | 2 +- fs/binfmt_aout.c | 2 +- fs/binfmt_elf.c | 2 +- fs/binfmt_elf_fdpic.c | 2 +- fs/binfmt_flat.c | 2 +- fs/binfmt_som.c | 2 +- fs/compat.c | 42 +++--- fs/exec.c | 149 +++++++++++--------- fs/internal.h | 6 + include/linux/audit.h | 16 --- include/linux/binfmts.h | 16 ++- include/linux/cred.h | 3 +- include/linux/key.h | 2 - include/linux/security.h | 103 +++++--------- kernel/cred.c | 46 ++++++- security/capability.c | 19 +-- security/commoncap.c | 152 ++++++++++---------- security/keys/process_keys.c | 42 ------ security/root_plug.c | 13 +- security/security.c | 26 ++-- security/selinux/hooks.c | 283 ++++++++++++++++---------------------- security/selinux/include/objsec.h | 11 -- security/smack/smack_lsm.c | 3 +- 23 files changed, 429 insertions(+), 515 deletions(-) (limited to 'include') diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 127ec3f07214..2a4d073d2cf1 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -327,7 +327,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) current->mm->cached_hole_size = 0; current->mm->mmap = NULL; - compute_creds(bprm); + install_exec_creds(bprm); current->flags &= ~PF_FORKNOEXEC; if (N_MAGIC(ex) == OMAGIC) { diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 204cfd1d7676..f1f3f4192a60 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -320,7 +320,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) current->mm->free_area_cache = current->mm->mmap_base; current->mm->cached_hole_size = 0; - compute_creds(bprm); + install_exec_creds(bprm); current->flags &= ~PF_FORKNOEXEC; #ifdef __sparc__ if (N_MAGIC(ex) == NMAGIC) { diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 9142ff5dc8e6..f458c1217c5e 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -956,7 +956,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) } #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */ - compute_creds(bprm); + install_exec_creds(bprm); current->flags &= ~PF_FORKNOEXEC; retval = create_elf_tables(bprm, &loc->elf_ex, load_addr, interp_load_addr); diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 45dabd59936f..aa5b43205e37 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -404,7 +404,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, current->mm->start_stack = current->mm->start_brk + stack_size; #endif - compute_creds(bprm); + install_exec_creds(bprm); current->flags &= ~PF_FORKNOEXEC; if (create_elf_fdpic_tables(bprm, current->mm, &exec_params, &interp_params) < 0) diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index ccb781a6a804..7bbd5c6b3725 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -880,7 +880,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) (libinfo.lib_list[j].loaded)? libinfo.lib_list[j].start_data:UNLOADED_LIB; - compute_creds(bprm); + install_exec_creds(bprm); current->flags &= ~PF_FORKNOEXEC; set_binfmt(&flat_format); diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index 74e587a52796..08644a61616e 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c @@ -255,7 +255,7 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs) kfree(hpuxhdr); set_binfmt(&som_format); - compute_creds(bprm); + install_exec_creds(bprm); setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT); create_som_tables(bprm); diff --git a/fs/compat.c b/fs/compat.c index e5f49f538502..d1ece79b6411 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1393,10 +1393,20 @@ int compat_do_execve(char * filename, if (!bprm) goto out_ret; + retval = mutex_lock_interruptible(¤t->cred_exec_mutex); + if (retval < 0) + goto out_free; + + retval = -ENOMEM; + bprm->cred = prepare_exec_creds(); + if (!bprm->cred) + goto out_unlock; + check_unsafe_exec(bprm); + file = open_exec(filename); retval = PTR_ERR(file); if (IS_ERR(file)) - goto out_kfree; + goto out_unlock; sched_exec(); @@ -1410,14 +1420,10 @@ int compat_do_execve(char * filename, bprm->argc = compat_count(argv, MAX_ARG_STRINGS); if ((retval = bprm->argc) < 0) - goto out_mm; + goto out; bprm->envc = compat_count(envp, MAX_ARG_STRINGS); if ((retval = bprm->envc) < 0) - goto out_mm; - - retval = security_bprm_alloc(bprm); - if (retval) goto out; retval = prepare_binprm(bprm); @@ -1438,19 +1444,16 @@ int compat_do_execve(char * filename, goto out; retval = search_binary_handler(bprm, regs); - if (retval >= 0) { - /* execve success */ - security_bprm_free(bprm); - acct_update_integrals(current); - free_bprm(bprm); - return retval; - } + if (retval < 0) + goto out; -out: - if (bprm->security) - security_bprm_free(bprm); + /* execve succeeded */ + mutex_unlock(¤t->cred_exec_mutex); + acct_update_integrals(current); + free_bprm(bprm); + return retval; -out_mm: +out: if (bprm->mm) mmput(bprm->mm); @@ -1460,7 +1463,10 @@ out_file: fput(bprm->file); } -out_kfree: +out_unlock: + mutex_unlock(¤t->cred_exec_mutex); + +out_free: free_bprm(bprm); out_ret: diff --git a/fs/exec.c b/fs/exec.c index 9bd3559ddece..32f13e299417 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -55,6 +55,7 @@ #include #include #include +#include "internal.h" #ifdef __alpha__ /* for /sbin/loader handling in search_binary_handler() */ @@ -1007,15 +1008,17 @@ int flush_old_exec(struct linux_binprm * bprm) */ current->mm->task_size = TASK_SIZE; - if (bprm->e_uid != current_euid() || - bprm->e_gid != current_egid()) { - set_dumpable(current->mm, suid_dumpable); + /* install the new credentials */ + if (bprm->cred->uid != current_euid() || + bprm->cred->gid != current_egid()) { current->pdeath_signal = 0; } else if (file_permission(bprm->file, MAY_READ) || - (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) { + bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP) { set_dumpable(current->mm, suid_dumpable); } + current->personality &= ~bprm->per_clear; + /* An exec changes our domain. We are no longer part of the thread group */ @@ -1032,13 +1035,50 @@ out: EXPORT_SYMBOL(flush_old_exec); +/* + * install the new credentials for this executable + */ +void install_exec_creds(struct linux_binprm *bprm) +{ + security_bprm_committing_creds(bprm); + + commit_creds(bprm->cred); + bprm->cred = NULL; + + /* cred_exec_mutex must be held at least to this point to prevent + * ptrace_attach() from altering our determination of the task's + * credentials; any time after this it may be unlocked */ + + security_bprm_committed_creds(bprm); +} +EXPORT_SYMBOL(install_exec_creds); + +/* + * determine how safe it is to execute the proposed program + * - the caller must hold current->cred_exec_mutex to protect against + * PTRACE_ATTACH + */ +void check_unsafe_exec(struct linux_binprm *bprm) +{ + struct task_struct *p = current; + + bprm->unsafe = tracehook_unsafe_exec(p); + + if (atomic_read(&p->fs->count) > 1 || + atomic_read(&p->files->count) > 1 || + atomic_read(&p->sighand->count) > 1) + bprm->unsafe |= LSM_UNSAFE_SHARE; +} + /* * Fill the binprm structure from the inode. * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes + * + * This may be called multiple times for binary chains (scripts for example). */ int prepare_binprm(struct linux_binprm *bprm) { - int mode; + umode_t mode; struct inode * inode = bprm->file->f_path.dentry->d_inode; int retval; @@ -1046,14 +1086,15 @@ int prepare_binprm(struct linux_binprm *bprm) if (bprm->file->f_op == NULL) return -EACCES; - bprm->e_uid = current_euid(); - bprm->e_gid = current_egid(); + /* clear any previous set[ug]id data from a previous binary */ + bprm->cred->euid = current_euid(); + bprm->cred->egid = current_egid(); - if(!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) { + if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) { /* Set-uid? */ if (mode & S_ISUID) { - current->personality &= ~PER_CLEAR_ON_SETID; - bprm->e_uid = inode->i_uid; + bprm->per_clear |= PER_CLEAR_ON_SETID; + bprm->cred->euid = inode->i_uid; } /* Set-gid? */ @@ -1063,50 +1104,23 @@ int prepare_binprm(struct linux_binprm *bprm) * executable. */ if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { - current->personality &= ~PER_CLEAR_ON_SETID; - bprm->e_gid = inode->i_gid; + bprm->per_clear |= PER_CLEAR_ON_SETID; + bprm->cred->egid = inode->i_gid; } } /* fill in binprm security blob */ - retval = security_bprm_set(bprm); + retval = security_bprm_set_creds(bprm); if (retval) return retval; + bprm->cred_prepared = 1; - memset(bprm->buf,0,BINPRM_BUF_SIZE); - return kernel_read(bprm->file,0,bprm->buf,BINPRM_BUF_SIZE); + memset(bprm->buf, 0, BINPRM_BUF_SIZE); + return kernel_read(bprm->file, 0, bprm->buf, BINPRM_BUF_SIZE); } EXPORT_SYMBOL(prepare_binprm); -static int unsafe_exec(struct task_struct *p) -{ - int unsafe = tracehook_unsafe_exec(p); - - if (atomic_read(&p->fs->count) > 1 || - atomic_read(&p->files->count) > 1 || - atomic_read(&p->sighand->count) > 1) - unsafe |= LSM_UNSAFE_SHARE; - - return unsafe; -} - -void compute_creds(struct linux_binprm *bprm) -{ - int unsafe; - - if (bprm->e_uid != current_uid()) - current->pdeath_signal = 0; - exec_keys(current); - - task_lock(current); - unsafe = unsafe_exec(current); - security_bprm_apply_creds(bprm, unsafe); - task_unlock(current); - security_bprm_post_apply_creds(bprm); -} -EXPORT_SYMBOL(compute_creds); - /* * Arguments are '\0' separated strings found at the location bprm->p * points to; chop off the first by relocating brpm->p to right after @@ -1259,6 +1273,8 @@ EXPORT_SYMBOL(search_binary_handler); void free_bprm(struct linux_binprm *bprm) { free_arg_pages(bprm); + if (bprm->cred) + abort_creds(bprm->cred); kfree(bprm); } @@ -1284,10 +1300,20 @@ int do_execve(char * filename, if (!bprm) goto out_files; + retval = mutex_lock_interruptible(¤t->cred_exec_mutex); + if (retval < 0) + goto out_free; + + retval = -ENOMEM; + bprm->cred = prepare_exec_creds(); + if (!bprm->cred) + goto out_unlock; + check_unsafe_exec(bprm); + file = open_exec(filename); retval = PTR_ERR(file); if (IS_ERR(file)) - goto out_kfree; + goto out_unlock; sched_exec(); @@ -1301,14 +1327,10 @@ int do_execve(char * filename, bprm->argc = count(argv, MAX_ARG_STRINGS); if ((retval = bprm->argc) < 0) - goto out_mm; + goto out; bprm->envc = count(envp, MAX_ARG_STRINGS); if ((retval = bprm->envc) < 0) - goto out_mm; - - retval = security_bprm_alloc(bprm); - if (retval) goto out; retval = prepare_binprm(bprm); @@ -1330,21 +1352,18 @@ int do_execve(char * filename, current->flags &= ~PF_KTHREAD; retval = search_binary_handler(bprm,regs); - if (retval >= 0) { - /* execve success */ - security_bprm_free(bprm); - acct_update_integrals(current); - free_bprm(bprm); - if (displaced) - put_files_struct(displaced); - return retval; - } + if (retval < 0) + goto out; -out: - if (bprm->security) - security_bprm_free(bprm); + /* execve succeeded */ + mutex_unlock(¤t->cred_exec_mutex); + acct_update_integrals(current); + free_bprm(bprm); + if (displaced) + put_files_struct(displaced); + return retval; -out_mm: +out: if (bprm->mm) mmput (bprm->mm); @@ -1353,7 +1372,11 @@ out_file: allow_write_access(bprm->file); fput(bprm->file); } -out_kfree: + +out_unlock: + mutex_unlock(¤t->cred_exec_mutex); + +out_free: free_bprm(bprm); out_files: diff --git a/fs/internal.h b/fs/internal.h index 80aa9a023372..53af885f1732 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -10,6 +10,7 @@ */ struct super_block; +struct linux_binprm; /* * block_dev.c @@ -39,6 +40,11 @@ static inline int sb_is_blkdev_sb(struct super_block *sb) */ extern void __init chrdev_init(void); +/* + * exec.c + */ +extern void check_unsafe_exec(struct linux_binprm *); + /* * namespace.c */ diff --git a/include/linux/audit.h b/include/linux/audit.h index 0b2fcb698a63..e8ce2c4c7ac7 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -508,22 +508,6 @@ static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) return 0; } -/* - * ieieeeeee, an audit function without a return code! - * - * This function might fail! I decided that it didn't matter. We are too late - * to fail the syscall and the information isn't REQUIRED for any purpose. It's - * just nice to have. We should be able to look at past audit logs to figure - * out this process's current cap set along with the fcaps from the PATH record - * and use that to come up with the final set. Yeah, its ugly, but all the info - * is still in the audit log. So I'm not going to bother mentioning we failed - * if we couldn't allocate memory. - * - * If someone changes their mind they could create the aux record earlier and - * then search here and use that earlier allocation. But I don't wanna. - * - * -Eric - */ static inline int audit_log_bprm_fcaps(struct linux_binprm *bprm, const struct cred *new, const struct cred *old) diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 7394b5b349ff..6cbfbe297180 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -35,16 +35,20 @@ struct linux_binprm{ struct mm_struct *mm; unsigned long p; /* current top of mem */ unsigned int sh_bang:1, - misc_bang:1; + misc_bang:1, + cred_prepared:1,/* true if creds already prepared (multiple + * preps happen for interpreters) */ + cap_effective:1;/* true if has elevated effective capabilities, + * false if not; except for init which inherits + * its parent's caps anyway */ #ifdef __alpha__ unsigned int taso:1; #endif unsigned int recursion_depth; struct file * file; - int e_uid, e_gid; - kernel_cap_t cap_post_exec_permitted; - bool cap_effective; - void *security; + struct cred *cred; /* new credentials */ + int unsafe; /* how unsafe this exec is (mask of LSM_UNSAFE_*) */ + unsigned int per_clear; /* bits to clear in current->personality */ int argc, envc; char * filename; /* Name of binary as seen by procps */ char * interp; /* Name of the binary really executed. Most @@ -101,7 +105,7 @@ extern int setup_arg_pages(struct linux_binprm * bprm, int executable_stack); extern int bprm_mm_init(struct linux_binprm *bprm); extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); -extern void compute_creds(struct linux_binprm *binprm); +extern void install_exec_creds(struct linux_binprm *bprm); extern int do_coredump(long signr, int exit_code, struct pt_regs * regs); extern int set_binfmt(struct linux_binfmt *new); extern void free_bprm(struct linux_binprm *); diff --git a/include/linux/cred.h b/include/linux/cred.h index eaf6fa695a04..8edb4d1d5427 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -84,8 +84,6 @@ struct thread_group_cred { struct key *process_keyring; /* keyring private to this process */ struct rcu_head rcu; /* RCU deletion hook */ }; - -extern void release_tgcred(struct cred *cred); #endif /* @@ -144,6 +142,7 @@ struct cred { extern void __put_cred(struct cred *); extern int copy_creds(struct task_struct *, unsigned long); extern struct cred *prepare_creds(void); +extern struct cred *prepare_exec_creds(void); extern struct cred *prepare_usermodehelper_creds(void); extern int commit_creds(struct cred *); extern void abort_creds(struct cred *); diff --git a/include/linux/key.h b/include/linux/key.h index 69ecf0934b02..21d32a142c00 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -278,7 +278,6 @@ extern ctl_table key_sysctls[]; * the userspace interface */ extern int install_thread_keyring_to_cred(struct cred *cred); -extern int exec_keys(struct task_struct *tsk); extern void key_fsuid_changed(struct task_struct *tsk); extern void key_fsgid_changed(struct task_struct *tsk); extern void key_init(void); @@ -294,7 +293,6 @@ extern void key_init(void); #define make_key_ref(k, p) NULL #define key_ref_to_ptr(k) NULL #define is_key_possessed(k) 0 -#define exec_keys(t) do { } while(0) #define key_fsuid_changed(t) do { } while(0) #define key_fsgid_changed(t) do { } while(0) #define key_init() do { } while(0) diff --git a/include/linux/security.h b/include/linux/security.h index 68be11251447..56a0eed65673 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -57,8 +57,7 @@ extern int cap_capset(struct cred *new, const struct cred *old, const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted); -extern int cap_bprm_set_security(struct linux_binprm *bprm); -extern int cap_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); +extern int cap_bprm_set_creds(struct linux_binprm *bprm); extern int cap_bprm_secureexec(struct linux_binprm *bprm); extern int cap_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); @@ -110,7 +109,7 @@ extern unsigned long mmap_min_addr; struct sched_param; struct request_sock; -/* bprm_apply_creds unsafe reasons */ +/* bprm->unsafe reasons */ #define LSM_UNSAFE_SHARE 1 #define LSM_UNSAFE_PTRACE 2 #define LSM_UNSAFE_PTRACE_CAP 4 @@ -154,36 +153,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * * Security hooks for program execution operations. * - * @bprm_alloc_security: - * Allocate and attach a security structure to the @bprm->security field. - * The security field is initialized to NULL when the bprm structure is - * allocated. - * @bprm contains the linux_binprm structure to be modified. - * Return 0 if operation was successful. - * @bprm_free_security: - * @bprm contains the linux_binprm structure to be modified. - * Deallocate and clear the @bprm->security field. - * @bprm_apply_creds: - * Compute and set the security attributes of a process being transformed - * by an execve operation based on the old attributes (current->security) - * and the information saved in @bprm->security by the set_security hook. - * Since this function may return an error, in which case the process will - * be killed. However, it can leave the security attributes of the - * process unchanged if an access failure occurs at this point. - * bprm_apply_creds is called under task_lock. @unsafe indicates various - * reasons why it may be unsafe to change security state. - * @bprm contains the linux_binprm structure. - * @bprm_post_apply_creds: - * Runs after bprm_apply_creds with the task_lock dropped, so that - * functions which cannot be called safely under the task_lock can - * be used. This hook is a good place to perform state changes on - * the process such as closing open file descriptors to which access - * is no longer granted if the attributes were changed. - * Note that a security module might need to save state between - * bprm_apply_creds and bprm_post_apply_creds to store the decision - * on whether the process may proceed. - * @bprm contains the linux_binprm structure. - * @bprm_set_security: + * @bprm_set_creds: * Save security information in the bprm->security field, typically based * on information about the bprm->file, for later use by the apply_creds * hook. This hook may also optionally check permissions (e.g. for @@ -196,15 +166,30 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @bprm contains the linux_binprm structure. * Return 0 if the hook is successful and permission is granted. * @bprm_check_security: - * This hook mediates the point when a search for a binary handler will - * begin. It allows a check the @bprm->security value which is set in - * the preceding set_security call. The primary difference from - * set_security is that the argv list and envp list are reliably - * available in @bprm. This hook may be called multiple times - * during a single execve; and in each pass set_security is called - * first. + * This hook mediates the point when a search for a binary handler will + * begin. It allows a check the @bprm->security value which is set in the + * preceding set_creds call. The primary difference from set_creds is + * that the argv list and envp list are reliably available in @bprm. This + * hook may be called multiple times during a single execve; and in each + * pass set_creds is called first. * @bprm contains the linux_binprm structure. * Return 0 if the hook is successful and permission is granted. + * @bprm_committing_creds: + * Prepare to install the new security attributes of a process being + * transformed by an execve operation, based on the old credentials + * pointed to by @current->cred and the information set in @bprm->cred by + * the bprm_set_creds hook. @bprm points to the linux_binprm structure. + * This hook is a good place to perform state changes on the process such + * as closing open file descriptors to which access will no longer be + * granted when the attributes are changed. This is called immediately + * before commit_creds(). + * @bprm_committed_creds: + * Tidy up after the installation of the new security attributes of a + * process being transformed by an execve operation. The new credentials + * have, by this point, been set to @current->cred. @bprm points to the + * linux_binprm structure. This hook is a good place to perform state + * changes on the process such as clearing out non-inheritable signal + * state. This is called immediately after commit_creds(). * @bprm_secureexec: * Return a boolean value (0 or 1) indicating whether a "secure exec" * is required. The flag is passed in the auxiliary table @@ -1301,13 +1286,11 @@ struct security_operations { int (*settime) (struct timespec *ts, struct timezone *tz); int (*vm_enough_memory) (struct mm_struct *mm, long pages); - int (*bprm_alloc_security) (struct linux_binprm *bprm); - void (*bprm_free_security) (struct linux_binprm *bprm); - int (*bprm_apply_creds) (struct linux_binprm *bprm, int unsafe); - void (*bprm_post_apply_creds) (struct linux_binprm *bprm); - int (*bprm_set_security) (struct linux_binprm *bprm); + int (*bprm_set_creds) (struct linux_binprm *bprm); int (*bprm_check_security) (struct linux_binprm *bprm); int (*bprm_secureexec) (struct linux_binprm *bprm); + void (*bprm_committing_creds) (struct linux_binprm *bprm); + void (*bprm_committed_creds) (struct linux_binprm *bprm); int (*sb_alloc_security) (struct super_block *sb); void (*sb_free_security) (struct super_block *sb); @@ -1569,12 +1552,10 @@ int security_settime(struct timespec *ts, struct timezone *tz); int security_vm_enough_memory(long pages); int security_vm_enough_memory_mm(struct mm_struct *mm, long pages); int security_vm_enough_memory_kern(long pages); -int security_bprm_alloc(struct linux_binprm *bprm); -void security_bprm_free(struct linux_binprm *bprm); -int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe); -void security_bprm_post_apply_creds(struct linux_binprm *bprm); -int security_bprm_set(struct linux_binprm *bprm); +int security_bprm_set_creds(struct linux_binprm *bprm); int security_bprm_check(struct linux_binprm *bprm); +void security_bprm_committing_creds(struct linux_binprm *bprm); +void security_bprm_committed_creds(struct linux_binprm *bprm); int security_bprm_secureexec(struct linux_binprm *bprm); int security_sb_alloc(struct super_block *sb); void security_sb_free(struct super_block *sb); @@ -1812,32 +1793,22 @@ static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) return cap_vm_enough_memory(mm, pages); } -static inline int security_bprm_alloc(struct linux_binprm *bprm) -{ - return 0; -} - -static inline void security_bprm_free(struct linux_binprm *bprm) -{ } - -static inline int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) +static inline int security_bprm_set_creds(struct linux_binprm *bprm) { - return cap_bprm_apply_creds(bprm, unsafe); + return cap_bprm_set_creds(bprm); } -static inline void security_bprm_post_apply_creds(struct linux_binprm *bprm) +static inline int security_bprm_check(struct linux_binprm *bprm) { - return; + return 0; } -static inline int security_bprm_set(struct linux_binprm *bprm) +static inline void security_bprm_committing_creds(struct linux_binprm *bprm) { - return cap_bprm_set_security(bprm); } -static inline int security_bprm_check(struct linux_binprm *bprm) +static inline void security_bprm_committed_creds(struct linux_binprm *bprm) { - return 0; } static inline int security_bprm_secureexec(struct linux_binprm *bprm) diff --git a/kernel/cred.c b/kernel/cred.c index cb6b5eda978d..e6fcdd67b2ec 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -68,7 +68,7 @@ static void release_tgcred_rcu(struct rcu_head *rcu) /* * Release a set of thread group credentials. */ -void release_tgcred(struct cred *cred) +static void release_tgcred(struct cred *cred) { #ifdef CONFIG_KEYS struct thread_group_cred *tgcred = cred->tgcred; @@ -163,6 +163,50 @@ error: } EXPORT_SYMBOL(prepare_creds); +/* + * Prepare credentials for current to perform an execve() + * - The caller must hold current->cred_exec_mutex + */ +struct cred *prepare_exec_creds(void) +{ + struct thread_group_cred *tgcred = NULL; + struct cred *new; + +#ifdef CONFIG_KEYS + tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); + if (!tgcred) + return NULL; +#endif + + new = prepare_creds(); + if (!new) { + kfree(tgcred); + return new; + } + +#ifdef CONFIG_KEYS + /* newly exec'd tasks don't get a thread keyring */ + key_put(new->thread_keyring); + new->thread_keyring = NULL; + + /* create a new per-thread-group creds for all this set of threads to + * share */ + memcpy(tgcred, new->tgcred, sizeof(struct thread_group_cred)); + + atomic_set(&tgcred->usage, 1); + spin_lock_init(&tgcred->lock); + + /* inherit the session keyring; new process keyring */ + key_get(tgcred->session_keyring); + tgcred->process_keyring = NULL; + + release_tgcred(new); + new->tgcred = tgcred; +#endif + + return new; +} + /* * prepare new credentials for the usermode helper dispatcher */ diff --git a/security/capability.c b/security/capability.c index efeb6d9e0e6a..185804f99ad1 100644 --- a/security/capability.c +++ b/security/capability.c @@ -32,24 +32,19 @@ static int cap_quota_on(struct dentry *dentry) return 0; } -static int cap_bprm_alloc_security(struct linux_binprm *bprm) +static int cap_bprm_check_security (struct linux_binprm *bprm) { return 0; } -static void cap_bprm_free_security(struct linux_binprm *bprm) +static void cap_bprm_committing_creds(struct linux_binprm *bprm) { } -static void cap_bprm_post_apply_creds(struct linux_binprm *bprm) +static void cap_bprm_committed_creds(struct linux_binprm *bprm) { } -static int cap_bprm_check_security(struct linux_binprm *bprm) -{ - return 0; -} - static int cap_sb_alloc_security(struct super_block *sb) { return 0; @@ -827,11 +822,9 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, syslog); set_to_cap_if_null(ops, settime); set_to_cap_if_null(ops, vm_enough_memory); - set_to_cap_if_null(ops, bprm_alloc_security); - set_to_cap_if_null(ops, bprm_free_security); - set_to_cap_if_null(ops, bprm_apply_creds); - set_to_cap_if_null(ops, bprm_post_apply_creds); - set_to_cap_if_null(ops, bprm_set_security); + set_to_cap_if_null(ops, bprm_set_creds); + set_to_cap_if_null(ops, bprm_committing_creds); + set_to_cap_if_null(ops, bprm_committed_creds); set_to_cap_if_null(ops, bprm_check_security); set_to_cap_if_null(ops, bprm_secureexec); set_to_cap_if_null(ops, sb_alloc_security); diff --git a/security/commoncap.c b/security/commoncap.c index b5419273f92d..51dfa11e8e56 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -167,7 +167,7 @@ int cap_capset(struct cred *new, static inline void bprm_clear_caps(struct linux_binprm *bprm) { - cap_clear(bprm->cap_post_exec_permitted); + cap_clear(bprm->cred->cap_permitted); bprm->cap_effective = false; } @@ -198,15 +198,15 @@ int cap_inode_killpriv(struct dentry *dentry) } static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, - struct linux_binprm *bprm) + struct linux_binprm *bprm, + bool *effective) { + struct cred *new = bprm->cred; unsigned i; int ret = 0; if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE) - bprm->cap_effective = true; - else - bprm->cap_effective = false; + *effective = true; CAP_FOR_EACH_U32(i) { __u32 permitted = caps->permitted.cap[i]; @@ -215,16 +215,13 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, /* * pP' = (X & fP) | (pI & fI) */ - bprm->cap_post_exec_permitted.cap[i] = - (current->cred->cap_bset.cap[i] & permitted) | - (current->cred->cap_inheritable.cap[i] & inheritable); + new->cap_permitted.cap[i] = + (new->cap_bset.cap[i] & permitted) | + (new->cap_inheritable.cap[i] & inheritable); - if (permitted & ~bprm->cap_post_exec_permitted.cap[i]) { - /* - * insufficient to execute correctly - */ + if (permitted & ~new->cap_permitted.cap[i]) + /* insufficient to execute correctly */ ret = -EPERM; - } } /* @@ -232,7 +229,7 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, * do not have enough capabilities, we return an error if they are * missing some "forced" (aka file-permitted) capabilities. */ - return bprm->cap_effective ? ret : 0; + return *effective ? ret : 0; } int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps) @@ -250,10 +247,9 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data size = inode->i_op->getxattr((struct dentry *)dentry, XATTR_NAME_CAPS, &caps, XATTR_CAPS_SZ); - if (size == -ENODATA || size == -EOPNOTSUPP) { + if (size == -ENODATA || size == -EOPNOTSUPP) /* no data, that's ok */ return -ENODATA; - } if (size < 0) return size; @@ -262,7 +258,7 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps.magic_etc); - switch ((magic_etc & VFS_CAP_REVISION_MASK)) { + switch (magic_etc & VFS_CAP_REVISION_MASK) { case VFS_CAP_REVISION_1: if (size != XATTR_CAPS_SZ_1) return -EINVAL; @@ -283,11 +279,12 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data cpu_caps->permitted.cap[i] = le32_to_cpu(caps.data[i].permitted); cpu_caps->inheritable.cap[i] = le32_to_cpu(caps.data[i].inheritable); } + return 0; } /* Locate any VFS capabilities: */ -static int get_file_caps(struct linux_binprm *bprm) +static int get_file_caps(struct linux_binprm *bprm, bool *effective) { struct dentry *dentry; int rc = 0; @@ -313,7 +310,10 @@ static int get_file_caps(struct linux_binprm *bprm) goto out; } - rc = bprm_caps_from_vfs_caps(&vcaps, bprm); + rc = bprm_caps_from_vfs_caps(&vcaps, bprm, effective); + if (rc == -EINVAL) + printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n", + __func__, rc, bprm->filename); out: dput(dentry); @@ -334,18 +334,27 @@ int cap_inode_killpriv(struct dentry *dentry) return 0; } -static inline int get_file_caps(struct linux_binprm *bprm) +static inline int get_file_caps(struct linux_binprm *bprm, bool *effective) { bprm_clear_caps(bprm); return 0; } #endif -int cap_bprm_set_security (struct linux_binprm *bprm) +/* + * set up the new credentials for an exec'd task + */ +int cap_bprm_set_creds(struct linux_binprm *bprm) { + const struct cred *old = current_cred(); + struct cred *new = bprm->cred; + bool effective; int ret; - ret = get_file_caps(bprm); + effective = false; + ret = get_file_caps(bprm, &effective); + if (ret < 0) + return ret; if (!issecure(SECURE_NOROOT)) { /* @@ -353,63 +362,47 @@ int cap_bprm_set_security (struct linux_binprm *bprm) * executables under compatibility mode, we override the * capability sets for the file. * - * If only the real uid is 0, we do not set the effective - * bit. + * If only the real uid is 0, we do not set the effective bit. */ - if (bprm->e_uid == 0 || current_uid() == 0) { + if (new->euid == 0 || new->uid == 0) { /* pP' = (cap_bset & ~0) | (pI & ~0) */ - bprm->cap_post_exec_permitted = cap_combine( - current->cred->cap_bset, - current->cred->cap_inheritable); - bprm->cap_effective = (bprm->e_uid == 0); - ret = 0; + new->cap_permitted = cap_combine(old->cap_bset, + old->cap_inheritable); } + if (new->euid == 0) + effective = true; } - return ret; -} - -int cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) -{ - const struct cred *old = current_cred(); - struct cred *new; - - new = prepare_creds(); - if (!new) - return -ENOMEM; - - if (bprm->e_uid != old->uid || bprm->e_gid != old->gid || - !cap_issubset(bprm->cap_post_exec_permitted, - old->cap_permitted)) { - set_dumpable(current->mm, suid_dumpable); - current->pdeath_signal = 0; - - if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) { - if (!capable(CAP_SETUID)) { - bprm->e_uid = old->uid; - bprm->e_gid = old->gid; - } - if (cap_limit_ptraced_target()) { - bprm->cap_post_exec_permitted = cap_intersect( - bprm->cap_post_exec_permitted, - new->cap_permitted); - } + /* Don't let someone trace a set[ug]id/setpcap binary with the revised + * credentials unless they have the appropriate permit + */ + if ((new->euid != old->uid || + new->egid != old->gid || + !cap_issubset(new->cap_permitted, old->cap_permitted)) && + bprm->unsafe & ~LSM_UNSAFE_PTRACE_CAP) { + /* downgrade; they get no more than they had, and maybe less */ + if (!capable(CAP_SETUID)) { + new->euid = new->uid; + new->egid = new->gid; } + if (cap_limit_ptraced_target()) + new->cap_permitted = cap_intersect(new->cap_permitted, + old->cap_permitted); } - new->suid = new->euid = new->fsuid = bprm->e_uid; - new->sgid = new->egid = new->fsgid = bprm->e_gid; + new->suid = new->fsuid = new->euid; + new->sgid = new->fsgid = new->egid; - /* For init, we want to retain the capabilities set - * in the init_task struct. Thus we skip the usual - * capability rules */ + /* For init, we want to retain the capabilities set in the initial + * task. Thus we skip the usual capability rules + */ if (!is_global_init(current)) { - new->cap_permitted = bprm->cap_post_exec_permitted; - if (bprm->cap_effective) - new->cap_effective = bprm->cap_post_exec_permitted; + if (effective) + new->cap_effective = new->cap_permitted; else cap_clear(new->cap_effective); } + bprm->cap_effective = effective; /* * Audit candidate if current->cap_effective is set @@ -425,23 +418,31 @@ int cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) */ if (!cap_isclear(new->cap_effective)) { if (!cap_issubset(CAP_FULL_SET, new->cap_effective) || - bprm->e_uid != 0 || new->uid != 0 || - issecure(SECURE_NOROOT)) - audit_log_bprm_fcaps(bprm, new, old); + new->euid != 0 || new->uid != 0 || + issecure(SECURE_NOROOT)) { + ret = audit_log_bprm_fcaps(bprm, new, old); + if (ret < 0) + return ret; + } } new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); - return commit_creds(new); + return 0; } -int cap_bprm_secureexec (struct linux_binprm *bprm) +/* + * determine whether a secure execution is required + * - the creds have been committed at this point, and are no longer available + * through bprm + */ +int cap_bprm_secureexec(struct linux_binprm *bprm) { const struct cred *cred = current_cred(); if (cred->uid != 0) { if (bprm->cap_effective) return 1; - if (!cap_isclear(bprm->cap_post_exec_permitted)) + if (!cap_isclear(cred->cap_permitted)) return 1; } @@ -477,7 +478,7 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name) } /* moved from kernel/sys.c. */ -/* +/* * cap_emulate_setxuid() fixes the effective / permitted capabilities of * a process after a call to setuid, setreuid, or setresuid. * @@ -491,10 +492,10 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name) * 3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective * capabilities are set to the permitted capabilities. * - * fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should + * fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should * never happen. * - * -astor + * -astor * * cevans - New behaviour, Oct '99 * A process may, via prctl(), elect to keep its capabilities when it @@ -751,4 +752,3 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages) cap_sys_admin = 1; return __vm_enough_memory(mm, pages, cap_sys_admin); } - diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index df329f684a65..2f5d89e92b85 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -274,48 +274,6 @@ static int install_session_keyring(struct key *keyring) return commit_creds(new); } -/*****************************************************************************/ -/* - * deal with execve() - */ -int exec_keys(struct task_struct *tsk) -{ - struct thread_group_cred *tgcred = NULL; - struct cred *new; - -#ifdef CONFIG_KEYS - tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); - if (!tgcred) - return -ENOMEM; -#endif - - new = prepare_creds(); - if (new < 0) - return -ENOMEM; - - /* newly exec'd tasks don't get a thread keyring */ - key_put(new->thread_keyring); - new->thread_keyring = NULL; - - /* create a new per-thread-group creds for all this set of threads to - * share */ - memcpy(tgcred, new->tgcred, sizeof(struct thread_group_cred)); - - atomic_set(&tgcred->usage, 1); - spin_lock_init(&tgcred->lock); - - /* inherit the session keyring; new process keyring */ - key_get(tgcred->session_keyring); - tgcred->process_keyring = NULL; - - release_tgcred(new); - new->tgcred = tgcred; - - commit_creds(new); - return 0; - -} /* end exec_keys() */ - /*****************************************************************************/ /* * the filesystem user ID changed diff --git a/security/root_plug.c b/security/root_plug.c index c3f68b5b372d..40fb4f15e27b 100644 --- a/security/root_plug.c +++ b/security/root_plug.c @@ -55,9 +55,9 @@ static int rootplug_bprm_check_security (struct linux_binprm *bprm) struct usb_device *dev; root_dbg("file %s, e_uid = %d, e_gid = %d\n", - bprm->filename, bprm->e_uid, bprm->e_gid); + bprm->filename, bprm->cred->euid, bprm->cred->egid); - if (bprm->e_gid == 0) { + if (bprm->cred->egid == 0) { dev = usb_find_device(vendor_id, product_id); if (!dev) { root_dbg("e_gid = 0, and device not found, " @@ -75,15 +75,12 @@ static struct security_operations rootplug_security_ops = { .ptrace_may_access = cap_ptrace_may_access, .ptrace_traceme = cap_ptrace_traceme, .capget = cap_capget, - .capset_check = cap_capset_check, - .capset_set = cap_capset_set, + .capset = cap_capset, .capable = cap_capable, - .bprm_apply_creds = cap_bprm_apply_creds, - .bprm_set_security = cap_bprm_set_security, + .bprm_set_creds = cap_bprm_set_creds, - .task_post_setuid = cap_task_post_setuid, - .task_reparent_to_init = cap_task_reparent_to_init, + .task_fix_setuid = cap_task_fix_setuid, .task_prctl = cap_task_prctl, .bprm_check_security = rootplug_bprm_check_security, diff --git a/security/security.c b/security/security.c index a55d739c6864..dc5babb2d6d8 100644 --- a/security/security.c +++ b/security/security.c @@ -213,34 +213,24 @@ int security_vm_enough_memory_kern(long pages) return security_ops->vm_enough_memory(current->mm, pages); } -int security_bprm_alloc(struct linux_binprm *bprm) +int security_bprm_set_creds(struct linux_binprm *bprm) { - return security_ops->bprm_alloc_security(bprm); + return security_ops->bprm_set_creds(bprm); } -void security_bprm_free(struct linux_binprm *bprm) -{ - security_ops->bprm_free_security(bprm); -} - -int security_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) -{ - return security_ops->bprm_apply_creds(bprm, unsafe); -} - -void security_bprm_post_apply_creds(struct linux_binprm *bprm) +int security_bprm_check(struct linux_binprm *bprm) { - security_ops->bprm_post_apply_creds(bprm); + return security_ops->bprm_check_security(bprm); } -int security_bprm_set(struct linux_binprm *bprm) +void security_bprm_committing_creds(struct linux_binprm *bprm) { - return security_ops->bprm_set_security(bprm); + return security_ops->bprm_committing_creds(bprm); } -int security_bprm_check(struct linux_binprm *bprm) +void security_bprm_committed_creds(struct linux_binprm *bprm) { - return security_ops->bprm_check_security(bprm); + return security_ops->bprm_committed_creds(bprm); } int security_bprm_secureexec(struct linux_binprm *bprm) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index c71bba78872f..21a592184633 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2029,59 +2029,45 @@ static int selinux_vm_enough_memory(struct mm_struct *mm, long pages) /* binprm security operations */ -static int selinux_bprm_alloc_security(struct linux_binprm *bprm) +static int selinux_bprm_set_creds(struct linux_binprm *bprm) { - struct bprm_security_struct *bsec; - - bsec = kzalloc(sizeof(struct bprm_security_struct), GFP_KERNEL); - if (!bsec) - return -ENOMEM; - - bsec->sid = SECINITSID_UNLABELED; - bsec->set = 0; - - bprm->security = bsec; - return 0; -} - -static int selinux_bprm_set_security(struct linux_binprm *bprm) -{ - struct task_security_struct *tsec; - struct inode *inode = bprm->file->f_path.dentry->d_inode; + const struct task_security_struct *old_tsec; + struct task_security_struct *new_tsec; struct inode_security_struct *isec; - struct bprm_security_struct *bsec; - u32 newsid; struct avc_audit_data ad; + struct inode *inode = bprm->file->f_path.dentry->d_inode; int rc; - rc = secondary_ops->bprm_set_security(bprm); + rc = secondary_ops->bprm_set_creds(bprm); if (rc) return rc; - bsec = bprm->security; - - if (bsec->set) + /* SELinux context only depends on initial program or script and not + * the script interpreter */ + if (bprm->cred_prepared) return 0; - tsec = current_security(); + old_tsec = current_security(); + new_tsec = bprm->cred->security; isec = inode->i_security; /* Default to the current task SID. */ - bsec->sid = tsec->sid; + new_tsec->sid = old_tsec->sid; + new_tsec->osid = old_tsec->sid; /* Reset fs, key, and sock SIDs on execve. */ - tsec->create_sid = 0; - tsec->keycreate_sid = 0; - tsec->sockcreate_sid = 0; + new_tsec->create_sid = 0; + new_tsec->keycreate_sid = 0; + new_tsec->sockcreate_sid = 0; - if (tsec->exec_sid) { - newsid = tsec->exec_sid; + if (old_tsec->exec_sid) { + new_tsec->sid = old_tsec->exec_sid; /* Reset exec SID on execve. */ - tsec->exec_sid = 0; + new_tsec->exec_sid = 0; } else { /* Check for a default transition on this program. */ - rc = security_transition_sid(tsec->sid, isec->sid, - SECCLASS_PROCESS, &newsid); + rc = security_transition_sid(old_tsec->sid, isec->sid, + SECCLASS_PROCESS, &new_tsec->sid); if (rc) return rc; } @@ -2090,33 +2076,63 @@ static int selinux_bprm_set_security(struct linux_binprm *bprm) ad.u.fs.path = bprm->file->f_path; if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) - newsid = tsec->sid; + new_tsec->sid = old_tsec->sid; - if (tsec->sid == newsid) { - rc = avc_has_perm(tsec->sid, isec->sid, + if (new_tsec->sid == old_tsec->sid) { + rc = avc_has_perm(old_tsec->sid, isec->sid, SECCLASS_FILE, FILE__EXECUTE_NO_TRANS, &ad); if (rc) return rc; } else { /* Check permissions for the transition. */ - rc = avc_has_perm(tsec->sid, newsid, + rc = avc_has_perm(old_tsec->sid, new_tsec->sid, SECCLASS_PROCESS, PROCESS__TRANSITION, &ad); if (rc) return rc; - rc = avc_has_perm(newsid, isec->sid, + rc = avc_has_perm(new_tsec->sid, isec->sid, SECCLASS_FILE, FILE__ENTRYPOINT, &ad); if (rc) return rc; - /* Clear any possibly unsafe personality bits on exec: */ - current->personality &= ~PER_CLEAR_ON_SETID; + /* Check for shared state */ + if (bprm->unsafe & LSM_UNSAFE_SHARE) { + rc = avc_has_perm(old_tsec->sid, new_tsec->sid, + SECCLASS_PROCESS, PROCESS__SHARE, + NULL); + if (rc) + return -EPERM; + } + + /* Make sure that anyone attempting to ptrace over a task that + * changes its SID has the appropriate permit */ + if (bprm->unsafe & + (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) { + struct task_struct *tracer; + struct task_security_struct *sec; + u32 ptsid = 0; + + rcu_read_lock(); + tracer = tracehook_tracer_task(current); + if (likely(tracer != NULL)) { + sec = __task_cred(tracer)->security; + ptsid = sec->sid; + } + rcu_read_unlock(); + + if (ptsid != 0) { + rc = avc_has_perm(ptsid, new_tsec->sid, + SECCLASS_PROCESS, + PROCESS__PTRACE, NULL); + if (rc) + return -EPERM; + } + } - /* Set the security field to the new SID. */ - bsec->sid = newsid; + /* Clear any possibly unsafe personality bits on exec: */ + bprm->per_clear |= PER_CLEAR_ON_SETID; } - bsec->set = 1; return 0; } @@ -2125,7 +2141,6 @@ static int selinux_bprm_check_security(struct linux_binprm *bprm) return secondary_ops->bprm_check_security(bprm); } - static int selinux_bprm_secureexec(struct linux_binprm *bprm) { const struct cred *cred = current_cred(); @@ -2141,19 +2156,13 @@ static int selinux_bprm_secureexec(struct linux_binprm *bprm) the noatsecure permission is granted between the two SIDs, i.e. ahp returns 0. */ atsecure = avc_has_perm(osid, sid, - SECCLASS_PROCESS, - PROCESS__NOATSECURE, NULL); + SECCLASS_PROCESS, + PROCESS__NOATSECURE, NULL); } return (atsecure || secondary_ops->bprm_secureexec(bprm)); } -static void selinux_bprm_free_security(struct linux_binprm *bprm) -{ - kfree(bprm->security); - bprm->security = NULL; -} - extern struct vfsmount *selinuxfs_mount; extern struct dentry *selinux_null; @@ -2252,108 +2261,78 @@ static inline void flush_unauthorized_files(const struct cred *cred, spin_unlock(&files->file_lock); } -static int selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) +/* + * Prepare a process for imminent new credential changes due to exec + */ +static void selinux_bprm_committing_creds(struct linux_binprm *bprm) { - struct task_security_struct *tsec; - struct bprm_security_struct *bsec; - struct cred *new; - u32 sid; - int rc; - - rc = secondary_ops->bprm_apply_creds(bprm, unsafe); - if (rc < 0) - return rc; - - new = prepare_creds(); - if (!new) - return -ENOMEM; + struct task_security_struct *new_tsec; + struct rlimit *rlim, *initrlim; + int rc, i; - tsec = new->security; + secondary_ops->bprm_committing_creds(bprm); - bsec = bprm->security; - sid = bsec->sid; - - tsec->osid = tsec->sid; - bsec->unsafe = 0; - if (tsec->sid != sid) { - /* Check for shared state. If not ok, leave SID - unchanged and kill. */ - if (unsafe & LSM_UNSAFE_SHARE) { - rc = avc_has_perm(tsec->sid, sid, SECCLASS_PROCESS, - PROCESS__SHARE, NULL); - if (rc) { - bsec->unsafe = 1; - goto out; - } - } + new_tsec = bprm->cred->security; + if (new_tsec->sid == new_tsec->osid) + return; - /* Check for ptracing, and update the task SID if ok. - Otherwise, leave SID unchanged and kill. */ - if (unsafe & (LSM_UNSAFE_PTRACE | LSM_UNSAFE_PTRACE_CAP)) { - struct task_struct *tracer; - struct task_security_struct *sec; - u32 ptsid = 0; + /* Close files for which the new task SID is not authorized. */ + flush_unauthorized_files(bprm->cred, current->files); - rcu_read_lock(); - tracer = tracehook_tracer_task(current); - if (likely(tracer != NULL)) { - sec = __task_cred(tracer)->security; - ptsid = sec->sid; - } - rcu_read_unlock(); + /* Always clear parent death signal on SID transitions. */ + current->pdeath_signal = 0; - if (ptsid != 0) { - rc = avc_has_perm(ptsid, sid, SECCLASS_PROCESS, - PROCESS__PTRACE, NULL); - if (rc) { - bsec->unsafe = 1; - goto out; - } - } + /* Check whether the new SID can inherit resource limits from the old + * SID. If not, reset all soft limits to the lower of the current + * task's hard limit and the init task's soft limit. + * + * Note that the setting of hard limits (even to lower them) can be + * controlled by the setrlimit check. The inclusion of the init task's + * soft limit into the computation is to avoid resetting soft limits + * higher than the default soft limit for cases where the default is + * lower than the hard limit, e.g. RLIMIT_CORE or RLIMIT_STACK. + */ + rc = avc_has_perm(new_tsec->osid, new_tsec->sid, SECCLASS_PROCESS, + PROCESS__RLIMITINH, NULL); + if (rc) { + for (i = 0; i < RLIM_NLIMITS; i++) { + rlim = current->signal->rlim + i; + initrlim = init_task.signal->rlim + i; + rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur); } - tsec->sid = sid; + update_rlimit_cpu(rlim->rlim_cur); } - -out: - commit_creds(new); - return 0; } /* - * called after apply_creds without the task lock held + * Clean up the process immediately after the installation of new credentials + * due to exec */ -static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm) +static void selinux_bprm_committed_creds(struct linux_binprm *bprm) { - const struct cred *cred = current_cred(); - struct task_security_struct *tsec; - struct rlimit *rlim, *initrlim; + const struct task_security_struct *tsec = current_security(); struct itimerval itimer; - struct bprm_security_struct *bsec; struct sighand_struct *psig; + u32 osid, sid; int rc, i; unsigned long flags; - tsec = current_security(); - bsec = bprm->security; + secondary_ops->bprm_committed_creds(bprm); - if (bsec->unsafe) { - force_sig_specific(SIGKILL, current); - return; - } - if (tsec->osid == tsec->sid) + osid = tsec->osid; + sid = tsec->sid; + + if (sid == osid) return; - /* Close files for which the new task SID is not authorized. */ - flush_unauthorized_files(cred, current->files); - - /* Check whether the new SID can inherit signal state - from the old SID. If not, clear itimers to avoid - subsequent signal generation and flush and unblock - signals. This must occur _after_ the task SID has - been updated so that any kill done after the flush - will be checked against the new SID. */ - rc = avc_has_perm(tsec->osid, tsec->sid, SECCLASS_PROCESS, - PROCESS__SIGINH, NULL); + /* Check whether the new SID can inherit signal state from the old SID. + * If not, clear itimers to avoid subsequent signal generation and + * flush and unblock signals. + * + * This must occur _after_ the task SID has been updated so that any + * kill done after the flush will be checked against the new SID. + */ + rc = avc_has_perm(osid, sid, SECCLASS_PROCESS, PROCESS__SIGINH, NULL); if (rc) { memset(&itimer, 0, sizeof itimer); for (i = 0; i < 3; i++) @@ -2366,32 +2345,8 @@ static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm) spin_unlock_irq(¤t->sighand->siglock); } - /* Always clear parent death signal on SID transitions. */ - current->pdeath_signal = 0; - - /* Check whether the new SID can inherit resource limits - from the old SID. If not, reset all soft limits to - the lower of the current task's hard limit and the init - task's soft limit. Note that the setting of hard limits - (even to lower them) can be controlled by the setrlimit - check. The inclusion of the init task's soft limit into - the computation is to avoid resetting soft limits higher - than the default soft limit for cases where the default - is lower than the hard limit, e.g. RLIMIT_CORE or - RLIMIT_STACK.*/ - rc = avc_has_perm(tsec->osid, tsec->sid, SECCLASS_PROCESS, - PROCESS__RLIMITINH, NULL); - if (rc) { - for (i = 0; i < RLIM_NLIMITS; i++) { - rlim = current->signal->rlim + i; - initrlim = init_task.signal->rlim+i; - rlim->rlim_cur = min(rlim->rlim_max, initrlim->rlim_cur); - } - update_rlimit_cpu(rlim->rlim_cur); - } - - /* Wake up the parent if it is waiting so that it can - recheck wait permission to the new task SID. */ + /* Wake up the parent if it is waiting so that it can recheck + * wait permission to the new task SID. */ read_lock_irq(&tasklist_lock); psig = current->parent->sighand; spin_lock_irqsave(&psig->siglock, flags); @@ -5556,12 +5511,10 @@ static struct security_operations selinux_ops = { .netlink_send = selinux_netlink_send, .netlink_recv = selinux_netlink_recv, - .bprm_alloc_security = selinux_bprm_alloc_security, - .bprm_free_security = selinux_bprm_free_security, - .bprm_apply_creds = selinux_bprm_apply_creds, - .bprm_post_apply_creds = selinux_bprm_post_apply_creds, - .bprm_set_security = selinux_bprm_set_security, + .bprm_set_creds = selinux_bprm_set_creds, .bprm_check_security = selinux_bprm_check_security, + .bprm_committing_creds = selinux_bprm_committing_creds, + .bprm_committed_creds = selinux_bprm_committed_creds, .bprm_secureexec = selinux_bprm_secureexec, .sb_alloc_security = selinux_sb_alloc_security, diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index f8be8d7fa26d..3cc45168f674 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -77,17 +77,6 @@ struct ipc_security_struct { u32 sid; /* SID of IPC resource */ }; -struct bprm_security_struct { - u32 sid; /* SID for transformed process */ - unsigned char set; - - /* - * unsafe is used to share failure information from bprm_apply_creds() - * to bprm_post_apply_creds(). - */ - char unsafe; -}; - struct netif_security_struct { int ifindex; /* device index */ u32 sid; /* SID for this interface */ diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index e952b397153d..de396742abf4 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -2596,8 +2596,7 @@ struct security_operations smack_ops = { .settime = cap_settime, .vm_enough_memory = cap_vm_enough_memory, - .bprm_apply_creds = cap_bprm_apply_creds, - .bprm_set_security = cap_bprm_set_security, + .bprm_set_creds = cap_bprm_set_creds, .bprm_secureexec = cap_bprm_secureexec, .sb_alloc_security = smack_sb_alloc_security, -- cgit v1.2.3 From d76b0d9b2d87cfc95686e148767cbf7d0e22bdc0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:25 +1100 Subject: CRED: Use creds in file structs Attach creds to file structs and discard f_uid/f_gid. file_operations::open() methods (such as hppfs_open()) should use file->f_cred rather than current_cred(). At the moment file->f_cred will be current_cred() at this point. Signed-off-by: David Howells Reviewed-by: James Morris Signed-off-by: James Morris --- arch/mips/kernel/vpe.c | 4 ++-- drivers/isdn/hysdn/hysdn_procconf.c | 6 ++++-- fs/coda/file.c | 2 +- fs/file_table.c | 7 ++++--- fs/hppfs/hppfs.c | 4 ++-- include/linux/fs.h | 2 +- net/ipv4/netfilter/ipt_LOG.c | 4 ++-- net/ipv6/netfilter/ip6t_LOG.c | 4 ++-- net/netfilter/nfnetlink_log.c | 5 +++-- net/netfilter/xt_owner.c | 16 ++++++++-------- net/sched/cls_flow.c | 4 ++-- 11 files changed, 31 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 972b2d2b8401..09786e496375 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -1085,8 +1085,8 @@ static int vpe_open(struct inode *inode, struct file *filp) v->load_addr = NULL; v->len = 0; - v->uid = filp->f_uid; - v->gid = filp->f_gid; + v->uid = filp->f_cred->fsuid; + v->gid = filp->f_cred->fsgid; #ifdef CONFIG_MIPS_APSP_KSPD /* get kspd to tell us when a syscall_exit happens */ diff --git a/drivers/isdn/hysdn/hysdn_procconf.c b/drivers/isdn/hysdn/hysdn_procconf.c index 484299b031f8..8f9f4912de32 100644 --- a/drivers/isdn/hysdn/hysdn_procconf.c +++ b/drivers/isdn/hysdn/hysdn_procconf.c @@ -246,7 +246,8 @@ hysdn_conf_open(struct inode *ino, struct file *filep) } if (card->debug_flags & (LOG_PROC_OPEN | LOG_PROC_ALL)) hysdn_addlog(card, "config open for uid=%d gid=%d mode=0x%x", - filep->f_uid, filep->f_gid, filep->f_mode); + filep->f_cred->fsuid, filep->f_cred->fsgid, + filep->f_mode); if ((filep->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_WRITE) { /* write only access -> write boot file or conf line */ @@ -331,7 +332,8 @@ hysdn_conf_close(struct inode *ino, struct file *filep) } if (card->debug_flags & (LOG_PROC_OPEN | LOG_PROC_ALL)) hysdn_addlog(card, "config close for uid=%d gid=%d mode=0x%x", - filep->f_uid, filep->f_gid, filep->f_mode); + filep->f_cred->fsuid, filep->f_cred->fsgid, + filep->f_mode); if ((filep->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_WRITE) { /* write only access -> write boot file or conf line */ diff --git a/fs/coda/file.c b/fs/coda/file.c index 29137ff3ca67..5a8769985494 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -174,7 +174,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file) BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); err = venus_close(coda_inode->i_sb, coda_i2f(coda_inode), - coda_flags, coda_file->f_uid); + coda_flags, coda_file->f_cred->fsuid); host_inode = cfi->cfi_container->f_path.dentry->d_inode; cii = ITOC(coda_inode); diff --git a/fs/file_table.c b/fs/file_table.c index bc4563fe791d..0fbcacc3ea75 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -36,7 +36,9 @@ static struct percpu_counter nr_files __cacheline_aligned_in_smp; static inline void file_free_rcu(struct rcu_head *head) { - struct file *f = container_of(head, struct file, f_u.fu_rcuhead); + struct file *f = container_of(head, struct file, f_u.fu_rcuhead); + + put_cred(f->f_cred); kmem_cache_free(filp_cachep, f); } @@ -121,8 +123,7 @@ struct file *get_empty_filp(void) INIT_LIST_HEAD(&f->f_u.fu_list); atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); - f->f_uid = cred->fsuid; - f->f_gid = cred->fsgid; + f->f_cred = get_cred(cred); eventpoll_init_file(f); /* f->f_version: 0 */ return f; diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 795e2c130ad7..b278f7f52024 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -426,7 +426,7 @@ static int file_mode(int fmode) static int hppfs_open(struct inode *inode, struct file *file) { - const struct cred *cred = current_cred(); + const struct cred *cred = file->f_cred; struct hppfs_private *data; struct vfsmount *proc_mnt; struct dentry *proc_dentry; @@ -490,7 +490,7 @@ static int hppfs_open(struct inode *inode, struct file *file) static int hppfs_dir_open(struct inode *inode, struct file *file) { - const struct cred *cred = current_cred(); + const struct cred *cred = file->f_cred; struct hppfs_private *data; struct vfsmount *proc_mnt; struct dentry *proc_dentry; diff --git a/include/linux/fs.h b/include/linux/fs.h index 3bfec1327b8d..c0fb6d81d89b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -827,7 +827,7 @@ struct file { fmode_t f_mode; loff_t f_pos; struct fown_struct f_owner; - unsigned int f_uid, f_gid; + const struct cred *f_cred; struct file_ra_state f_ra; u64 f_version; diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index fc6ce04a3e35..7b5dbe118c09 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -340,8 +340,8 @@ static void dump_packet(const struct nf_loginfo *info, read_lock_bh(&skb->sk->sk_callback_lock); if (skb->sk->sk_socket && skb->sk->sk_socket->file) printk("UID=%u GID=%u ", - skb->sk->sk_socket->file->f_uid, - skb->sk->sk_socket->file->f_gid); + skb->sk->sk_socket->file->f_cred->fsuid, + skb->sk->sk_socket->file->f_cred->fsgid); read_unlock_bh(&skb->sk->sk_callback_lock); } diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index caa441d09567..871d157cec4e 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -364,8 +364,8 @@ static void dump_packet(const struct nf_loginfo *info, read_lock_bh(&skb->sk->sk_callback_lock); if (skb->sk->sk_socket && skb->sk->sk_socket->file) printk("UID=%u GID=%u ", - skb->sk->sk_socket->file->f_uid, - skb->sk->sk_socket->file->f_gid); + skb->sk->sk_socket->file->f_cred->fsuid, + skb->sk->sk_socket->file->f_cred->fsgid); read_unlock_bh(&skb->sk->sk_callback_lock); } diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 41e0105d3828..38f9efd90e8d 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -474,8 +474,9 @@ __build_packet_message(struct nfulnl_instance *inst, if (skb->sk) { read_lock_bh(&skb->sk->sk_callback_lock); if (skb->sk->sk_socket && skb->sk->sk_socket->file) { - __be32 uid = htonl(skb->sk->sk_socket->file->f_uid); - __be32 gid = htonl(skb->sk->sk_socket->file->f_gid); + struct file *file = skb->sk->sk_socket->file; + __be32 uid = htonl(file->f_cred->fsuid); + __be32 gid = htonl(file->f_cred->fsgid); /* need to unlock here since NLA_PUT may goto */ read_unlock_bh(&skb->sk->sk_callback_lock); NLA_PUT_BE32(inst->skb, NFULA_UID, uid); diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index f19ebd9b78f5..22b2a5e881ea 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -34,12 +34,12 @@ owner_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) return false; if (info->match & IPT_OWNER_UID) - if ((filp->f_uid != info->uid) ^ + if ((filp->f_cred->fsuid != info->uid) ^ !!(info->invert & IPT_OWNER_UID)) return false; if (info->match & IPT_OWNER_GID) - if ((filp->f_gid != info->gid) ^ + if ((filp->f_cred->fsgid != info->gid) ^ !!(info->invert & IPT_OWNER_GID)) return false; @@ -60,12 +60,12 @@ owner_mt6_v0(const struct sk_buff *skb, const struct xt_match_param *par) return false; if (info->match & IP6T_OWNER_UID) - if ((filp->f_uid != info->uid) ^ + if ((filp->f_cred->fsuid != info->uid) ^ !!(info->invert & IP6T_OWNER_UID)) return false; if (info->match & IP6T_OWNER_GID) - if ((filp->f_gid != info->gid) ^ + if ((filp->f_cred->fsgid != info->gid) ^ !!(info->invert & IP6T_OWNER_GID)) return false; @@ -93,14 +93,14 @@ owner_mt(const struct sk_buff *skb, const struct xt_match_param *par) (XT_OWNER_UID | XT_OWNER_GID)) == 0; if (info->match & XT_OWNER_UID) - if ((filp->f_uid >= info->uid_min && - filp->f_uid <= info->uid_max) ^ + if ((filp->f_cred->fsuid >= info->uid_min && + filp->f_cred->fsuid <= info->uid_max) ^ !(info->invert & XT_OWNER_UID)) return false; if (info->match & XT_OWNER_GID) - if ((filp->f_gid >= info->gid_min && - filp->f_gid <= info->gid_max) ^ + if ((filp->f_cred->fsgid >= info->gid_min && + filp->f_cred->fsgid <= info->gid_max) ^ !(info->invert & XT_OWNER_GID)) return false; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 0ebaff637e31..0ef4e3065bcd 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -260,14 +260,14 @@ static u32 flow_get_rtclassid(const struct sk_buff *skb) static u32 flow_get_skuid(const struct sk_buff *skb) { if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) - return skb->sk->sk_socket->file->f_uid; + return skb->sk->sk_socket->file->f_cred->fsuid; return 0; } static u32 flow_get_skgid(const struct sk_buff *skb) { if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file) - return skb->sk->sk_socket->file->f_gid; + return skb->sk->sk_socket->file->f_cred->fsgid; return 0; } -- cgit v1.2.3 From 98870ab0a5a3f1822aee681d2997017e1c87d026 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:26 +1100 Subject: CRED: Documentation Document credentials and the new credentials API. Signed-off-by: David Howells Signed-off-by: James Morris --- Documentation/credentials.txt | 582 ++++++++++++++++++++++++++++++++++++++++++ include/linux/cred.h | 12 +- kernel/cred.c | 2 +- 3 files changed, 594 insertions(+), 2 deletions(-) create mode 100644 Documentation/credentials.txt (limited to 'include') diff --git a/Documentation/credentials.txt b/Documentation/credentials.txt new file mode 100644 index 000000000000..df03169782ea --- /dev/null +++ b/Documentation/credentials.txt @@ -0,0 +1,582 @@ + ==================== + CREDENTIALS IN LINUX + ==================== + +By: David Howells + +Contents: + + (*) Overview. + + (*) Types of credentials. + + (*) File markings. + + (*) Task credentials. + + - Immutable credentials. + - Accessing task credentials. + - Accessing another task's credentials. + - Altering credentials. + - Managing credentials. + + (*) Open file credentials. + + (*) Overriding the VFS's use of credentials. + + +======== +OVERVIEW +======== + +There are several parts to the security check performed by Linux when one +object acts upon another: + + (1) Objects. + + Objects are things in the system that may be acted upon directly by + userspace programs. Linux has a variety of actionable objects, including: + + - Tasks + - Files/inodes + - Sockets + - Message queues + - Shared memory segments + - Semaphores + - Keys + + As a part of the description of all these objects there is a set of + credentials. What's in the set depends on the type of object. + + (2) Object ownership. + + Amongst the credentials of most objects, there will be a subset that + indicates the ownership of that object. This is used for resource + accounting and limitation (disk quotas and task rlimits for example). + + In a standard UNIX filesystem, for instance, this will be defined by the + UID marked on the inode. + + (3) The objective context. + + Also amongst the credentials of those objects, there will be a subset that + indicates the 'objective context' of that object. This may or may not be + the same set as in (2) - in standard UNIX files, for instance, this is the + defined by the UID and the GID marked on the inode. + + The objective context is used as part of the security calculation that is + carried out when an object is acted upon. + + (4) Subjects. + + A subject is an object that is acting upon another object. + + Most of the objects in the system are inactive: they don't act on other + objects within the system. Processes/tasks are the obvious exception: + they do stuff; they access and manipulate things. + + Objects other than tasks may under some circumstances also be subjects. + For instance an open file may send SIGIO to a task using the UID and EUID + given to it by a task that called fcntl(F_SETOWN) upon it. In this case, + the file struct will have a subjective context too. + + (5) The subjective context. + + A subject has an additional interpretation of its credentials. A subset + of its credentials forms the 'subjective context'. The subjective context + is used as part of the security calculation that is carried out when a + subject acts. + + A Linux task, for example, has the FSUID, FSGID and the supplementary + group list for when it is acting upon a file - which are quite separate + from the real UID and GID that normally form the objective context of the + task. + + (6) Actions. + + Linux has a number of actions available that a subject may perform upon an + object. The set of actions available depends on the nature of the subject + and the object. + + Actions include reading, writing, creating and deleting files; forking or + signalling and tracing tasks. + + (7) Rules, access control lists and security calculations. + + When a subject acts upon an object, a security calculation is made. This + involves taking the subjective context, the objective context and the + action, and searching one or more sets of rules to see whether the subject + is granted or denied permission to act in the desired manner on the + object, given those contexts. + + There are two main sources of rules: + + (a) Discretionary access control (DAC): + + Sometimes the object will include sets of rules as part of its + description. This is an 'Access Control List' or 'ACL'. A Linux + file may supply more than one ACL. + + A traditional UNIX file, for example, includes a permissions mask that + is an abbreviated ACL with three fixed classes of subject ('user', + 'group' and 'other'), each of which may be granted certain privileges + ('read', 'write' and 'execute' - whatever those map to for the object + in question). UNIX file permissions do not allow the arbitrary + specification of subjects, however, and so are of limited use. + + A Linux file might also sport a POSIX ACL. This is a list of rules + that grants various permissions to arbitrary subjects. + + (b) Mandatory access control (MAC): + + The system as a whole may have one or more sets of rules that get + applied to all subjects and objects, regardless of their source. + SELinux and Smack are examples of this. + + In the case of SELinux and Smack, each object is given a label as part + of its credentials. When an action is requested, they take the + subject label, the object label and the action and look for a rule + that says that this action is either granted or denied. + + +==================== +TYPES OF CREDENTIALS +==================== + +The Linux kernel supports the following types of credentials: + + (1) Traditional UNIX credentials. + + Real User ID + Real Group ID + + The UID and GID are carried by most, if not all, Linux objects, even if in + some cases it has to be invented (FAT or CIFS files for example, which are + derived from Windows). These (mostly) define the objective context of + that object, with tasks being slightly different in some cases. + + Effective, Saved and FS User ID + Effective, Saved and FS Group ID + Supplementary groups + + These are additional credentials used by tasks only. Usually, an + EUID/EGID/GROUPS will be used as the subjective context, and real UID/GID + will be used as the objective. For tasks, it should be noted that this is + not always true. + + (2) Capabilities. + + Set of permitted capabilities + Set of inheritable capabilities + Set of effective capabilities + Capability bounding set + + These are only carried by tasks. They indicate superior capabilities + granted piecemeal to a task that an ordinary task wouldn't otherwise have. + These are manipulated implicitly by changes to the traditional UNIX + credentials, but can also be manipulated directly by the capset() system + call. + + The permitted capabilities are those caps that the process might grant + itself to its effective or permitted sets through capset(). This + inheritable set might also be so constrained. + + The effective capabilities are the ones that a task is actually allowed to + make use of itself. + + The inheritable capabilities are the ones that may get passed across + execve(). + + The bounding set limits the capabilities that may be inherited across + execve(), especially when a binary is executed that will execute as UID 0. + + (3) Secure management flags (securebits). + + These are only carried by tasks. These govern the way the above + credentials are manipulated and inherited over certain operations such as + execve(). They aren't used directly as objective or subjective + credentials. + + (4) Keys and keyrings. + + These are only carried by tasks. They carry and cache security tokens + that don't fit into the other standard UNIX credentials. They are for + making such things as network filesystem keys available to the file + accesses performed by processes, without the necessity of ordinary + programs having to know about security details involved. + + Keyrings are a special type of key. They carry sets of other keys and can + be searched for the desired key. Each process may subscribe to a number + of keyrings: + + Per-thread keying + Per-process keyring + Per-session keyring + + When a process accesses a key, if not already present, it will normally be + cached on one of these keyrings for future accesses to find. + + For more information on using keys, see Documentation/keys.txt. + + (5) LSM + + The Linux Security Module allows extra controls to be placed over the + operations that a task may do. Currently Linux supports two main + alternate LSM options: SELinux and Smack. + + Both work by labelling the objects in a system and then applying sets of + rules (policies) that say what operations a task with one label may do to + an object with another label. + + (6) AF_KEY + + This is a socket-based approach to credential management for networking + stacks [RFC 2367]. It isn't discussed by this document as it doesn't + interact directly with task and file credentials; rather it keeps system + level credentials. + + +When a file is opened, part of the opening task's subjective context is +recorded in the file struct created. This allows operations using that file +struct to use those credentials instead of the subjective context of the task +that issued the operation. An example of this would be a file opened on a +network filesystem where the credentials of the opened file should be presented +to the server, regardless of who is actually doing a read or a write upon it. + + +============= +FILE MARKINGS +============= + +Files on disk or obtained over the network may have annotations that form the +objective security context of that file. Depending on the type of filesystem, +this may include one or more of the following: + + (*) UNIX UID, GID, mode; + + (*) Windows user ID; + + (*) Access control list; + + (*) LSM security label; + + (*) UNIX exec privilege escalation bits (SUID/SGID); + + (*) File capabilities exec privilege escalation bits. + +These are compared to the task's subjective security context, and certain +operations allowed or disallowed as a result. In the case of execve(), the +privilege escalation bits come into play, and may allow the resulting process +extra privileges, based on the annotations on the executable file. + + +================ +TASK CREDENTIALS +================ + +In Linux, all of a task's credentials are held in (uid, gid) or through +(groups, keys, LSM security) a refcounted structure of type 'struct cred'. +Each task points to its credentials by a pointer called 'cred' in its +task_struct. + +Once a set of credentials has been prepared and committed, it may not be +changed, barring the following exceptions: + + (1) its reference count may be changed; + + (2) the reference count on the group_info struct it points to may be changed; + + (3) the reference count on the security data it points to may be changed; + + (4) the reference count on any keyrings it points to may be changed; + + (5) any keyrings it points to may be revoked, expired or have their security + attributes changed; and + + (6) the contents of any keyrings to which it points may be changed (the whole + point of keyrings being a shared set of credentials, modifiable by anyone + with appropriate access). + +To alter anything in the cred struct, the copy-and-replace principle must be +adhered to. First take a copy, then alter the copy and then use RCU to change +the task pointer to make it point to the new copy. There are wrappers to aid +with this (see below). + +A task may only alter its _own_ credentials; it is no longer permitted for a +task to alter another's credentials. This means the capset() system call is no +longer permitted to take any PID other than the one of the current process. +Also keyctl_instantiate() and keyctl_negate() functions no longer permit +attachment to process-specific keyrings in the requesting process as the +instantiating process may need to create them. + + +IMMUTABLE CREDENTIALS +--------------------- + +Once a set of credentials has been made public (by calling commit_creds() for +example), it must be considered immutable, barring two exceptions: + + (1) The reference count may be altered. + + (2) Whilst the keyring subscriptions of a set of credentials may not be + changed, the keyrings subscribed to may have their contents altered. + +To catch accidental credential alteration at compile time, struct task_struct +has _const_ pointers to its credential sets, as does struct file. Furthermore, +certain functions such as get_cred() and put_cred() operate on const pointers, +thus rendering casts unnecessary, but require to temporarily ditch the const +qualification to be able to alter the reference count. + + +ACCESSING TASK CREDENTIALS +-------------------------- + +A task being able to alter only its own credentials permits the current process +to read or replace its own credentials without the need for any form of locking +- which simplifies things greatly. It can just call: + + const struct cred *current_cred() + +to get a pointer to its credentials structure, and it doesn't have to release +it afterwards. + +There are convenience wrappers for retrieving specific aspects of a task's +credentials (the value is simply returned in each case): + + uid_t current_uid(void) Current's real UID + gid_t current_gid(void) Current's real GID + uid_t current_euid(void) Current's effective UID + gid_t current_egid(void) Current's effective GID + uid_t current_fsuid(void) Current's file access UID + gid_t current_fsgid(void) Current's file access GID + kernel_cap_t current_cap(void) Current's effective capabilities + void *current_security(void) Current's LSM security pointer + struct user_struct *current_user(void) Current's user account + +There are also convenience wrappers for retrieving specific associated pairs of +a task's credentials: + + void current_uid_gid(uid_t *, gid_t *); + void current_euid_egid(uid_t *, gid_t *); + void current_fsuid_fsgid(uid_t *, gid_t *); + +which return these pairs of values through their arguments after retrieving +them from the current task's credentials. + + +In addition, there is a function for obtaining a reference on the current +process's current set of credentials: + + const struct cred *get_current_cred(void); + +and functions for getting references to one of the credentials that don't +actually live in struct cred: + + struct user_struct *get_current_user(void); + struct group_info *get_current_groups(void); + +which get references to the current process's user accounting structure and +supplementary groups list respectively. + +Once a reference has been obtained, it must be released with put_cred(), +free_uid() or put_group_info() as appropriate. + + +ACCESSING ANOTHER TASK'S CREDENTIALS +------------------------------------ + +Whilst a task may access its own credentials without the need for locking, the +same is not true of a task wanting to access another task's credentials. It +must use the RCU read lock and rcu_dereference(). + +The rcu_dereference() is wrapped by: + + const struct cred *__task_cred(struct task_struct *task); + +This should be used inside the RCU read lock, as in the following example: + + void foo(struct task_struct *t, struct foo_data *f) + { + const struct cred *tcred; + ... + rcu_read_lock(); + tcred = __task_cred(t); + f->uid = tcred->uid; + f->gid = tcred->gid; + f->groups = get_group_info(tcred->groups); + rcu_read_unlock(); + ... + } + +A function need not get RCU read lock to use __task_cred() if it is holding a +spinlock at the time as this implicitly holds the RCU read lock. + +Should it be necessary to hold another task's credentials for a long period of +time, and possibly to sleep whilst doing so, then the caller should get a +reference on them using: + + const struct cred *get_task_cred(struct task_struct *task); + +This does all the RCU magic inside of it. The caller must call put_cred() on +the credentials so obtained when they're finished with. + +There are a couple of convenience functions to access bits of another task's +credentials, hiding the RCU magic from the caller: + + uid_t task_uid(task) Task's real UID + uid_t task_euid(task) Task's effective UID + +If the caller is holding a spinlock or the RCU read lock at the time anyway, +then: + + __task_cred(task)->uid + __task_cred(task)->euid + +should be used instead. Similarly, if multiple aspects of a task's credentials +need to be accessed, RCU read lock or a spinlock should be used, __task_cred() +called, the result stored in a temporary pointer and then the credential +aspects called from that before dropping the lock. This prevents the +potentially expensive RCU magic from being invoked multiple times. + +Should some other single aspect of another task's credentials need to be +accessed, then this can be used: + + task_cred_xxx(task, member) + +where 'member' is a non-pointer member of the cred struct. For instance: + + uid_t task_cred_xxx(task, suid); + +will retrieve 'struct cred::suid' from the task, doing the appropriate RCU +magic. This may not be used for pointer members as what they point to may +disappear the moment the RCU read lock is dropped. + + +ALTERING CREDENTIALS +-------------------- + +As previously mentioned, a task may only alter its own credentials, and may not +alter those of another task. This means that it doesn't need to use any +locking to alter its own credentials. + +To alter the current process's credentials, a function should first prepare a +new set of credentials by calling: + + struct cred *prepare_creds(void); + +this locks current->cred_replace_mutex and then allocates and constructs a +duplicate of the current process's credentials, returning with the mutex still +held if successful. It returns NULL if not successful (out of memory). + +The mutex prevents ptrace() from altering the ptrace state of a process whilst +security checks on credentials construction and changing is taking place as +the ptrace state may alter the outcome, particularly in the case of execve(). + +The new credentials set should be altered appropriately, and any security +checks and hooks done. Both the current and the proposed sets of credentials +are available for this purpose as current_cred() will return the current set +still at this point. + + +When the credential set is ready, it should be committed to the current process +by calling: + + int commit_creds(struct cred *new); + +This will alter various aspects of the credentials and the process, giving the +LSM a chance to do likewise, then it will use rcu_assign_pointer() to actually +commit the new credentials to current->cred, it will release +current->cred_replace_mutex to allow ptrace() to take place, and it will notify +the scheduler and others of the changes. + +This function is guaranteed to return 0, so that it can be tail-called at the +end of such functions as sys_setresuid(). + +Note that this function consumes the caller's reference to the new credentials. +The caller should _not_ call put_cred() on the new credentials afterwards. + +Furthermore, once this function has been called on a new set of credentials, +those credentials may _not_ be changed further. + + +Should the security checks fail or some other error occur after prepare_creds() +has been called, then the following function should be invoked: + + void abort_creds(struct cred *new); + +This releases the lock on current->cred_replace_mutex that prepare_creds() got +and then releases the new credentials. + + +A typical credentials alteration function would look something like this: + + int alter_suid(uid_t suid) + { + struct cred *new; + int ret; + + new = prepare_creds(); + if (!new) + return -ENOMEM; + + new->suid = suid; + ret = security_alter_suid(new); + if (ret < 0) { + abort_creds(new); + return ret; + } + + return commit_creds(new); + } + + +MANAGING CREDENTIALS +-------------------- + +There are some functions to help manage credentials: + + (*) void put_cred(const struct cred *cred); + + This releases a reference to the given set of credentials. If the + reference count reaches zero, the credentials will be scheduled for + destruction by the RCU system. + + (*) const struct cred *get_cred(const struct cred *cred); + + This gets a reference on a live set of credentials, returning a pointer to + that set of credentials. + + (*) struct cred *get_new_cred(struct cred *cred); + + This gets a reference on a set of credentials that is under construction + and is thus still mutable, returning a pointer to that set of credentials. + + +===================== +OPEN FILE CREDENTIALS +===================== + +When a new file is opened, a reference is obtained on the opening task's +credentials and this is attached to the file struct as 'f_cred' in place of +'f_uid' and 'f_gid'. Code that used to access file->f_uid and file->f_gid +should now access file->f_cred->fsuid and file->f_cred->fsgid. + +It is safe to access f_cred without the use of RCU or locking because the +pointer will not change over the lifetime of the file struct, and nor will the +contents of the cred struct pointed to, barring the exceptions listed above +(see the Task Credentials section). + + +======================================= +OVERRIDING THE VFS'S USE OF CREDENTIALS +======================================= + +Under some circumstances it is desirable to override the credentials used by +the VFS, and that can be done by calling into such as vfs_mkdir() with a +different set of credentials. This is done in the following places: + + (*) sys_faccessat(). + + (*) do_coredump(). + + (*) nfs4recover.c. diff --git a/include/linux/cred.h b/include/linux/cred.h index 8edb4d1d5427..794aab5c66e5 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -1,4 +1,4 @@ -/* Credentials management +/* Credentials management - see Documentation/credentials.txt * * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) @@ -169,6 +169,12 @@ static inline struct cred *get_new_cred(struct cred *cred) * * Get a reference on the specified set of credentials. The caller must * release the reference. + * + * This is used to deal with a committed set of credentials. Although the + * pointer is const, this will temporarily discard the const and increment the + * usage count. The purpose of this is to attempt to catch at compile time the + * accidental alteration of a set of credentials that should be considered + * immutable. */ static inline const struct cred *get_cred(const struct cred *cred) { @@ -181,6 +187,10 @@ static inline const struct cred *get_cred(const struct cred *cred) * * Release a reference to a set of credentials, deleting them when the last ref * is released. + * + * This takes a const pointer to a set of credentials because the credentials + * on task_struct are attached by const pointers to prevent accidental + * alteration of otherwise immutable credential sets. */ static inline void put_cred(const struct cred *_cred) { diff --git a/kernel/cred.c b/kernel/cred.c index e6fcdd67b2ec..b8bd2f99d8ce 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -1,4 +1,4 @@ -/* Task credentials management +/* Task credentials management - see Documentation/credentials.txt * * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) -- cgit v1.2.3 From 3b11a1decef07c19443d24ae926982bc8ec9f4c0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:26 +1100 Subject: CRED: Differentiate objective and effective subjective credentials on a task Differentiate the objective and real subjective credentials from the effective subjective credentials on a task by introducing a second credentials pointer into the task_struct. task_struct::real_cred then refers to the objective and apparent real subjective credentials of a task, as perceived by the other tasks in the system. task_struct::cred then refers to the effective subjective credentials of a task, as used by that task when it's actually running. These are not visible to the other tasks in the system. __task_cred(task) then refers to the objective/real credentials of the task in question. current_cred() refers to the effective subjective credentials of the current task. prepare_creds() uses the objective creds as a base and commit_creds() changes both pointers in the task_struct (indeed commit_creds() requires them to be the same). override_creds() and revert_creds() change the subjective creds pointer only, and the former returns the old subjective creds. These are used by NFSD, faccessat() and do_coredump(), and will by used by CacheFiles. In SELinux, current_has_perm() is provided as an alternative to task_has_perm(). This uses the effective subjective context of current, whereas task_has_perm() uses the objective/real context of the subject. Signed-off-by: David Howells Signed-off-by: James Morris --- fs/nfsd/auth.c | 5 +++- include/linux/cred.h | 29 +++++++++++---------- include/linux/init_task.h | 1 + include/linux/sched.h | 5 +++- kernel/cred.c | 38 ++++++++++++++++++--------- kernel/fork.c | 6 +++-- security/selinux/hooks.c | 65 ++++++++++++++++++++++++++++++----------------- 7 files changed, 95 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 836ffa1047d9..0184fe9b514c 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -34,6 +34,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) int flags = nfsexp_flags(rqstp, exp); int ret; + /* discard any old override before preparing the new set */ + revert_creds(get_cred(current->real_cred)); new = prepare_creds(); if (!new) return -ENOMEM; @@ -82,7 +84,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) else new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); - return commit_creds(new); + put_cred(override_creds(new)); + return 0; oom: ret = -ENOMEM; diff --git a/include/linux/cred.h b/include/linux/cred.h index 794aab5c66e5..55a9c995d694 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -146,8 +146,8 @@ extern struct cred *prepare_exec_creds(void); extern struct cred *prepare_usermodehelper_creds(void); extern int commit_creds(struct cred *); extern void abort_creds(struct cred *); -extern const struct cred *override_creds(const struct cred *) __deprecated; -extern void revert_creds(const struct cred *) __deprecated; +extern const struct cred *override_creds(const struct cred *); +extern void revert_creds(const struct cred *); extern void __init cred_init(void); /** @@ -202,32 +202,32 @@ static inline void put_cred(const struct cred *_cred) } /** - * current_cred - Access the current task's credentials + * current_cred - Access the current task's subjective credentials * - * Access the credentials of the current task. + * Access the subjective credentials of the current task. */ #define current_cred() \ (current->cred) /** - * __task_cred - Access another task's credentials + * __task_cred - Access a task's objective credentials * @task: The task to query * - * Access the credentials of another task. The caller must hold the - * RCU readlock. + * Access the objective credentials of a task. The caller must hold the RCU + * readlock. * * The caller must make sure task doesn't go away, either by holding a ref on * task or by holding tasklist_lock to prevent it from being unlinked. */ #define __task_cred(task) \ - ((const struct cred *)(rcu_dereference((task)->cred))) + ((const struct cred *)(rcu_dereference((task)->real_cred))) /** - * get_task_cred - Get another task's credentials + * get_task_cred - Get another task's objective credentials * @task: The task to query * - * Get the credentials of a task, pinning them so that they can't go away. - * Accessing a task's credentials directly is not permitted. + * Get the objective credentials of a task, pinning them so that they can't go + * away. Accessing a task's credentials directly is not permitted. * * The caller must make sure task doesn't go away, either by holding a ref on * task or by holding tasklist_lock to prevent it from being unlinked. @@ -243,10 +243,11 @@ static inline void put_cred(const struct cred *_cred) }) /** - * get_current_cred - Get the current task's credentials + * get_current_cred - Get the current task's subjective credentials * - * Get the credentials of the current task, pinning them so that they can't go - * away. Accessing the current task's credentials directly is not permitted. + * Get the subjective credentials of the current task, pinning them so that + * they can't go away. Accessing the current task's credentials directly is + * not permitted. */ #define get_current_cred() \ (get_cred(current_cred())) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 08c3b24ad9a8..2597858035cd 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -149,6 +149,7 @@ extern struct cred init_cred; .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ + .real_cred = &init_cred, \ .cred = &init_cred, \ .cred_exec_mutex = \ __MUTEX_INITIALIZER(tsk.cred_exec_mutex), \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 121d655e460d..3443123b0709 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1145,7 +1145,10 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - const struct cred *cred; /* actual/objective task credentials (COW) */ + const struct cred *real_cred; /* objective and real subjective task + * credentials (COW) */ + const struct cred *cred; /* effective (overridable) subjective task + * credentials (COW) */ struct mutex cred_exec_mutex; /* execve vs ptrace cred calculation mutex */ char comm[TASK_COMM_LEN]; /* executable name excluding path diff --git a/kernel/cred.c b/kernel/cred.c index b8bd2f99d8ce..f3ca10660617 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -35,7 +35,7 @@ static struct thread_group_cred init_tgcred = { * The initial credentials for the initial task */ struct cred init_cred = { - .usage = ATOMIC_INIT(3), + .usage = ATOMIC_INIT(4), .securebits = SECUREBITS_DEFAULT, .cap_inheritable = CAP_INIT_INH_SET, .cap_permitted = CAP_FULL_SET, @@ -120,6 +120,8 @@ EXPORT_SYMBOL(__put_cred); * prepare a new copy, which the caller then modifies and then commits by * calling commit_creds(). * + * Preparation involves making a copy of the objective creds for modification. + * * Returns a pointer to the new creds-to-be if successful, NULL otherwise. * * Call commit_creds() or abort_creds() to clean up. @@ -130,7 +132,7 @@ struct cred *prepare_creds(void) const struct cred *old; struct cred *new; - BUG_ON(atomic_read(&task->cred->usage) < 1); + BUG_ON(atomic_read(&task->real_cred->usage) < 1); new = kmem_cache_alloc(cred_jar, GFP_KERNEL); if (!new) @@ -262,6 +264,9 @@ error: * * We share if we can, but under some circumstances we have to generate a new * set. + * + * The new process gets the current process's subjective credentials as its + * objective and subjective credentials */ int copy_creds(struct task_struct *p, unsigned long clone_flags) { @@ -278,6 +283,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) #endif clone_flags & CLONE_THREAD ) { + p->real_cred = get_cred(p->cred); get_cred(p->cred); atomic_inc(&p->cred->user->processes); return 0; @@ -317,7 +323,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) #endif atomic_inc(&new->user->processes); - p->cred = new; + p->cred = p->real_cred = get_cred(new); return 0; } @@ -326,7 +332,9 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) * @new: The credentials to be assigned * * Install a new set of credentials to the current task, using RCU to replace - * the old set. + * the old set. Both the objective and the subjective credentials pointers are + * updated. This function may not be called if the subjective credentials are + * in an overridden state. * * This function eats the caller's reference to the new credentials. * @@ -338,12 +346,15 @@ int commit_creds(struct cred *new) struct task_struct *task = current; const struct cred *old; + BUG_ON(task->cred != task->real_cred); + BUG_ON(atomic_read(&task->real_cred->usage) < 2); BUG_ON(atomic_read(&new->usage) < 1); - BUG_ON(atomic_read(&task->cred->usage) < 1); - old = task->cred; + old = task->real_cred; security_commit_creds(new, old); + get_cred(new); /* we will require a ref for the subj creds too */ + /* dumpability changes */ if (old->euid != new->euid || old->egid != new->egid || @@ -369,6 +380,7 @@ int commit_creds(struct cred *new) */ if (new->user != old->user) atomic_inc(&new->user->processes); + rcu_assign_pointer(task->real_cred, new); rcu_assign_pointer(task->cred, new); if (new->user != old->user) atomic_dec(&old->user->processes); @@ -388,6 +400,8 @@ int commit_creds(struct cred *new) new->fsgid != old->fsgid) proc_id_connector(task, PROC_EVENT_GID); + /* release the old obj and subj refs both */ + put_cred(old); put_cred(old); return 0; } @@ -408,11 +422,11 @@ void abort_creds(struct cred *new) EXPORT_SYMBOL(abort_creds); /** - * override_creds - Temporarily override the current process's credentials + * override_creds - Override the current process's subjective credentials * @new: The credentials to be assigned * - * Install a set of temporary override credentials on the current process, - * returning the old set for later reversion. + * Install a set of temporary override subjective credentials on the current + * process, returning the old set for later reversion. */ const struct cred *override_creds(const struct cred *new) { @@ -424,11 +438,11 @@ const struct cred *override_creds(const struct cred *new) EXPORT_SYMBOL(override_creds); /** - * revert_creds - Revert a temporary credentials override + * revert_creds - Revert a temporary subjective credentials override * @old: The credentials to be restored * - * Revert a temporary set of override credentials to an old set, discarding the - * override set. + * Revert a temporary set of override subjective credentials to an old set, + * discarding the override set. */ void revert_creds(const struct cred *old) { diff --git a/kernel/fork.c b/kernel/fork.c index 82a7948a664e..af0d0f04585c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -146,6 +146,7 @@ void __put_task_struct(struct task_struct *tsk) WARN_ON(atomic_read(&tsk->usage)); WARN_ON(tsk == current); + put_cred(tsk->real_cred); put_cred(tsk->cred); delayacct_tsk_free(tsk); @@ -961,10 +962,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; - if (atomic_read(&p->cred->user->processes) >= + if (atomic_read(&p->real_cred->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && - p->cred->user != current->nsproxy->user_ns->root_user) + p->real_cred->user != current->nsproxy->user_ns->root_user) goto bad_fork_free; } @@ -1278,6 +1279,7 @@ bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); + put_cred(p->real_cred); put_cred(p->cred); bad_fork_free: free_task(p); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 21a592184633..91b06f2aa963 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -161,7 +161,7 @@ static int selinux_secmark_enabled(void) */ static void cred_init_security(void) { - struct cred *cred = (struct cred *) current->cred; + struct cred *cred = (struct cred *) current->real_cred; struct task_security_struct *tsec; tsec = kzalloc(sizeof(struct task_security_struct), GFP_KERNEL); @@ -184,7 +184,7 @@ static inline u32 cred_sid(const struct cred *cred) } /* - * get the security ID of a task + * get the objective security ID of a task */ static inline u32 task_sid(const struct task_struct *task) { @@ -197,7 +197,7 @@ static inline u32 task_sid(const struct task_struct *task) } /* - * get the security ID of the current task + * get the subjective security ID of the current task */ static inline u32 current_sid(void) { @@ -1395,6 +1395,7 @@ static int cred_has_perm(const struct cred *actor, * Check permission between a pair of tasks, e.g. signal checks, * fork check, ptrace check, etc. * tsk1 is the actor and tsk2 is the target + * - this uses the default subjective creds of tsk1 */ static int task_has_perm(const struct task_struct *tsk1, const struct task_struct *tsk2, @@ -1410,6 +1411,22 @@ static int task_has_perm(const struct task_struct *tsk1, return avc_has_perm(sid1, sid2, SECCLASS_PROCESS, perms, NULL); } +/* + * Check permission between current and another task, e.g. signal checks, + * fork check, ptrace check, etc. + * current is the actor and tsk2 is the target + * - this uses current's subjective creds + */ +static int current_has_perm(const struct task_struct *tsk, + u32 perms) +{ + u32 sid, tsid; + + sid = current_sid(); + tsid = task_sid(tsk); + return avc_has_perm(sid, tsid, SECCLASS_PROCESS, perms, NULL); +} + #if CAP_LAST_CAP > 63 #error Fix SELinux to handle capabilities > 63. #endif @@ -1807,7 +1824,7 @@ static int selinux_ptrace_may_access(struct task_struct *child, return avc_has_perm(sid, csid, SECCLASS_FILE, FILE__READ, NULL); } - return task_has_perm(current, child, PROCESS__PTRACE); + return current_has_perm(child, PROCESS__PTRACE); } static int selinux_ptrace_traceme(struct task_struct *parent) @@ -1826,7 +1843,7 @@ static int selinux_capget(struct task_struct *target, kernel_cap_t *effective, { int error; - error = task_has_perm(current, target, PROCESS__GETCAP); + error = current_has_perm(target, PROCESS__GETCAP); if (error) return error; @@ -3071,7 +3088,7 @@ static int selinux_file_mprotect(struct vm_area_struct *vma, } else if (!vma->vm_file && vma->vm_start <= vma->vm_mm->start_stack && vma->vm_end >= vma->vm_mm->start_stack) { - rc = task_has_perm(current, current, PROCESS__EXECSTACK); + rc = current_has_perm(current, PROCESS__EXECSTACK); } else if (vma->vm_file && vma->anon_vma) { /* * We are making executable a file mapping that has @@ -3220,7 +3237,7 @@ static int selinux_task_create(unsigned long clone_flags) if (rc) return rc; - return task_has_perm(current, current, PROCESS__FORK); + return current_has_perm(current, PROCESS__FORK); } /* @@ -3285,17 +3302,17 @@ static int selinux_task_setgid(gid_t id0, gid_t id1, gid_t id2, int flags) static int selinux_task_setpgid(struct task_struct *p, pid_t pgid) { - return task_has_perm(current, p, PROCESS__SETPGID); + return current_has_perm(p, PROCESS__SETPGID); } static int selinux_task_getpgid(struct task_struct *p) { - return task_has_perm(current, p, PROCESS__GETPGID); + return current_has_perm(p, PROCESS__GETPGID); } static int selinux_task_getsid(struct task_struct *p) { - return task_has_perm(current, p, PROCESS__GETSESSION); + return current_has_perm(p, PROCESS__GETSESSION); } static void selinux_task_getsecid(struct task_struct *p, u32 *secid) @@ -3317,7 +3334,7 @@ static int selinux_task_setnice(struct task_struct *p, int nice) if (rc) return rc; - return task_has_perm(current, p, PROCESS__SETSCHED); + return current_has_perm(p, PROCESS__SETSCHED); } static int selinux_task_setioprio(struct task_struct *p, int ioprio) @@ -3328,12 +3345,12 @@ static int selinux_task_setioprio(struct task_struct *p, int ioprio) if (rc) return rc; - return task_has_perm(current, p, PROCESS__SETSCHED); + return current_has_perm(p, PROCESS__SETSCHED); } static int selinux_task_getioprio(struct task_struct *p) { - return task_has_perm(current, p, PROCESS__GETSCHED); + return current_has_perm(p, PROCESS__GETSCHED); } static int selinux_task_setrlimit(unsigned int resource, struct rlimit *new_rlim) @@ -3350,7 +3367,7 @@ static int selinux_task_setrlimit(unsigned int resource, struct rlimit *new_rlim later be used as a safe reset point for the soft limit upon context transitions. See selinux_bprm_committing_creds. */ if (old_rlim->rlim_max != new_rlim->rlim_max) - return task_has_perm(current, current, PROCESS__SETRLIMIT); + return current_has_perm(current, PROCESS__SETRLIMIT); return 0; } @@ -3363,17 +3380,17 @@ static int selinux_task_setscheduler(struct task_struct *p, int policy, struct s if (rc) return rc; - return task_has_perm(current, p, PROCESS__SETSCHED); + return current_has_perm(p, PROCESS__SETSCHED); } static int selinux_task_getscheduler(struct task_struct *p) { - return task_has_perm(current, p, PROCESS__GETSCHED); + return current_has_perm(p, PROCESS__GETSCHED); } static int selinux_task_movememory(struct task_struct *p) { - return task_has_perm(current, p, PROCESS__SETSCHED); + return current_has_perm(p, PROCESS__SETSCHED); } static int selinux_task_kill(struct task_struct *p, struct siginfo *info, @@ -3394,7 +3411,7 @@ static int selinux_task_kill(struct task_struct *p, struct siginfo *info, rc = avc_has_perm(secid, task_sid(p), SECCLASS_PROCESS, perm, NULL); else - rc = task_has_perm(current, p, perm); + rc = current_has_perm(p, perm); return rc; } @@ -5250,7 +5267,7 @@ static int selinux_getprocattr(struct task_struct *p, unsigned len; if (current != p) { - error = task_has_perm(current, p, PROCESS__GETATTR); + error = current_has_perm(p, PROCESS__GETATTR); if (error) return error; } @@ -5309,15 +5326,15 @@ static int selinux_setprocattr(struct task_struct *p, * above restriction is ever removed. */ if (!strcmp(name, "exec")) - error = task_has_perm(current, p, PROCESS__SETEXEC); + error = current_has_perm(p, PROCESS__SETEXEC); else if (!strcmp(name, "fscreate")) - error = task_has_perm(current, p, PROCESS__SETFSCREATE); + error = current_has_perm(p, PROCESS__SETFSCREATE); else if (!strcmp(name, "keycreate")) - error = task_has_perm(current, p, PROCESS__SETKEYCREATE); + error = current_has_perm(p, PROCESS__SETKEYCREATE); else if (!strcmp(name, "sockcreate")) - error = task_has_perm(current, p, PROCESS__SETSOCKCREATE); + error = current_has_perm(p, PROCESS__SETSOCKCREATE); else if (!strcmp(name, "current")) - error = task_has_perm(current, p, PROCESS__SETCURRENT); + error = current_has_perm(p, PROCESS__SETCURRENT); else error = -EINVAL; if (error) -- cgit v1.2.3 From 3a3b7ce9336952ea7b9564d976d068a238976c9d Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:28 +1100 Subject: CRED: Allow kernel services to override LSM settings for task actions Allow kernel services to override LSM settings appropriate to the actions performed by a task by duplicating a set of credentials, modifying it and then using task_struct::cred to point to it when performing operations on behalf of a task. This is used, for example, by CacheFiles which has to transparently access the cache on behalf of a process that thinks it is doing, say, NFS accesses with a potentially inappropriate (with respect to accessing the cache) set of credentials. This patch provides two LSM hooks for modifying a task security record: (*) security_kernel_act_as() which allows modification of the security datum with which a task acts on other objects (most notably files). (*) security_kernel_create_files_as() which allows modification of the security datum that is used to initialise the security data on a file that a task creates. The patch also provides four new credentials handling functions, which wrap the LSM functions: (1) prepare_kernel_cred() Prepare a set of credentials for a kernel service to use, based either on a daemon's credentials or on init_cred. All the keyrings are cleared. (2) set_security_override() Set the LSM security ID in a set of credentials to a specific security context, assuming permission from the LSM policy. (3) set_security_override_from_ctx() As (2), but takes the security context as a string. (4) set_create_files_as() Set the file creation LSM security ID in a set of credentials to be the same as that on a particular inode. Signed-off-by: Casey Schaufler [Smack changes] Signed-off-by: David Howells Signed-off-by: James Morris --- include/linux/cred.h | 6 +++ include/linux/security.h | 28 +++++++++++ kernel/cred.c | 113 +++++++++++++++++++++++++++++++++++++++++++++ security/capability.c | 12 +++++ security/security.c | 10 ++++ security/selinux/hooks.c | 46 ++++++++++++++++++ security/smack/smack_lsm.c | 37 +++++++++++++++ 7 files changed, 252 insertions(+) (limited to 'include') diff --git a/include/linux/cred.h b/include/linux/cred.h index 55a9c995d694..26c1ab179946 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -18,6 +18,7 @@ struct user_struct; struct cred; +struct inode; /* * COW Supplementary groups list @@ -148,6 +149,11 @@ extern int commit_creds(struct cred *); extern void abort_creds(struct cred *); extern const struct cred *override_creds(const struct cred *); extern void revert_creds(const struct cred *); +extern struct cred *prepare_kernel_cred(struct task_struct *); +extern int change_create_files_as(struct cred *, struct inode *); +extern int set_security_override(struct cred *, u32); +extern int set_security_override_from_ctx(struct cred *, const char *); +extern int set_create_files_as(struct cred *, struct inode *); extern void __init cred_init(void); /** diff --git a/include/linux/security.h b/include/linux/security.h index 56a0eed65673..59a11e19b617 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -587,6 +587,19 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @new points to the new credentials. * @old points to the original credentials. * Install a new set of credentials. + * @kernel_act_as: + * Set the credentials for a kernel service to act as (subjective context). + * @new points to the credentials to be modified. + * @secid specifies the security ID to be set + * The current task must be the one that nominated @secid. + * Return 0 if successful. + * @kernel_create_files_as: + * Set the file creation context in a set of credentials to be the same as + * the objective context of the specified inode. + * @new points to the credentials to be modified. + * @inode points to the inode to use as a reference. + * The current task must be the one that nominated @inode. + * Return 0 if successful. * @task_setuid: * Check permission before setting one or more of the user identity * attributes of the current process. The @flags parameter indicates @@ -1381,6 +1394,8 @@ struct security_operations { int (*cred_prepare)(struct cred *new, const struct cred *old, gfp_t gfp); void (*cred_commit)(struct cred *new, const struct cred *old); + int (*kernel_act_as)(struct cred *new, u32 secid); + int (*kernel_create_files_as)(struct cred *new, struct inode *inode); int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags); int (*task_fix_setuid) (struct cred *new, const struct cred *old, int flags); @@ -1632,6 +1647,8 @@ int security_task_create(unsigned long clone_flags); void security_cred_free(struct cred *cred); int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp); void security_commit_creds(struct cred *new, const struct cred *old); +int security_kernel_act_as(struct cred *new, u32 secid); +int security_kernel_create_files_as(struct cred *new, struct inode *inode); int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags); int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags); @@ -2151,6 +2168,17 @@ static inline void security_commit_creds(struct cred *new, { } +static inline int security_kernel_act_as(struct cred *cred, u32 secid) +{ + return 0; +} + +static inline int security_kernel_create_files_as(struct cred *cred, + struct inode *inode) +{ + return 0; +} + static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) { diff --git a/kernel/cred.c b/kernel/cred.c index f3ca10660617..13697ca2bb38 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -462,3 +462,116 @@ void __init cred_init(void) cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); } + +/** + * prepare_kernel_cred - Prepare a set of credentials for a kernel service + * @daemon: A userspace daemon to be used as a reference + * + * Prepare a set of credentials for a kernel service. This can then be used to + * override a task's own credentials so that work can be done on behalf of that + * task that requires a different subjective context. + * + * @daemon is used to provide a base for the security record, but can be NULL. + * If @daemon is supplied, then the security data will be derived from that; + * otherwise they'll be set to 0 and no groups, full capabilities and no keys. + * + * The caller may change these controls afterwards if desired. + * + * Returns the new credentials or NULL if out of memory. + * + * Does not take, and does not return holding current->cred_replace_mutex. + */ +struct cred *prepare_kernel_cred(struct task_struct *daemon) +{ + const struct cred *old; + struct cred *new; + + new = kmem_cache_alloc(cred_jar, GFP_KERNEL); + if (!new) + return NULL; + + if (daemon) + old = get_task_cred(daemon); + else + old = get_cred(&init_cred); + + get_uid(new->user); + get_group_info(new->group_info); + +#ifdef CONFIG_KEYS + atomic_inc(&init_tgcred.usage); + new->tgcred = &init_tgcred; + new->request_key_auth = NULL; + new->thread_keyring = NULL; + new->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; +#endif + +#ifdef CONFIG_SECURITY + new->security = NULL; +#endif + if (security_prepare_creds(new, old, GFP_KERNEL) < 0) + goto error; + + atomic_set(&new->usage, 1); + put_cred(old); + return new; + +error: + put_cred(new); + return NULL; +} +EXPORT_SYMBOL(prepare_kernel_cred); + +/** + * set_security_override - Set the security ID in a set of credentials + * @new: The credentials to alter + * @secid: The LSM security ID to set + * + * Set the LSM security ID in a set of credentials so that the subjective + * security is overridden when an alternative set of credentials is used. + */ +int set_security_override(struct cred *new, u32 secid) +{ + return security_kernel_act_as(new, secid); +} +EXPORT_SYMBOL(set_security_override); + +/** + * set_security_override_from_ctx - Set the security ID in a set of credentials + * @new: The credentials to alter + * @secctx: The LSM security context to generate the security ID from. + * + * Set the LSM security ID in a set of credentials so that the subjective + * security is overridden when an alternative set of credentials is used. The + * security ID is specified in string form as a security context to be + * interpreted by the LSM. + */ +int set_security_override_from_ctx(struct cred *new, const char *secctx) +{ + u32 secid; + int ret; + + ret = security_secctx_to_secid(secctx, strlen(secctx), &secid); + if (ret < 0) + return ret; + + return set_security_override(new, secid); +} +EXPORT_SYMBOL(set_security_override_from_ctx); + +/** + * set_create_files_as - Set the LSM file create context in a set of credentials + * @new: The credentials to alter + * @inode: The inode to take the context from + * + * Change the LSM file creation context in a set of credentials to be the same + * as the object context of the specified inode, so that the new inodes have + * the same MAC context as that inode. + */ +int set_create_files_as(struct cred *new, struct inode *inode) +{ + new->fsuid = inode->i_uid; + new->fsgid = inode->i_gid; + return security_kernel_create_files_as(new, inode); +} +EXPORT_SYMBOL(set_create_files_as); diff --git a/security/capability.c b/security/capability.c index 185804f99ad1..b9e391425e6f 100644 --- a/security/capability.c +++ b/security/capability.c @@ -348,6 +348,16 @@ static void cap_cred_commit(struct cred *new, const struct cred *old) { } +static int cap_kernel_act_as(struct cred *new, u32 secid) +{ + return 0; +} + +static int cap_kernel_create_files_as(struct cred *new, struct inode *inode) +{ + return 0; +} + static int cap_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) { return 0; @@ -889,6 +899,8 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, cred_free); set_to_cap_if_null(ops, cred_prepare); set_to_cap_if_null(ops, cred_commit); + set_to_cap_if_null(ops, kernel_act_as); + set_to_cap_if_null(ops, kernel_create_files_as); set_to_cap_if_null(ops, task_setuid); set_to_cap_if_null(ops, task_fix_setuid); set_to_cap_if_null(ops, task_setgid); diff --git a/security/security.c b/security/security.c index dc5babb2d6d8..038ef04b2c7f 100644 --- a/security/security.c +++ b/security/security.c @@ -616,6 +616,16 @@ void security_commit_creds(struct cred *new, const struct cred *old) return security_ops->cred_commit(new, old); } +int security_kernel_act_as(struct cred *new, u32 secid) +{ + return security_ops->kernel_act_as(new, secid); +} + +int security_kernel_create_files_as(struct cred *new, struct inode *inode) +{ + return security_ops->kernel_create_files_as(new, inode); +} + int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) { return security_ops->task_setuid(id0, id1, id2, flags); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 91b06f2aa963..520f82ab3fbf 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3277,6 +3277,50 @@ static void selinux_cred_commit(struct cred *new, const struct cred *old) secondary_ops->cred_commit(new, old); } +/* + * set the security data for a kernel service + * - all the creation contexts are set to unlabelled + */ +static int selinux_kernel_act_as(struct cred *new, u32 secid) +{ + struct task_security_struct *tsec = new->security; + u32 sid = current_sid(); + int ret; + + ret = avc_has_perm(sid, secid, + SECCLASS_KERNEL_SERVICE, + KERNEL_SERVICE__USE_AS_OVERRIDE, + NULL); + if (ret == 0) { + tsec->sid = secid; + tsec->create_sid = 0; + tsec->keycreate_sid = 0; + tsec->sockcreate_sid = 0; + } + return ret; +} + +/* + * set the file creation context in a security record to the same as the + * objective context of the specified inode + */ +static int selinux_kernel_create_files_as(struct cred *new, struct inode *inode) +{ + struct inode_security_struct *isec = inode->i_security; + struct task_security_struct *tsec = new->security; + u32 sid = current_sid(); + int ret; + + ret = avc_has_perm(sid, isec->sid, + SECCLASS_KERNEL_SERVICE, + KERNEL_SERVICE__CREATE_FILES_AS, + NULL); + + if (ret == 0) + tsec->create_sid = isec->sid; + return 0; +} + static int selinux_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) { /* Since setuid only affects the current process, and @@ -5593,6 +5637,8 @@ static struct security_operations selinux_ops = { .cred_free = selinux_cred_free, .cred_prepare = selinux_cred_prepare, .cred_commit = selinux_cred_commit, + .kernel_act_as = selinux_kernel_act_as, + .kernel_create_files_as = selinux_kernel_create_files_as, .task_setuid = selinux_task_setuid, .task_fix_setuid = selinux_task_fix_setuid, .task_setgid = selinux_task_setgid, diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index de396742abf4..8ad48161cef5 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1011,6 +1011,41 @@ static void smack_cred_commit(struct cred *new, const struct cred *old) { } +/** + * smack_kernel_act_as - Set the subjective context in a set of credentials + * @new points to the set of credentials to be modified. + * @secid specifies the security ID to be set + * + * Set the security data for a kernel service. + */ +static int smack_kernel_act_as(struct cred *new, u32 secid) +{ + char *smack = smack_from_secid(secid); + + if (smack == NULL) + return -EINVAL; + + new->security = smack; + return 0; +} + +/** + * smack_kernel_create_files_as - Set the file creation label in a set of creds + * @new points to the set of credentials to be modified + * @inode points to the inode to use as a reference + * + * Set the file creation context in a set of credentials to the same + * as the objective context of the specified inode + */ +static int smack_kernel_create_files_as(struct cred *new, + struct inode *inode) +{ + struct inode_smack *isp = inode->i_security; + + new->security = isp->smk_inode; + return 0; +} + /** * smack_task_setpgid - Smack check on setting pgid * @p: the task object @@ -2641,6 +2676,8 @@ struct security_operations smack_ops = { .cred_free = smack_cred_free, .cred_prepare = smack_cred_prepare, .cred_commit = smack_cred_commit, + .kernel_act_as = smack_kernel_act_as, + .kernel_create_files_as = smack_kernel_create_files_as, .task_fix_setuid = cap_task_fix_setuid, .task_setpgid = smack_task_setpgid, .task_getpgid = smack_task_getpgid, -- cgit v1.2.3 From 38a7ddffa4b79d7b1fbc9bf2fa82b21b72622858 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Thu, 13 Nov 2008 22:44:11 -0800 Subject: tcp: remove an unnecessary field in struct tcp_skb_cb The urg_ptr field is not used anywhere and is merely confusing. Signed-off-by: Petr Tesarik Signed-off-by: David S. Miller --- include/net/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 438014d57610..8f26b28fb407 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -590,7 +590,6 @@ struct tcp_skb_cb { #define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */ #define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS) - __u16 urg_ptr; /* Valid w/URG flags is set. */ __u32 ack_seq; /* Sequence number ACK'd */ }; -- cgit v1.2.3 From f30ab418a1d3c5a8b83493e7d70d6876a74aa0ce Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Thu, 13 Nov 2008 22:56:30 -0800 Subject: pkt_sched: Remove qdisc->ops->requeue() etc. After implementing qdisc->ops->peek() and changing sch_netem into classless qdisc there are no more qdisc->ops->requeue() users. This patch removes this method with its wrappers (qdisc_requeue()), and also unused qdisc->requeue structure. There are a few minor fixes of warnings (htb_enqueue()) and comments btw. The idea to kill ->requeue() and a similar patch were first developed by David S. Miller. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/sch_generic.h | 17 ------------- net/sched/sch_api.c | 7 ------ net/sched/sch_atm.c | 20 +-------------- net/sched/sch_cbq.c | 35 -------------------------- net/sched/sch_dsmark.c | 21 ---------------- net/sched/sch_fifo.c | 2 -- net/sched/sch_generic.c | 23 ----------------- net/sched/sch_gred.c | 21 ---------------- net/sched/sch_hfsc.c | 19 -------------- net/sched/sch_htb.c | 44 +-------------------------------- net/sched/sch_multiq.c | 39 ++--------------------------- net/sched/sch_netem.c | 16 ------------ net/sched/sch_prio.c | 28 --------------------- net/sched/sch_red.c | 18 -------------- net/sched/sch_sfq.c | 63 ----------------------------------------------- net/sched/sch_tbf.c | 14 ----------- net/sched/sch_teql.c | 11 --------- 17 files changed, 4 insertions(+), 394 deletions(-) (limited to 'include') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 64ae1ba9f554..f8c47429044a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -53,7 +53,6 @@ struct Qdisc atomic_t refcnt; unsigned long state; struct sk_buff *gso_skb; - struct sk_buff_head requeue; struct sk_buff_head q; struct netdev_queue *dev_queue; struct Qdisc *next_sched; @@ -112,7 +111,6 @@ struct Qdisc_ops int (*enqueue)(struct sk_buff *, struct Qdisc *); struct sk_buff * (*dequeue)(struct Qdisc *); struct sk_buff * (*peek)(struct Qdisc *); - int (*requeue)(struct sk_buff *, struct Qdisc *); unsigned int (*drop)(struct Qdisc *); int (*init)(struct Qdisc *, struct nlattr *arg); @@ -467,21 +465,6 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) return skb; } -static inline int __qdisc_requeue(struct sk_buff *skb, struct Qdisc *sch, - struct sk_buff_head *list) -{ - __skb_queue_head(list, skb); - sch->qstats.backlog += qdisc_pkt_len(skb); - sch->qstats.requeues++; - - return NET_XMIT_SUCCESS; -} - -static inline int qdisc_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - return __qdisc_requeue(skb, sch, &sch->q); -} - static inline void __qdisc_reset_queue(struct Qdisc *sch, struct sk_buff_head *list) { diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e5646614e88d..5bcef13408c8 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -97,11 +97,6 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, Auxiliary routines: - ---requeue - - requeues once dequeued packet. It is used for non-standard or - just buggy devices, which can defer output even if netif_queue_stopped()=0. - ---peek like dequeue but without removing a packet from the queue @@ -151,8 +146,6 @@ int register_qdisc(struct Qdisc_ops *qops) if (qops->enqueue == NULL) qops->enqueue = noop_qdisc_ops.enqueue; - if (qops->requeue == NULL) - qops->requeue = noop_qdisc_ops.requeue; if (qops->peek == NULL) { if (qops->dequeue == NULL) { qops->peek = noop_qdisc_ops.peek; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 6eb9a650b63d..ca90f6e59aee 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -62,7 +62,7 @@ struct atm_qdisc_data { struct atm_flow_data link; /* unclassified skbs go here */ struct atm_flow_data *flows; /* NB: "link" is also on this list */ - struct tasklet_struct task; /* requeue tasklet */ + struct tasklet_struct task; /* dequeue tasklet */ }; /* ------------------------- Class/flow operations ------------------------- */ @@ -534,23 +534,6 @@ static struct sk_buff *atm_tc_peek(struct Qdisc *sch) return p->link.q->ops->peek(p->link.q); } -static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct atm_qdisc_data *p = qdisc_priv(sch); - int ret; - - pr_debug("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); - ret = p->link.q->ops->requeue(skb, p->link.q); - if (!ret) { - sch->q.qlen++; - sch->qstats.requeues++; - } else if (net_xmit_drop_count(ret)) { - sch->qstats.drops++; - p->link.qstats.drops++; - } - return ret; -} - static unsigned int atm_tc_drop(struct Qdisc *sch) { struct atm_qdisc_data *p = qdisc_priv(sch); @@ -707,7 +690,6 @@ static struct Qdisc_ops atm_qdisc_ops __read_mostly = { .enqueue = atm_tc_enqueue, .dequeue = atm_tc_dequeue, .peek = atm_tc_peek, - .requeue = atm_tc_requeue, .drop = atm_tc_drop, .init = atm_tc_init, .reset = atm_tc_reset, diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 63efa70abbea..a99e37e9e6f1 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -405,40 +405,6 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } -static int -cbq_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl; - int ret; - - if ((cl = q->tx_class) == NULL) { - kfree_skb(skb); - sch->qstats.drops++; - return NET_XMIT_CN; - } - q->tx_class = NULL; - - cbq_mark_toplevel(q, cl); - -#ifdef CONFIG_NET_CLS_ACT - q->rx_class = cl; - cl->q->__parent = sch; -#endif - if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) { - sch->q.qlen++; - sch->qstats.requeues++; - if (!cl->next_alive) - cbq_activate_class(cl); - return 0; - } - if (net_xmit_drop_count(ret)) { - sch->qstats.drops++; - cl->qstats.drops++; - } - return ret; -} - /* Overlimit actions */ /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */ @@ -2067,7 +2033,6 @@ static struct Qdisc_ops cbq_qdisc_ops __read_mostly = { .enqueue = cbq_enqueue, .dequeue = cbq_dequeue, .peek = qdisc_peek_dequeued, - .requeue = cbq_requeue, .drop = cbq_drop, .init = cbq_init, .reset = cbq_reset, diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 3e491479ea86..3f9427a4b757 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -322,26 +322,6 @@ static struct sk_buff *dsmark_peek(struct Qdisc *sch) return p->q->ops->peek(p->q); } -static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct dsmark_qdisc_data *p = qdisc_priv(sch); - int err; - - pr_debug("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); - - err = p->q->ops->requeue(skb, p->q); - if (err != NET_XMIT_SUCCESS) { - if (net_xmit_drop_count(err)) - sch->qstats.drops++; - return err; - } - - sch->q.qlen++; - sch->qstats.requeues++; - - return NET_XMIT_SUCCESS; -} - static unsigned int dsmark_drop(struct Qdisc *sch) { struct dsmark_qdisc_data *p = qdisc_priv(sch); @@ -506,7 +486,6 @@ static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = { .enqueue = dsmark_enqueue, .dequeue = dsmark_dequeue, .peek = dsmark_peek, - .requeue = dsmark_requeue, .drop = dsmark_drop, .init = dsmark_init, .reset = dsmark_reset, diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 8825e8806f41..92cfc9d7e3b9 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -84,7 +84,6 @@ struct Qdisc_ops pfifo_qdisc_ops __read_mostly = { .enqueue = pfifo_enqueue, .dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, - .requeue = qdisc_requeue, .drop = qdisc_queue_drop, .init = fifo_init, .reset = qdisc_reset_queue, @@ -100,7 +99,6 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { .enqueue = bfifo_enqueue, .dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, - .requeue = qdisc_requeue, .drop = qdisc_queue_drop, .init = fifo_init, .reset = qdisc_reset_queue, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 1192da229835..80c8f3dbbea1 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -306,22 +306,12 @@ static struct sk_buff *noop_dequeue(struct Qdisc * qdisc) return NULL; } -static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc) -{ - if (net_ratelimit()) - printk(KERN_DEBUG "%s deferred output. It is buggy.\n", - skb->dev->name); - kfree_skb(skb); - return NET_XMIT_CN; -} - struct Qdisc_ops noop_qdisc_ops __read_mostly = { .id = "noop", .priv_size = 0, .enqueue = noop_enqueue, .dequeue = noop_dequeue, .peek = noop_dequeue, - .requeue = noop_requeue, .owner = THIS_MODULE, }; @@ -336,7 +326,6 @@ struct Qdisc noop_qdisc = { .flags = TCQ_F_BUILTIN, .ops = &noop_qdisc_ops, .list = LIST_HEAD_INIT(noop_qdisc.list), - .requeue.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, }; @@ -348,7 +337,6 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = { .enqueue = noop_enqueue, .dequeue = noop_dequeue, .peek = noop_dequeue, - .requeue = noop_requeue, .owner = THIS_MODULE, }; @@ -364,7 +352,6 @@ static struct Qdisc noqueue_qdisc = { .flags = TCQ_F_BUILTIN, .ops = &noqueue_qdisc_ops, .list = LIST_HEAD_INIT(noqueue_qdisc.list), - .requeue.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), .dev_queue = &noqueue_netdev_queue, }; @@ -426,12 +413,6 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc) return NULL; } -static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc) -{ - qdisc->q.qlen++; - return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc)); -} - static void pfifo_fast_reset(struct Qdisc* qdisc) { int prio; @@ -473,7 +454,6 @@ static struct Qdisc_ops pfifo_fast_ops __read_mostly = { .enqueue = pfifo_fast_enqueue, .dequeue = pfifo_fast_dequeue, .peek = pfifo_fast_peek, - .requeue = pfifo_fast_requeue, .init = pfifo_fast_init, .reset = pfifo_fast_reset, .dump = pfifo_fast_dump, @@ -499,7 +479,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch->padded = (char *) sch - (char *) p; INIT_LIST_HEAD(&sch->list); - skb_queue_head_init(&sch->requeue); skb_queue_head_init(&sch->q); sch->ops = ops; sch->enqueue = ops->enqueue; @@ -571,8 +550,6 @@ void qdisc_destroy(struct Qdisc *qdisc) dev_put(qdisc_dev(qdisc)); kfree_skb(qdisc->gso_skb); - __skb_queue_purge(&qdisc->requeue); - kfree((char *) qdisc - qdisc->padded); } EXPORT_SYMBOL(qdisc_destroy); diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index cb20ee3b9fc2..40408d595c08 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -240,26 +240,6 @@ congestion_drop: return NET_XMIT_CN; } -static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - struct gred_sched *t = qdisc_priv(sch); - struct gred_sched_data *q; - u16 dp = tc_index_to_dp(skb); - - if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { - if (net_ratelimit()) - printk(KERN_WARNING "GRED: Unable to relocate VQ 0x%x " - "for requeue, screwing up backlog.\n", - tc_index_to_dp(skb)); - } else { - if (red_is_idling(&q->parms)) - red_end_of_idle_period(&q->parms); - q->backlog += qdisc_pkt_len(skb); - } - - return qdisc_requeue(skb, sch); -} - static struct sk_buff *gred_dequeue(struct Qdisc* sch) { struct sk_buff *skb; @@ -603,7 +583,6 @@ static struct Qdisc_ops gred_qdisc_ops __read_mostly = { .enqueue = gred_enqueue, .dequeue = gred_dequeue, .peek = qdisc_peek_head, - .requeue = gred_requeue, .drop = gred_drop, .init = gred_init, .reset = gred_reset, diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index d90b1652f2af..071c4749a12b 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -184,7 +184,6 @@ struct hfsc_sched struct rb_root eligible; /* eligible tree */ struct list_head droplist; /* active leaf class list (for dropping) */ - struct sk_buff_head requeue; /* requeued packet */ struct qdisc_watchdog watchdog; /* watchdog timer */ }; @@ -1432,7 +1431,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) return err; q->eligible = RB_ROOT; INIT_LIST_HEAD(&q->droplist); - skb_queue_head_init(&q->requeue); q->root.cl_common.classid = sch->handle; q->root.refcnt = 1; @@ -1517,7 +1515,6 @@ hfsc_reset_qdisc(struct Qdisc *sch) hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) hfsc_reset_class(cl); } - __skb_queue_purge(&q->requeue); q->eligible = RB_ROOT; INIT_LIST_HEAD(&q->droplist); qdisc_watchdog_cancel(&q->watchdog); @@ -1542,7 +1539,6 @@ hfsc_destroy_qdisc(struct Qdisc *sch) hfsc_destroy_class(sch, cl); } qdisc_class_hash_destroy(&q->clhash); - __skb_queue_purge(&q->requeue); qdisc_watchdog_cancel(&q->watchdog); } @@ -1609,8 +1605,6 @@ hfsc_dequeue(struct Qdisc *sch) if (sch->q.qlen == 0) return NULL; - if ((skb = __skb_dequeue(&q->requeue))) - goto out; cur_time = psched_get_time(); @@ -1659,24 +1653,12 @@ hfsc_dequeue(struct Qdisc *sch) set_passive(cl); } - out: sch->flags &= ~TCQ_F_THROTTLED; sch->q.qlen--; return skb; } -static int -hfsc_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct hfsc_sched *q = qdisc_priv(sch); - - __skb_queue_head(&q->requeue, skb); - sch->q.qlen++; - sch->qstats.requeues++; - return NET_XMIT_SUCCESS; -} - static unsigned int hfsc_drop(struct Qdisc *sch) { @@ -1728,7 +1710,6 @@ static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = { .enqueue = hfsc_enqueue, .dequeue = hfsc_dequeue, .peek = qdisc_peek_dequeued, - .requeue = hfsc_requeue, .drop = hfsc_drop, .cl_ops = &hfsc_class_ops, .priv_size = sizeof(struct hfsc_sched), diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 3fda8199713d..83f5e69243c1 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -551,7 +551,7 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) { - int ret; + int uninitialized_var(ret); struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = htb_classify(skb, sch, &ret); @@ -591,47 +591,6 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_SUCCESS; } -/* TODO: requeuing packet charges it to policers again !! */ -static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - int ret; - struct htb_sched *q = qdisc_priv(sch); - struct htb_class *cl = htb_classify(skb, sch, &ret); - struct sk_buff *tskb; - - if (cl == HTB_DIRECT) { - /* enqueue to helper queue */ - if (q->direct_queue.qlen < q->direct_qlen) { - __skb_queue_head(&q->direct_queue, skb); - } else { - __skb_queue_head(&q->direct_queue, skb); - tskb = __skb_dequeue_tail(&q->direct_queue); - kfree_skb(tskb); - sch->qstats.drops++; - return NET_XMIT_CN; - } -#ifdef CONFIG_NET_CLS_ACT - } else if (!cl) { - if (ret & __NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb(skb); - return ret; -#endif - } else if ((ret = cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q)) != - NET_XMIT_SUCCESS) { - if (net_xmit_drop_count(ret)) { - sch->qstats.drops++; - cl->qstats.drops++; - } - return ret; - } else - htb_activate(q, cl); - - sch->q.qlen++; - sch->qstats.requeues++; - return NET_XMIT_SUCCESS; -} - /** * htb_charge_class - charges amount "bytes" to leaf and ancestors * @@ -1566,7 +1525,6 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = { .enqueue = htb_enqueue, .dequeue = htb_dequeue, .peek = qdisc_peek_dequeued, - .requeue = htb_requeue, .drop = htb_drop, .init = htb_init, .reset = htb_reset, diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 155648d23b7c..f645ac55a1a1 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -92,40 +92,6 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } - -static int -multiq_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct Qdisc *qdisc; - struct multiq_sched_data *q = qdisc_priv(sch); - int ret; - - qdisc = multiq_classify(skb, sch, &ret); -#ifdef CONFIG_NET_CLS_ACT - if (qdisc == NULL) { - if (ret & __NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb(skb); - return ret; - } -#endif - - ret = qdisc->ops->requeue(skb, qdisc); - if (ret == NET_XMIT_SUCCESS) { - sch->q.qlen++; - sch->qstats.requeues++; - if (q->curband) - q->curband--; - else - q->curband = q->bands - 1; - return NET_XMIT_SUCCESS; - } - if (net_xmit_drop_count(ret)) - sch->qstats.drops++; - return ret; -} - - static struct sk_buff *multiq_dequeue(struct Qdisc *sch) { struct multiq_sched_data *q = qdisc_priv(sch); @@ -140,7 +106,7 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch) q->curband = 0; /* Check that target subqueue is available before - * pulling an skb to avoid excessive requeues + * pulling an skb to avoid head-of-line blocking. */ if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) { qdisc = q->queues[q->curband]; @@ -170,7 +136,7 @@ static struct sk_buff *multiq_peek(struct Qdisc *sch) curband = 0; /* Check that target subqueue is available before - * pulling an skb to avoid excessive requeues + * pulling an skb to avoid head-of-line blocking. */ if (!__netif_subqueue_stopped(qdisc_dev(sch), curband)) { qdisc = q->queues[curband]; @@ -480,7 +446,6 @@ static struct Qdisc_ops multiq_qdisc_ops __read_mostly = { .enqueue = multiq_enqueue, .dequeue = multiq_dequeue, .peek = multiq_peek, - .requeue = multiq_requeue, .drop = multiq_drop, .init = multiq_init, .reset = multiq_reset, diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index f69698ff88d9..3cbc3ff7b5bc 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -252,20 +252,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } -/* Requeue packets but don't change time stamp */ -static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct netem_sched_data *q = qdisc_priv(sch); - int ret; - - if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) { - sch->q.qlen++; - sch->qstats.requeues++; - } - - return ret; -} - static unsigned int netem_drop(struct Qdisc* sch) { struct netem_sched_data *q = qdisc_priv(sch); @@ -531,7 +517,6 @@ static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = { .enqueue = tfifo_enqueue, .dequeue = qdisc_dequeue_head, .peek = qdisc_peek_head, - .requeue = qdisc_requeue, .drop = qdisc_queue_drop, .init = tfifo_init, .reset = qdisc_reset_queue, @@ -620,7 +605,6 @@ static struct Qdisc_ops netem_qdisc_ops __read_mostly = { .enqueue = netem_enqueue, .dequeue = netem_dequeue, .peek = qdisc_peek_dequeued, - .requeue = netem_requeue, .drop = netem_drop, .init = netem_init, .reset = netem_reset, diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 3651da3e2802..ea65a87ec22c 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -93,33 +93,6 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } - -static int -prio_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - struct Qdisc *qdisc; - int ret; - - qdisc = prio_classify(skb, sch, &ret); -#ifdef CONFIG_NET_CLS_ACT - if (qdisc == NULL) { - if (ret & __NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb(skb); - return ret; - } -#endif - - if ((ret = qdisc->ops->requeue(skb, qdisc)) == NET_XMIT_SUCCESS) { - sch->q.qlen++; - sch->qstats.requeues++; - return NET_XMIT_SUCCESS; - } - if (net_xmit_drop_count(ret)) - sch->qstats.drops++; - return ret; -} - static struct sk_buff *prio_peek(struct Qdisc *sch) { struct prio_sched_data *q = qdisc_priv(sch); @@ -435,7 +408,6 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = { .enqueue = prio_enqueue, .dequeue = prio_dequeue, .peek = prio_peek, - .requeue = prio_requeue, .drop = prio_drop, .init = prio_init, .reset = prio_reset, diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 7abc51454c2d..6a0371c22643 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -108,23 +108,6 @@ congestion_drop: return NET_XMIT_CN; } -static int red_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - struct red_sched_data *q = qdisc_priv(sch); - struct Qdisc *child = q->qdisc; - int ret; - - if (red_is_idling(&q->parms)) - red_end_of_idle_period(&q->parms); - - ret = child->ops->requeue(skb, child); - if (likely(ret == NET_XMIT_SUCCESS)) { - sch->qstats.requeues++; - sch->q.qlen++; - } - return ret; -} - static struct sk_buff * red_dequeue(struct Qdisc* sch) { struct sk_buff *skb; @@ -370,7 +353,6 @@ static struct Qdisc_ops red_qdisc_ops __read_mostly = { .enqueue = red_enqueue, .dequeue = red_dequeue, .peek = red_peek, - .requeue = red_requeue, .drop = red_drop, .init = red_init, .reset = red_reset, diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 198b83d42ba8..ab8cfee3c9ce 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -329,68 +329,6 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_CN; } -static int -sfq_requeue(struct sk_buff *skb, struct Qdisc *sch) -{ - struct sfq_sched_data *q = qdisc_priv(sch); - unsigned int hash; - sfq_index x; - int ret; - - hash = sfq_classify(skb, sch, &ret); - if (hash == 0) { - if (ret & __NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb(skb); - return ret; - } - hash--; - - x = q->ht[hash]; - if (x == SFQ_DEPTH) { - q->ht[hash] = x = q->dep[SFQ_DEPTH].next; - q->hash[x] = hash; - } - - sch->qstats.backlog += qdisc_pkt_len(skb); - __skb_queue_head(&q->qs[x], skb); - /* If selected queue has length q->limit+1, this means that - * all another queues are empty and we do simple tail drop. - * This packet is still requeued at head of queue, tail packet - * is dropped. - */ - if (q->qs[x].qlen > q->limit) { - skb = q->qs[x].prev; - __skb_unlink(skb, &q->qs[x]); - sch->qstats.drops++; - sch->qstats.backlog -= qdisc_pkt_len(skb); - kfree_skb(skb); - return NET_XMIT_CN; - } - - sfq_inc(q, x); - if (q->qs[x].qlen == 1) { /* The flow is new */ - if (q->tail == SFQ_DEPTH) { /* It is the first flow */ - q->tail = x; - q->next[x] = x; - q->allot[x] = q->quantum; - } else { - q->next[x] = q->next[q->tail]; - q->next[q->tail] = x; - q->tail = x; - } - } - - if (++sch->q.qlen <= q->limit) { - sch->qstats.requeues++; - return 0; - } - - sch->qstats.drops++; - sfq_drop(sch); - return NET_XMIT_CN; -} - static struct sk_buff * sfq_peek(struct Qdisc *sch) { @@ -636,7 +574,6 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = { .enqueue = sfq_enqueue, .dequeue = sfq_dequeue, .peek = sfq_peek, - .requeue = sfq_requeue, .drop = sfq_drop, .init = sfq_init, .reset = sfq_reset, diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 435076cf620e..bb7783d584bb 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -139,19 +139,6 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) return 0; } -static int tbf_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - struct tbf_sched_data *q = qdisc_priv(sch); - int ret; - - if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) { - sch->q.qlen++; - sch->qstats.requeues++; - } - - return ret; -} - static unsigned int tbf_drop(struct Qdisc* sch) { struct tbf_sched_data *q = qdisc_priv(sch); @@ -468,7 +455,6 @@ static struct Qdisc_ops tbf_qdisc_ops __read_mostly = { .enqueue = tbf_enqueue, .dequeue = tbf_dequeue, .peek = qdisc_peek_dequeued, - .requeue = tbf_requeue, .drop = tbf_drop, .init = tbf_init, .reset = tbf_reset, diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index bf03e7fa1849..cfc8e7caba62 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -93,16 +93,6 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) return NET_XMIT_DROP; } -static int -teql_requeue(struct sk_buff *skb, struct Qdisc* sch) -{ - struct teql_sched_data *q = qdisc_priv(sch); - - __skb_queue_head(&q->q, skb); - sch->qstats.requeues++; - return 0; -} - static struct sk_buff * teql_dequeue(struct Qdisc* sch) { @@ -441,7 +431,6 @@ static __init void teql_master_setup(struct net_device *dev) ops->enqueue = teql_enqueue; ops->dequeue = teql_dequeue; ops->peek = teql_peek; - ops->requeue = teql_requeue; ops->init = teql_qdisc_init; ops->reset = teql_reset; ops->destroy = teql_destroy; -- cgit v1.2.3 From 31e889098a80ceb3e9e3c555d522b2686a6663c6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 14 Nov 2008 16:21:19 -0800 Subject: ftrace: pass module struct to arch dynamic ftrace functions Impact: allow archs more flexibility on dynamic ftrace implementations Dynamic ftrace has largly been developed on x86. Since x86 does not have the same limitations as other architectures, the ftrace interaction between the generic code and the architecture specific code was not flexible enough to handle some of the issues that other architectures have. Most notably, module trampolines. Due to the limited branch distance that archs make in calling kernel core code from modules, the module load code must create a trampoline to jump to what will make the larger jump into core kernel code. The problem arises when this happens to a call to mcount. Ftrace checks all code before modifying it and makes sure the current code is what it expects. Right now, there is not enough information to handle modifying module trampolines. This patch changes the API between generic dynamic ftrace code and the arch dependent code. There is now two functions for modifying code: ftrace_make_nop(mod, rec, addr) - convert the code at rec->ip into a nop, where the original text is calling addr. (mod is the module struct if called by module init) ftrace_make_caller(rec, addr) - convert the code rec->ip that should be a nop into a caller to addr. The record "rec" now has a new field called "arch" where the architecture can add any special attributes to each call site record. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ftrace.h | 8 ++++++ arch/x86/kernel/ftrace.c | 29 +++++++++++++++++--- include/linux/ftrace.h | 53 +++++++++++++++++++++++++++--------- kernel/module.c | 2 +- kernel/trace/ftrace.c | 62 +++++++++++++++++-------------------------- 5 files changed, 100 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 9b6a1fa19e70..2bb43b433e07 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -17,6 +17,14 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) */ return addr - 1; } + +#ifdef CONFIG_DYNAMIC_FTRACE + +struct dyn_arch_ftrace { + /* No extra data needed for x86 */ +}; + +#endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index fe832738e1e2..762222ad1387 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -166,7 +166,7 @@ static int ftrace_calc_offset(long ip, long addr) return (int)(addr - ip); } -unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) { static union ftrace_code_union calc; @@ -311,12 +311,12 @@ do_ftrace_mod_code(unsigned long ip, void *new_code) static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; -unsigned char *ftrace_nop_replace(void) +static unsigned char *ftrace_nop_replace(void) { return ftrace_nop; } -int +static int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { @@ -349,6 +349,29 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, return 0; } +int ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned char *new, *old; + unsigned long ip = rec->ip; + + old = ftrace_call_replace(ip, addr); + new = ftrace_nop_replace(); + + return ftrace_modify_code(rec->ip, old, new); +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned char *new, *old; + unsigned long ip = rec->ip; + + old = ftrace_nop_replace(); + new = ftrace_call_replace(ip, addr); + + return ftrace_modify_code(rec->ip, old, new); +} + int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4fbc4a8b86a5..166a2070ef65 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -74,6 +74,9 @@ static inline void ftrace_start(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE +/* asm/ftrace.h must be defined for archs supporting dynamic ftrace */ +#include + enum { FTRACE_FL_FREE = (1 << 0), FTRACE_FL_FAILED = (1 << 1), @@ -88,6 +91,7 @@ struct dyn_ftrace { struct list_head list; unsigned long ip; /* address of mcount call-site */ unsigned long flags; + struct dyn_arch_ftrace arch; }; int ftrace_force_update(void); @@ -95,22 +99,40 @@ void ftrace_set_filter(unsigned char *buf, int len, int reset); /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); -extern unsigned char *ftrace_nop_replace(void); -extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr); extern int ftrace_dyn_arch_init(void *data); extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); -/* May be defined in arch */ -extern int ftrace_arch_read_dyn_info(char *buf, int size); +/** + * ftrace_make_nop - convert code into top + * @mod: module structure if called by module load initialization + * @rec: the mcount call site record + * @addr: the address that the call site should be calling + * + * This is a very sensitive operation and great care needs + * to be taken by the arch. The operation should carefully + * read the location, check to see if what is read is indeed + * what we expect it to be, and then on success of the compare, + * it should write to the location. + * + * The code segment at @rec->ip should be a caller to @addr + * + * Return must be: + * 0 on success + * -EFAULT on error reading the location + * -EINVAL on a failed compare of the contents + * -EPERM on error writing to the location + * Any other value will be considered a failure. + */ +extern int ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr); /** - * ftrace_modify_code - modify code segment - * @ip: the address of the code segment - * @old_code: the contents of what is expected to be there - * @new_code: the code to patch in + * ftrace_make_call - convert a nop call site into a call to addr + * @rec: the mcount call site record + * @addr: the address that the call site should call * * This is a very sensitive operation and great care needs * to be taken by the arch. The operation should carefully @@ -118,6 +140,8 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size); * what we expect it to be, and then on success of the compare, * it should write to the location. * + * The code segment at @rec->ip should be a nop + * * Return must be: * 0 on success * -EFAULT on error reading the location @@ -125,8 +149,11 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size); * -EPERM on error writing to the location * Any other value will be considered a failure. */ -extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code, - unsigned char *new_code); +extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); + + +/* May be defined in arch */ +extern int ftrace_arch_read_dyn_info(char *buf, int size); extern int skip_trace(unsigned long ip); @@ -259,11 +286,13 @@ static inline void ftrace_dump(void) { } #ifdef CONFIG_FTRACE_MCOUNT_RECORD extern void ftrace_init(void); -extern void ftrace_init_module(unsigned long *start, unsigned long *end); +extern void ftrace_init_module(struct module *mod, + unsigned long *start, unsigned long *end); #else static inline void ftrace_init(void) { } static inline void -ftrace_init_module(unsigned long *start, unsigned long *end) { } +ftrace_init_module(struct module *mod, + unsigned long *start, unsigned long *end) { } #endif diff --git a/kernel/module.c b/kernel/module.c index 1f4cc00e0c20..69791274e899 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2201,7 +2201,7 @@ static noinline struct module *load_module(void __user *umod, /* sechdrs[0].sh_size is always zero */ mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc", sizeof(*mseg), &num_mcount); - ftrace_init_module(mseg, mseg + num_mcount); + ftrace_init_module(mod, mseg, mseg + num_mcount); err = module_finalize(hdr, sechdrs, mod); if (err < 0) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3940c71ac2a2..e9a5fbfce08e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -358,9 +358,7 @@ static void print_ip_ins(const char *fmt, unsigned char *p) printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]); } -static void ftrace_bug(int failed, unsigned long ip, - unsigned char *expected, - unsigned char *replace) +static void ftrace_bug(int failed, unsigned long ip) { switch (failed) { case -EFAULT: @@ -372,9 +370,7 @@ static void ftrace_bug(int failed, unsigned long ip, FTRACE_WARN_ON_ONCE(1); pr_info("ftrace failed to modify "); print_ip_sym(ip); - print_ip_ins(" expected: ", expected); print_ip_ins(" actual: ", (unsigned char *)ip); - print_ip_ins(" replace: ", replace); printk(KERN_CONT "\n"); break; case -EPERM: @@ -392,8 +388,7 @@ static void ftrace_bug(int failed, unsigned long ip, #define FTRACE_ADDR ((long)(ftrace_caller)) static int -__ftrace_replace_code(struct dyn_ftrace *rec, - unsigned char *old, unsigned char *new, int enable) +__ftrace_replace_code(struct dyn_ftrace *rec, int enable) { unsigned long ip, fl; @@ -435,12 +430,10 @@ __ftrace_replace_code(struct dyn_ftrace *rec, * otherwise enable it! */ if (fl & FTRACE_FL_ENABLED) { - /* swap new and old */ - new = old; - old = ftrace_call_replace(ip, FTRACE_ADDR); + enable = 0; rec->flags &= ~FTRACE_FL_ENABLED; } else { - new = ftrace_call_replace(ip, FTRACE_ADDR); + enable = 1; rec->flags |= FTRACE_FL_ENABLED; } } else { @@ -453,10 +446,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED); if (fl == FTRACE_FL_NOTRACE) return 0; - - new = ftrace_call_replace(ip, FTRACE_ADDR); - } else - old = ftrace_call_replace(ip, FTRACE_ADDR); + } if (enable) { if (rec->flags & FTRACE_FL_ENABLED) @@ -469,21 +459,18 @@ __ftrace_replace_code(struct dyn_ftrace *rec, } } - return ftrace_modify_code(ip, old, new); + if (enable) + return ftrace_make_call(rec, FTRACE_ADDR); + else + return ftrace_make_nop(NULL, rec, FTRACE_ADDR); } static void ftrace_replace_code(int enable) { int i, failed; - unsigned char *new = NULL, *old = NULL; struct dyn_ftrace *rec; struct ftrace_page *pg; - if (enable) - old = ftrace_nop_replace(); - else - new = ftrace_nop_replace(); - for (pg = ftrace_pages_start; pg; pg = pg->next) { for (i = 0; i < pg->index; i++) { rec = &pg->records[i]; @@ -504,34 +491,30 @@ static void ftrace_replace_code(int enable) unfreeze_record(rec); } - failed = __ftrace_replace_code(rec, old, new, enable); + failed = __ftrace_replace_code(rec, enable); if (failed && (rec->flags & FTRACE_FL_CONVERTED)) { rec->flags |= FTRACE_FL_FAILED; if ((system_state == SYSTEM_BOOTING) || !core_kernel_text(rec->ip)) { ftrace_free_rec(rec); } else - ftrace_bug(failed, rec->ip, old, new); + ftrace_bug(failed, rec->ip); } } } } static int -ftrace_code_disable(struct dyn_ftrace *rec) +ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) { unsigned long ip; - unsigned char *nop, *call; int ret; ip = rec->ip; - nop = ftrace_nop_replace(); - call = ftrace_call_replace(ip, mcount_addr); - - ret = ftrace_modify_code(ip, call, nop); + ret = ftrace_make_nop(mod, rec, mcount_addr); if (ret) { - ftrace_bug(ret, ip, call, nop); + ftrace_bug(ret, ip); rec->flags |= FTRACE_FL_FAILED; return 0; } @@ -650,7 +633,7 @@ static cycle_t ftrace_update_time; static unsigned long ftrace_update_cnt; unsigned long ftrace_update_tot_cnt; -static int ftrace_update_code(void) +static int ftrace_update_code(struct module *mod) { struct dyn_ftrace *p, *t; cycle_t start, stop; @@ -667,7 +650,7 @@ static int ftrace_update_code(void) list_del_init(&p->list); /* convert record (i.e, patch mcount-call with NOP) */ - if (ftrace_code_disable(p)) { + if (ftrace_code_disable(mod, p)) { p->flags |= FTRACE_FL_CONVERTED; ftrace_update_cnt++; } else @@ -1309,7 +1292,8 @@ static __init int ftrace_init_debugfs(void) fs_initcall(ftrace_init_debugfs); -static int ftrace_convert_nops(unsigned long *start, +static int ftrace_convert_nops(struct module *mod, + unsigned long *start, unsigned long *end) { unsigned long *p; @@ -1325,18 +1309,19 @@ static int ftrace_convert_nops(unsigned long *start, /* disable interrupts to prevent kstop machine */ local_irq_save(flags); - ftrace_update_code(); + ftrace_update_code(mod); local_irq_restore(flags); mutex_unlock(&ftrace_start_lock); return 0; } -void ftrace_init_module(unsigned long *start, unsigned long *end) +void ftrace_init_module(struct module *mod, + unsigned long *start, unsigned long *end) { if (ftrace_disabled || start == end) return; - ftrace_convert_nops(start, end); + ftrace_convert_nops(mod, start, end); } extern unsigned long __start_mcount_loc[]; @@ -1366,7 +1351,8 @@ void __init ftrace_init(void) last_ftrace_enabled = ftrace_enabled = 1; - ret = ftrace_convert_nops(__start_mcount_loc, + ret = ftrace_convert_nops(NULL, + __start_mcount_loc, __stop_mcount_loc); return; -- cgit v1.2.3 From e7d3737ea1b102030f44e96c97754101e41515f0 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 16 Nov 2008 06:02:06 +0100 Subject: tracing/function-return-tracer: support for dynamic ftrace on function return tracer This patch adds the support for dynamic tracing on the function return tracer. The whole difference with normal dynamic function tracing is that we don't need to hook on a particular callback. The only pro that we want is to nop or set dynamically the calls to ftrace_caller (which is ftrace_return_caller here). Some security checks ensure that we are not trying to launch dynamic tracing for return tracing while normal function tracing is already running. An example of trace with getnstimeofday set as a filter: ktime_get_ts+0x22/0x50 -> getnstimeofday (2283 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1396 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1382 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1825 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1426 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1464 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1524 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1382 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1382 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1434 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1464 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1502 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1404 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1397 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1051 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1314 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1344 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1163 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1390 ns) ktime_get_ts+0x22/0x50 -> getnstimeofday (1374 ns) Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 18 ++- arch/x86/kernel/ftrace.c | 258 +++++++++++++++++----------------- include/linux/ftrace.h | 16 ++- kernel/trace/Kconfig | 1 - kernel/trace/ftrace.c | 58 +++++++- kernel/trace/trace_functions_return.c | 15 +- 6 files changed, 211 insertions(+), 155 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index f97621149839..74defe21ba42 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1190,7 +1190,7 @@ ENTRY(mcount) jnz trace #ifdef CONFIG_FUNCTION_RET_TRACER cmpl $ftrace_stub, ftrace_function_return - jnz trace_return + jnz ftrace_return_caller #endif .globl ftrace_stub ftrace_stub: @@ -1211,9 +1211,15 @@ trace: popl %ecx popl %eax jmp ftrace_stub +END(mcount) +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_FUNCTION_RET_TRACER -trace_return: +ENTRY(ftrace_return_caller) + cmpl $0, function_trace_stop + jne ftrace_stub + pushl %eax pushl %ecx pushl %edx @@ -1223,7 +1229,8 @@ trace_return: popl %edx popl %ecx popl %eax - jmp ftrace_stub + ret +END(ftrace_return_caller) .globl return_to_handler return_to_handler: @@ -1237,10 +1244,7 @@ return_to_handler: popl %ecx popl %eax ret -#endif /* CONFIG_FUNCTION_RET_TRACER */ -END(mcount) -#endif /* CONFIG_DYNAMIC_FTRACE */ -#endif /* CONFIG_FUNCTION_TRACER */ +#endif .section .rodata,"a" #include "syscall_table_32.S" diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index d98b5a8ecf4c..924153edd973 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -24,134 +24,6 @@ #include - -#ifdef CONFIG_FUNCTION_RET_TRACER - -/* - * These functions are picked from those used on - * this page for dynamic ftrace. They have been - * simplified to ignore all traces in NMI context. - */ -static atomic_t in_nmi; - -void ftrace_nmi_enter(void) -{ - atomic_inc(&in_nmi); -} - -void ftrace_nmi_exit(void) -{ - atomic_dec(&in_nmi); -} - -/* Add a function return address to the trace stack on thread info.*/ -static int push_return_trace(unsigned long ret, unsigned long long time, - unsigned long func) -{ - int index; - struct thread_info *ti = current_thread_info(); - - /* The return trace stack is full */ - if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1) - return -EBUSY; - - index = ++ti->curr_ret_stack; - barrier(); - ti->ret_stack[index].ret = ret; - ti->ret_stack[index].func = func; - ti->ret_stack[index].calltime = time; - - return 0; -} - -/* Retrieve a function return address to the trace stack on thread info.*/ -static void pop_return_trace(unsigned long *ret, unsigned long long *time, - unsigned long *func) -{ - int index; - - struct thread_info *ti = current_thread_info(); - index = ti->curr_ret_stack; - *ret = ti->ret_stack[index].ret; - *func = ti->ret_stack[index].func; - *time = ti->ret_stack[index].calltime; - ti->curr_ret_stack--; -} - -/* - * Send the trace to the ring-buffer. - * @return the original return address. - */ -unsigned long ftrace_return_to_handler(void) -{ - struct ftrace_retfunc trace; - pop_return_trace(&trace.ret, &trace.calltime, &trace.func); - trace.rettime = cpu_clock(raw_smp_processor_id()); - ftrace_function_return(&trace); - - return trace.ret; -} - -/* - * Hook the return address and push it in the stack of return addrs - * in current thread info. - */ -void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) -{ - unsigned long old; - unsigned long long calltime; - int faulted; - unsigned long return_hooker = (unsigned long) - &return_to_handler; - - /* Nmi's are currently unsupported */ - if (atomic_read(&in_nmi)) - return; - - /* - * Protect against fault, even if it shouldn't - * happen. This tool is too much intrusive to - * ignore such a protection. - */ - asm volatile( - "1: movl (%[parent_old]), %[old]\n" - "2: movl %[return_hooker], (%[parent_replaced])\n" - " movl $0, %[faulted]\n" - - ".section .fixup, \"ax\"\n" - "3: movl $1, %[faulted]\n" - ".previous\n" - - ".section __ex_table, \"a\"\n" - " .long 1b, 3b\n" - " .long 2b, 3b\n" - ".previous\n" - - : [parent_replaced] "=r" (parent), [old] "=r" (old), - [faulted] "=r" (faulted) - : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker) - : "memory" - ); - - if (WARN_ON(faulted)) { - unregister_ftrace_return(); - return; - } - - if (WARN_ON(!__kernel_text_address(old))) { - unregister_ftrace_return(); - *parent = old; - return; - } - - calltime = cpu_clock(raw_smp_processor_id()); - - if (push_return_trace(old, calltime, self_addr) == -EBUSY) - *parent = old; -} - -#endif - #ifdef CONFIG_DYNAMIC_FTRACE union ftrace_code_union { @@ -450,3 +322,133 @@ int __init ftrace_dyn_arch_init(void *data) return 0; } #endif + +#ifdef CONFIG_FUNCTION_RET_TRACER + +#ifndef CONFIG_DYNAMIC_FTRACE + +/* + * These functions are picked from those used on + * this page for dynamic ftrace. They have been + * simplified to ignore all traces in NMI context. + */ +static atomic_t in_nmi; + +void ftrace_nmi_enter(void) +{ + atomic_inc(&in_nmi); +} + +void ftrace_nmi_exit(void) +{ + atomic_dec(&in_nmi); +} +#endif /* !CONFIG_DYNAMIC_FTRACE */ + +/* Add a function return address to the trace stack on thread info.*/ +static int push_return_trace(unsigned long ret, unsigned long long time, + unsigned long func) +{ + int index; + struct thread_info *ti = current_thread_info(); + + /* The return trace stack is full */ + if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1) + return -EBUSY; + + index = ++ti->curr_ret_stack; + barrier(); + ti->ret_stack[index].ret = ret; + ti->ret_stack[index].func = func; + ti->ret_stack[index].calltime = time; + + return 0; +} + +/* Retrieve a function return address to the trace stack on thread info.*/ +static void pop_return_trace(unsigned long *ret, unsigned long long *time, + unsigned long *func) +{ + int index; + + struct thread_info *ti = current_thread_info(); + index = ti->curr_ret_stack; + *ret = ti->ret_stack[index].ret; + *func = ti->ret_stack[index].func; + *time = ti->ret_stack[index].calltime; + ti->curr_ret_stack--; +} + +/* + * Send the trace to the ring-buffer. + * @return the original return address. + */ +unsigned long ftrace_return_to_handler(void) +{ + struct ftrace_retfunc trace; + pop_return_trace(&trace.ret, &trace.calltime, &trace.func); + trace.rettime = cpu_clock(raw_smp_processor_id()); + ftrace_function_return(&trace); + + return trace.ret; +} + +/* + * Hook the return address and push it in the stack of return addrs + * in current thread info. + */ +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) +{ + unsigned long old; + unsigned long long calltime; + int faulted; + unsigned long return_hooker = (unsigned long) + &return_to_handler; + + /* Nmi's are currently unsupported */ + if (atomic_read(&in_nmi)) + return; + + /* + * Protect against fault, even if it shouldn't + * happen. This tool is too much intrusive to + * ignore such a protection. + */ + asm volatile( + "1: movl (%[parent_old]), %[old]\n" + "2: movl %[return_hooker], (%[parent_replaced])\n" + " movl $0, %[faulted]\n" + + ".section .fixup, \"ax\"\n" + "3: movl $1, %[faulted]\n" + ".previous\n" + + ".section __ex_table, \"a\"\n" + " .long 1b, 3b\n" + " .long 2b, 3b\n" + ".previous\n" + + : [parent_replaced] "=r" (parent), [old] "=r" (old), + [faulted] "=r" (faulted) + : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker) + : "memory" + ); + + if (WARN_ON(faulted)) { + unregister_ftrace_return(); + return; + } + + if (WARN_ON(!__kernel_text_address(old))) { + unregister_ftrace_return(); + *parent = old; + return; + } + + calltime = cpu_clock(raw_smp_processor_id()); + + if (push_return_trace(old, calltime, self_addr) == -EBUSY) + *parent = old; +} + +#endif /* CONFIG_FUNCTION_RET_TRACER */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 166a2070ef65..f1af1aab00e6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -25,6 +25,17 @@ struct ftrace_ops { extern int function_trace_stop; +/* + * Type of the current tracing. + */ +enum ftrace_tracing_type_t { + FTRACE_TYPE_ENTER = 0, /* Hook the call of the function */ + FTRACE_TYPE_RETURN, /* Hook the return of the function */ +}; + +/* Current tracing type, default is FTRACE_TYPE_ENTER */ +extern enum ftrace_tracing_type_t ftrace_tracing_type; + /** * ftrace_stop - stop function tracer. * @@ -104,6 +115,9 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); +#ifdef CONFIG_FUNCTION_RET_TRACER +extern void ftrace_return_caller(void); +#endif /** * ftrace_make_nop - convert code into top @@ -310,7 +324,7 @@ struct ftrace_retfunc { /* Type of a callback handler of tracing return function */ typedef void (*trace_function_return_t)(struct ftrace_retfunc *); -extern void register_ftrace_return(trace_function_return_t func); +extern int register_ftrace_return(trace_function_return_t func); /* The current handler in use */ extern trace_function_return_t ftrace_function_return; extern void unregister_ftrace_return(void); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 9c89526b6b7c..b8378fad29a3 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -59,7 +59,6 @@ config FUNCTION_TRACER config FUNCTION_RET_TRACER bool "Kernel Function return Tracer" - depends on !DYNAMIC_FTRACE depends on HAVE_FUNCTION_RET_TRACER depends on FUNCTION_TRACER help diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b42ec1de546b..2f78a45aac14 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -50,6 +50,9 @@ static int last_ftrace_enabled; /* Quick disabling of function tracer. */ int function_trace_stop; +/* By default, current tracing type is normal tracing. */ +enum ftrace_tracing_type_t ftrace_tracing_type = FTRACE_TYPE_ENTER; + /* * ftrace_disabled is set when an anomaly is discovered. * ftrace_disabled is much stronger than ftrace_enabled. @@ -385,12 +388,21 @@ static void ftrace_bug(int failed, unsigned long ip) } } -#define FTRACE_ADDR ((long)(ftrace_caller)) static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable) { unsigned long ip, fl; + unsigned long ftrace_addr; + +#ifdef CONFIG_FUNCTION_RET_TRACER + if (ftrace_tracing_type == FTRACE_TYPE_ENTER) + ftrace_addr = (unsigned long)ftrace_caller; + else + ftrace_addr = (unsigned long)ftrace_return_caller; +#else + ftrace_addr = (unsigned long)ftrace_caller; +#endif ip = rec->ip; @@ -450,9 +462,9 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) } if (rec->flags & FTRACE_FL_ENABLED) - return ftrace_make_call(rec, FTRACE_ADDR); + return ftrace_make_call(rec, ftrace_addr); else - return ftrace_make_nop(NULL, rec, FTRACE_ADDR); + return ftrace_make_nop(NULL, rec, ftrace_addr); } static void ftrace_replace_code(int enable) @@ -1405,10 +1417,17 @@ int register_ftrace_function(struct ftrace_ops *ops) return -1; mutex_lock(&ftrace_sysctl_lock); + + if (ftrace_tracing_type == FTRACE_TYPE_RETURN) { + ret = -EBUSY; + goto out; + } + ret = __register_ftrace_function(ops); ftrace_startup(); - mutex_unlock(&ftrace_sysctl_lock); +out: + mutex_unlock(&ftrace_sysctl_lock); return ret; } @@ -1474,16 +1493,45 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, } #ifdef CONFIG_FUNCTION_RET_TRACER + +/* The callback that hooks the return of a function */ trace_function_return_t ftrace_function_return = (trace_function_return_t)ftrace_stub; -void register_ftrace_return(trace_function_return_t func) + +int register_ftrace_return(trace_function_return_t func) { + int ret = 0; + + mutex_lock(&ftrace_sysctl_lock); + + /* + * Don't launch return tracing if normal function + * tracing is already running. + */ + if (ftrace_trace_function != ftrace_stub) { + ret = -EBUSY; + goto out; + } + + ftrace_tracing_type = FTRACE_TYPE_RETURN; ftrace_function_return = func; + ftrace_startup(); + +out: + mutex_unlock(&ftrace_sysctl_lock); + return ret; } void unregister_ftrace_return(void) { + mutex_lock(&ftrace_sysctl_lock); + ftrace_function_return = (trace_function_return_t)ftrace_stub; + ftrace_shutdown(); + /* Restore normal tracing type */ + ftrace_tracing_type = FTRACE_TYPE_ENTER; + + mutex_unlock(&ftrace_sysctl_lock); } #endif diff --git a/kernel/trace/trace_functions_return.c b/kernel/trace/trace_functions_return.c index 61185f756a13..a68564af022b 100644 --- a/kernel/trace/trace_functions_return.c +++ b/kernel/trace/trace_functions_return.c @@ -14,29 +14,18 @@ #include "trace.h" -static void start_return_trace(struct trace_array *tr) -{ - register_ftrace_return(&trace_function_return); -} - -static void stop_return_trace(struct trace_array *tr) -{ - unregister_ftrace_return(); -} - static int return_trace_init(struct trace_array *tr) { int cpu; for_each_online_cpu(cpu) tracing_reset(tr, cpu); - start_return_trace(tr); - return 0; + return register_ftrace_return(&trace_function_return); } static void return_trace_reset(struct trace_array *tr) { - stop_return_trace(tr); + unregister_ftrace_return(); } -- cgit v1.2.3 From 954e100d2275cb2f150f2b18d5cddcdf67b956ac Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:34 -0500 Subject: rcu: add rcu_read_*_sched_notrace() Impact: new API, useful for tracepoints and markers. Add _notrace version to rcu_read_*_sched(). Signed-off-by: Mathieu Desnoyers Reviewed-by: Paul E McKenney Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 86f1f5e43e33..895dc9c1088c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -142,6 +142,7 @@ struct rcu_head { * on the write-side to insure proper synchronization. */ #define rcu_read_lock_sched() preempt_disable() +#define rcu_read_lock_sched_notrace() preempt_disable_notrace() /* * rcu_read_unlock_sched - marks the end of a RCU-classic critical section @@ -149,6 +150,7 @@ struct rcu_head { * See rcu_read_lock_sched for more information. */ #define rcu_read_unlock_sched() preempt_enable() +#define rcu_read_unlock_sched_notrace() preempt_enable_notrace() -- cgit v1.2.3 From e3f8c4b9117d70127a8cab480af83bbfd048a28b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 14 Nov 2008 17:47:36 -0500 Subject: markers: add missing stdargs.h include, needed due to va_list usage Impact: build fix (for future changes) That seemed to cause built issue when marker.h is included early, even though stdargs.h is included in kernel.h. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/marker.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/marker.h b/include/linux/marker.h index 4cf45472d9f5..05ec0df37089 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -12,6 +12,7 @@ * See the file COPYING for more details. */ +#include #include struct module; -- cgit v1.2.3 From c1df1bd2c4d4b20c83755a0f41956b57aec4842a Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:39 -0500 Subject: markers: auto enable tracepoints (new API : trace_mark_tp()) Impact: new API Add a new API trace_mark_tp(), which declares a marker within a tracepoint probe. When the marker is activated, the tracepoint is automatically enabled. No branch test is used at the marker site, because it would be a duplicate of the branch already present in the tracepoint. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/marker.h | 45 +++++++++++++++++++++++++++++++++++++++++- init/Kconfig | 1 + kernel/marker.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 96 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/marker.h b/include/linux/marker.h index 05ec0df37089..57a307018ceb 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -49,6 +49,8 @@ struct marker { void (*call)(const struct marker *mdata, void *call_private, ...); struct marker_probe_closure single; struct marker_probe_closure *multi; + const char *tp_name; /* Optional tracepoint name */ + void *tp_cb; /* Optional tracepoint callback */ } __attribute__((aligned(8))); #ifdef CONFIG_MARKERS @@ -73,7 +75,7 @@ struct marker { __attribute__((section("__markers"), aligned(8))) = \ { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ 0, 0, marker_probe_cb, \ - { __mark_empty_function, NULL}, NULL }; \ + { __mark_empty_function, NULL}, NULL, NULL, NULL }; \ __mark_check_format(format, ## args); \ if (unlikely(__mark_##name.state)) { \ (*__mark_##name.call) \ @@ -81,11 +83,38 @@ struct marker { } \ } while (0) +#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ + do { \ + void __check_tp_type(void) \ + { \ + register_trace_##tp_name(tp_cb); \ + } \ + static const char __mstrtab_##name[] \ + __attribute__((section("__markers_strings"))) \ + = #name "\0" format; \ + static struct marker __mark_##name \ + __attribute__((section("__markers"), aligned(8))) = \ + { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ + 0, 0, marker_probe_cb, \ + { __mark_empty_function, NULL}, NULL, #tp_name, tp_cb };\ + __mark_check_format(format, ## args); \ + (*__mark_##name.call)(&__mark_##name, call_private, \ + ## args); \ + } while (0) + extern void marker_update_probe_range(struct marker *begin, struct marker *end); #else /* !CONFIG_MARKERS */ #define __trace_mark(generic, name, call_private, format, args...) \ __mark_check_format(format, ## args) +#define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ + do { \ + void __check_tp_type(void) \ + { \ + register_trace_##tp_name(tp_cb); \ + } \ + __mark_check_format(format, ## args); \ + } while (0) static inline void marker_update_probe_range(struct marker *begin, struct marker *end) { } @@ -117,6 +146,20 @@ static inline void marker_update_probe_range(struct marker *begin, #define _trace_mark(name, format, args...) \ __trace_mark(1, name, NULL, format, ## args) +/** + * trace_mark_tp - Marker in a tracepoint callback + * @name: marker name, not quoted. + * @tp_name: tracepoint name, not quoted. + * @tp_cb: tracepoint callback. Should have an associated global symbol so it + * is not optimized away by the compiler (should not be static). + * @format: format string + * @args...: variable argument list + * + * Places a marker in a tracepoint callback. + */ +#define trace_mark_tp(name, tp_name, tp_cb, format, args...) \ + __trace_mark_tp(name, NULL, tp_name, tp_cb, format, ## args) + /** * MARK_NOARGS - Format string for a marker with no argument. */ diff --git a/init/Kconfig b/init/Kconfig index 86b00c53fade..f5bacb438711 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -808,6 +808,7 @@ config TRACEPOINTS config MARKERS bool "Activate markers" + depends on TRACEPOINTS help Place an empty function call at each marker site. Can be dynamically changed for a probe function. diff --git a/kernel/marker.c b/kernel/marker.c index 348e70cc355a..c14ec26a9b9f 100644 --- a/kernel/marker.c +++ b/kernel/marker.c @@ -479,7 +479,7 @@ static int marker_set_format(struct marker_entry *entry, const char *format) static int set_marker(struct marker_entry *entry, struct marker *elem, int active) { - int ret; + int ret = 0; WARN_ON(strcmp(entry->name, elem->name) != 0); if (entry->format) { @@ -531,9 +531,40 @@ static int set_marker(struct marker_entry *entry, struct marker *elem, */ smp_wmb(); elem->ptype = entry->ptype; + + if (elem->tp_name && (active ^ elem->state)) { + WARN_ON(!elem->tp_cb); + /* + * It is ok to directly call the probe registration because type + * checking has been done in the __trace_mark_tp() macro. + */ + + if (active) { + /* + * try_module_get should always succeed because we hold + * lock_module() to get the tp_cb address. + */ + ret = try_module_get(__module_text_address( + (unsigned long)elem->tp_cb)); + BUG_ON(!ret); + ret = tracepoint_probe_register_noupdate( + elem->tp_name, + elem->tp_cb); + } else { + ret = tracepoint_probe_unregister_noupdate( + elem->tp_name, + elem->tp_cb); + /* + * tracepoint_probe_update_all() must be called + * before the module containing tp_cb is unloaded. + */ + module_put(__module_text_address( + (unsigned long)elem->tp_cb)); + } + } elem->state = active; - return 0; + return ret; } /* @@ -544,7 +575,24 @@ static int set_marker(struct marker_entry *entry, struct marker *elem, */ static void disable_marker(struct marker *elem) { + int ret; + /* leave "call" as is. It is known statically. */ + if (elem->tp_name && elem->state) { + WARN_ON(!elem->tp_cb); + /* + * It is ok to directly call the probe registration because type + * checking has been done in the __trace_mark_tp() macro. + */ + ret = tracepoint_probe_unregister_noupdate(elem->tp_name, + elem->tp_cb); + WARN_ON(ret); + /* + * tracepoint_probe_update_all() must be called + * before the module containing tp_cb is unloaded. + */ + module_put(__module_text_address((unsigned long)elem->tp_cb)); + } elem->state = 0; elem->single.func = __mark_empty_function; /* Update the function before setting the ptype */ @@ -608,6 +656,7 @@ static void marker_update_probes(void) marker_update_probe_range(__start___markers, __stop___markers); /* Markers in modules. */ module_update_markers(); + tracepoint_probe_update_all(); } /** -- cgit v1.2.3 From a0bca6a59ebc052751eed6e3b182c153495672d8 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:40 -0500 Subject: markers: create DEFINE_MARKER and GET_MARKER (new API) Impact: new API. Allow markers to be used only for declaration, without function call associated. Useful to create specialized probes. The problem we had is that two function calls were required when one wanted to put a marker in a tracepoint probe. Now the marker can be used simply for trace data type declaration, leaving the trace write work within the tracepoint probe without any additional function call. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- Documentation/markers.txt | 14 ++++++++++++++ include/linux/marker.h | 39 +++++++++++++++++++++++---------------- 2 files changed, 37 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/Documentation/markers.txt b/Documentation/markers.txt index 089f6138fcd9..6d275e4ef385 100644 --- a/Documentation/markers.txt +++ b/Documentation/markers.txt @@ -70,6 +70,20 @@ a printk warning which identifies the inconsistency: "Format mismatch for probe probe_name (format), marker (format)" +Another way to use markers is to simply define the marker without generating any +function call to actually call into the marker. This is useful in combination +with tracepoint probes in a scheme like this : + +void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk); + +DEFINE_MARKER_TP(marker_eventname, tracepoint_name, probe_tracepoint_name, + "arg1 %u pid %d"); + +notrace void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk) +{ + struct marker *marker = &GET_MARKER(kernel_irq_entry); + /* write data to trace buffers ... */ +} * Probe / marker example diff --git a/include/linux/marker.h b/include/linux/marker.h index 57a307018ceb..34c14bc957f5 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -55,6 +55,22 @@ struct marker { #ifdef CONFIG_MARKERS +#define _DEFINE_MARKER(name, tp_name_str, tp_cb, format) \ + static const char __mstrtab_##name[] \ + __attribute__((section("__markers_strings"))) \ + = #name "\0" format; \ + static struct marker __mark_##name \ + __attribute__((section("__markers"), aligned(8))) = \ + { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ + 0, 0, marker_probe_cb, { __mark_empty_function, NULL},\ + NULL, tp_name_str, tp_cb } + +#define DEFINE_MARKER(name, format) \ + _DEFINE_MARKER(name, NULL, NULL, format) + +#define DEFINE_MARKER_TP(name, tp_name, tp_cb, format) \ + _DEFINE_MARKER(name, #tp_name, tp_cb, format) + /* * Note : the empty asm volatile with read constraint is used here instead of a * "used" attribute to fix a gcc 4.1.x bug. @@ -68,14 +84,7 @@ struct marker { */ #define __trace_mark(generic, name, call_private, format, args...) \ do { \ - static const char __mstrtab_##name[] \ - __attribute__((section("__markers_strings"))) \ - = #name "\0" format; \ - static struct marker __mark_##name \ - __attribute__((section("__markers"), aligned(8))) = \ - { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ - 0, 0, marker_probe_cb, \ - { __mark_empty_function, NULL}, NULL, NULL, NULL }; \ + DEFINE_MARKER(name, format); \ __mark_check_format(format, ## args); \ if (unlikely(__mark_##name.state)) { \ (*__mark_##name.call) \ @@ -89,14 +98,7 @@ struct marker { { \ register_trace_##tp_name(tp_cb); \ } \ - static const char __mstrtab_##name[] \ - __attribute__((section("__markers_strings"))) \ - = #name "\0" format; \ - static struct marker __mark_##name \ - __attribute__((section("__markers"), aligned(8))) = \ - { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ - 0, 0, marker_probe_cb, \ - { __mark_empty_function, NULL}, NULL, #tp_name, tp_cb };\ + DEFINE_MARKER_TP(name, tp_name, tp_cb, format); \ __mark_check_format(format, ## args); \ (*__mark_##name.call)(&__mark_##name, call_private, \ ## args); \ @@ -104,7 +106,11 @@ struct marker { extern void marker_update_probe_range(struct marker *begin, struct marker *end); + +#define GET_MARKER(name) (__mark_##name) + #else /* !CONFIG_MARKERS */ +#define DEFINE_MARKER(name, tp_name, tp_cb, format) #define __trace_mark(generic, name, call_private, format, args...) \ __mark_check_format(format, ## args) #define __trace_mark_tp(name, call_private, tp_name, tp_cb, format, args...) \ @@ -118,6 +124,7 @@ extern void marker_update_probe_range(struct marker *begin, static inline void marker_update_probe_range(struct marker *begin, struct marker *end) { } +#define GET_MARKER(name) #endif /* CONFIG_MARKERS */ /** -- cgit v1.2.3 From da7b3eab167091693ad215ad7692f7d0d24d1356 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:43 -0500 Subject: tracepoints: use rcu_*_sched_notrace Make sure tracepoints can be called within ftrace callbacks. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/tracepoint.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 63064e9403f2..69648c54a326 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -40,14 +40,14 @@ struct tracepoint { do { \ void **it_func; \ \ - rcu_read_lock_sched(); \ + rcu_read_lock_sched_notrace(); \ it_func = rcu_dereference((tp)->funcs); \ if (it_func) { \ do { \ ((void(*)(proto))(*it_func))(args); \ } while (*(++it_func)); \ } \ - rcu_read_unlock_sched(); \ + rcu_read_unlock_sched_notrace(); \ } while (0) /* -- cgit v1.2.3 From c420970ef476d7d68df119711700666224001f43 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:44 -0500 Subject: tracepoints: use unregister return value Impact: bugfix. Unregistering a tracepoint can fail. Return the error value. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/tracepoint.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 69648c54a326..c60a791f8874 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -73,9 +73,9 @@ struct tracepoint { return tracepoint_probe_register(#name ":" #proto, \ (void *)probe); \ } \ - static inline void unregister_trace_##name(void (*probe)(proto))\ + static inline int unregister_trace_##name(void (*probe)(proto)) \ { \ - tracepoint_probe_unregister(#name ":" #proto, \ + return tracepoint_probe_unregister(#name ":" #proto, \ (void *)probe); \ } @@ -92,8 +92,10 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, { \ return -ENOSYS; \ } \ - static inline void unregister_trace_##name(void (*probe)(proto))\ - { } + static inline int unregister_trace_##name(void (*probe)(proto)) \ + { \ + return -ENOSYS; \ + } static inline void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) -- cgit v1.2.3 From 5f382671def7cb9c0f4b75d586dc5f60dca5e1c3 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:45 -0500 Subject: tracepoints: do not put arguments in name Impact: cleanup That's overkill, takes space. We have a global tracepoint registery in header files anyway. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- include/linux/tracepoint.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index c60a791f8874..7e9b42aeae0e 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -60,7 +60,7 @@ struct tracepoint { { \ static const char __tpstrtab_##name[] \ __attribute__((section("__tracepoints_strings"))) \ - = #name ":" #proto; \ + = #name; \ static struct tracepoint __tracepoint_##name \ __attribute__((section("__tracepoints"), aligned(8))) = \ { __tpstrtab_##name, 0, NULL }; \ @@ -70,13 +70,11 @@ struct tracepoint { } \ static inline int register_trace_##name(void (*probe)(proto)) \ { \ - return tracepoint_probe_register(#name ":" #proto, \ - (void *)probe); \ + return tracepoint_probe_register(#name, (void *)probe); \ } \ static inline int unregister_trace_##name(void (*probe)(proto)) \ { \ - return tracepoint_probe_unregister(#name ":" #proto, \ - (void *)probe); \ + return tracepoint_probe_unregister(#name, (void *)probe);\ } extern void tracepoint_update_probe_range(struct tracepoint *begin, -- cgit v1.2.3 From 7e066fb870fcd1025ec3ba7bbde5d541094f4ce1 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Fri, 14 Nov 2008 17:47:47 -0500 Subject: tracepoints: add DECLARE_TRACE() and DEFINE_TRACE() Impact: API *CHANGE*. Must update all tracepoint users. Add DEFINE_TRACE() to tracepoints to let them declare the tracepoint structure in a single spot for all the kernel. It helps reducing memory consumption, especially when declaring a lot of tracepoints, e.g. for kmalloc tracing. *API CHANGE WARNING*: now, DECLARE_TRACE() must be used in headers for tracepoint declarations rather than DEFINE_TRACE(). This is the sane way to do it. The name previously used was misleading. Updates scheduler instrumentation to follow this API change. Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar --- Documentation/tracepoints.txt | 7 ++++++- include/asm-generic/vmlinux.lds.h | 1 + include/linux/tracepoint.h | 35 +++++++++++++++++++++++---------- include/trace/sched.h | 24 +++++++++++----------- kernel/exit.c | 4 ++++ kernel/fork.c | 2 ++ kernel/kthread.c | 3 +++ kernel/sched.c | 6 ++++++ kernel/signal.c | 2 ++ samples/tracepoints/tp-samples-trace.h | 4 ++-- samples/tracepoints/tracepoint-sample.c | 3 +++ 11 files changed, 66 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt index 5d354e167494..e8ad47b437f3 100644 --- a/Documentation/tracepoints.txt +++ b/Documentation/tracepoints.txt @@ -42,7 +42,7 @@ In include/trace/subsys.h : #include -DEFINE_TRACE(subsys_eventname, +DECLARE_TRACE(subsys_eventname, TPPTOTO(int firstarg, struct task_struct *p), TPARGS(firstarg, p)); @@ -50,6 +50,8 @@ In subsys/file.c (where the tracing statement must be added) : #include +DEFINE_TRACE(subsys_eventname); + void somefct(void) { ... @@ -86,6 +88,9 @@ to limit collisions. Tracepoint names are global to the kernel: they are considered as being the same whether they are in the core kernel image or in modules. +If the tracepoint has to be used in kernel modules, an +EXPORT_TRACEPOINT_SYMBOL_GPL() or EXPORT_TRACEPOINT_SYMBOL() can be used to +export the defined tracepoints. * Probe / tracepoint example diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index a5e4ed9baec8..3b46ae464933 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -71,6 +71,7 @@ VMLINUX_SYMBOL(__start___markers) = .; \ *(__markers) \ VMLINUX_SYMBOL(__stop___markers) = .; \ + . = ALIGN(32); \ VMLINUX_SYMBOL(__start___tracepoints) = .; \ *(__tracepoints) \ VMLINUX_SYMBOL(__stop___tracepoints) = .; \ diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 7e9b42aeae0e..757005458366 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -24,8 +24,12 @@ struct tracepoint { const char *name; /* Tracepoint name */ int state; /* State. */ void **funcs; -} __attribute__((aligned(8))); - +} __attribute__((aligned(32))); /* + * Aligned on 32 bytes because it is + * globally visible and gcc happily + * align these on the structure size. + * Keep in sync with vmlinux.lds.h. + */ #define TPPROTO(args...) args #define TPARGS(args...) args @@ -55,15 +59,10 @@ struct tracepoint { * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. */ -#define DEFINE_TRACE(name, proto, args) \ +#define DECLARE_TRACE(name, proto, args) \ + extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ - static const char __tpstrtab_##name[] \ - __attribute__((section("__tracepoints_strings"))) \ - = #name; \ - static struct tracepoint __tracepoint_##name \ - __attribute__((section("__tracepoints"), aligned(8))) = \ - { __tpstrtab_##name, 0, NULL }; \ if (unlikely(__tracepoint_##name.state)) \ __DO_TRACE(&__tracepoint_##name, \ TPPROTO(proto), TPARGS(args)); \ @@ -77,11 +76,23 @@ struct tracepoint { return tracepoint_probe_unregister(#name, (void *)probe);\ } +#define DEFINE_TRACE(name) \ + static const char __tpstrtab_##name[] \ + __attribute__((section("__tracepoints_strings"))) = #name; \ + struct tracepoint __tracepoint_##name \ + __attribute__((section("__tracepoints"), aligned(32))) = \ + { __tpstrtab_##name, 0, NULL } + +#define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \ + EXPORT_SYMBOL_GPL(__tracepoint_##name) +#define EXPORT_TRACEPOINT_SYMBOL(name) \ + EXPORT_SYMBOL(__tracepoint_##name) + extern void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end); #else /* !CONFIG_TRACEPOINTS */ -#define DEFINE_TRACE(name, proto, args) \ +#define DECLARE_TRACE(name, proto, args) \ static inline void _do_trace_##name(struct tracepoint *tp, proto) \ { } \ static inline void trace_##name(proto) \ @@ -95,6 +106,10 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, return -ENOSYS; \ } +#define DEFINE_TRACE(name) +#define EXPORT_TRACEPOINT_SYMBOL_GPL(name) +#define EXPORT_TRACEPOINT_SYMBOL(name) + static inline void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) { } diff --git a/include/trace/sched.h b/include/trace/sched.h index ad47369d01b5..9b2854abf7e2 100644 --- a/include/trace/sched.h +++ b/include/trace/sched.h @@ -4,52 +4,52 @@ #include #include -DEFINE_TRACE(sched_kthread_stop, +DECLARE_TRACE(sched_kthread_stop, TPPROTO(struct task_struct *t), TPARGS(t)); -DEFINE_TRACE(sched_kthread_stop_ret, +DECLARE_TRACE(sched_kthread_stop_ret, TPPROTO(int ret), TPARGS(ret)); -DEFINE_TRACE(sched_wait_task, +DECLARE_TRACE(sched_wait_task, TPPROTO(struct rq *rq, struct task_struct *p), TPARGS(rq, p)); -DEFINE_TRACE(sched_wakeup, +DECLARE_TRACE(sched_wakeup, TPPROTO(struct rq *rq, struct task_struct *p), TPARGS(rq, p)); -DEFINE_TRACE(sched_wakeup_new, +DECLARE_TRACE(sched_wakeup_new, TPPROTO(struct rq *rq, struct task_struct *p), TPARGS(rq, p)); -DEFINE_TRACE(sched_switch, +DECLARE_TRACE(sched_switch, TPPROTO(struct rq *rq, struct task_struct *prev, struct task_struct *next), TPARGS(rq, prev, next)); -DEFINE_TRACE(sched_migrate_task, +DECLARE_TRACE(sched_migrate_task, TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu), TPARGS(rq, p, dest_cpu)); -DEFINE_TRACE(sched_process_free, +DECLARE_TRACE(sched_process_free, TPPROTO(struct task_struct *p), TPARGS(p)); -DEFINE_TRACE(sched_process_exit, +DECLARE_TRACE(sched_process_exit, TPPROTO(struct task_struct *p), TPARGS(p)); -DEFINE_TRACE(sched_process_wait, +DECLARE_TRACE(sched_process_wait, TPPROTO(struct pid *pid), TPARGS(pid)); -DEFINE_TRACE(sched_process_fork, +DECLARE_TRACE(sched_process_fork, TPPROTO(struct task_struct *parent, struct task_struct *child), TPARGS(parent, child)); -DEFINE_TRACE(sched_signal_send, +DECLARE_TRACE(sched_signal_send, TPPROTO(int sig, struct task_struct *p), TPARGS(sig, p)); diff --git a/kernel/exit.c b/kernel/exit.c index ae2b92be5fae..f995d2418668 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -54,6 +54,10 @@ #include #include +DEFINE_TRACE(sched_process_free); +DEFINE_TRACE(sched_process_exit); +DEFINE_TRACE(sched_process_wait); + static void exit_mm(struct task_struct * tsk); static inline int task_detached(struct task_struct *p) diff --git a/kernel/fork.c b/kernel/fork.c index f6083561dfe0..0837d0deee5f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -79,6 +79,8 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0; __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ +DEFINE_TRACE(sched_process_fork); + int nr_processes(void) { int cpu; diff --git a/kernel/kthread.c b/kernel/kthread.c index 8e7a7ce3ed0a..4fbc456f393d 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -21,6 +21,9 @@ static DEFINE_SPINLOCK(kthread_create_lock); static LIST_HEAD(kthread_create_list); struct task_struct *kthreadd_task; +DEFINE_TRACE(sched_kthread_stop); +DEFINE_TRACE(sched_kthread_stop_ret); + struct kthread_create_info { /* Information passed to kthread() from kthreadd. */ diff --git a/kernel/sched.c b/kernel/sched.c index 50a21f964679..327f91c63c99 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -118,6 +118,12 @@ */ #define RUNTIME_INF ((u64)~0ULL) +DEFINE_TRACE(sched_wait_task); +DEFINE_TRACE(sched_wakeup); +DEFINE_TRACE(sched_wakeup_new); +DEFINE_TRACE(sched_switch); +DEFINE_TRACE(sched_migrate_task); + #ifdef CONFIG_SMP /* * Divide a load by a sched group cpu_power : (load / sg->__cpu_power) diff --git a/kernel/signal.c b/kernel/signal.c index 4530fc654455..e9afe63da24b 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -41,6 +41,8 @@ static struct kmem_cache *sigqueue_cachep; +DEFINE_TRACE(sched_signal_send); + static void __user *sig_handler(struct task_struct *t, int sig) { return t->sighand->action[sig - 1].sa.sa_handler; diff --git a/samples/tracepoints/tp-samples-trace.h b/samples/tracepoints/tp-samples-trace.h index 0216b55bd640..01724e04c556 100644 --- a/samples/tracepoints/tp-samples-trace.h +++ b/samples/tracepoints/tp-samples-trace.h @@ -4,10 +4,10 @@ #include /* for struct inode and struct file */ #include -DEFINE_TRACE(subsys_event, +DECLARE_TRACE(subsys_event, TPPROTO(struct inode *inode, struct file *file), TPARGS(inode, file)); -DEFINE_TRACE(subsys_eventb, +DECLARE_TRACE(subsys_eventb, TPPROTO(void), TPARGS()); #endif diff --git a/samples/tracepoints/tracepoint-sample.c b/samples/tracepoints/tracepoint-sample.c index 4ae4b7fcc043..00d169792a3e 100644 --- a/samples/tracepoints/tracepoint-sample.c +++ b/samples/tracepoints/tracepoint-sample.c @@ -13,6 +13,9 @@ #include #include "tp-samples-trace.h" +DEFINE_TRACE(subsys_event); +DEFINE_TRACE(subsys_eventb); + struct proc_dir_entry *pentry_example; static int my_open(struct inode *inode, struct file *file) -- cgit v1.2.3 From f004f3ea34209d8b836426b26ade3dc502631b18 Mon Sep 17 00:00:00 2001 From: Paulius Zaleckas Date: Fri, 14 Nov 2008 00:24:34 +0000 Subject: phylib: make mdio-gpio work without OF (v4) make mdio-gpio work with non OpenFirmware gpio implementation. Aditional changes to mdio-gpio: - use gpio_request() and gpio_free() - place irq[] array in struct mdio_gpio_info - add module description, author and license - add note about compiling this driver as module - rename mdc and mdio function (were ugly names) - change MII to MDIO in bus name - add __init __exit to module (un)loading functions - probe fails if no phys added to the bus - kzalloc bitbang with sizeof(*bitbang) Changes since v3: - keep bus naming "%x" to be compatible with existing drivers. Changes since v2: - more #ifdefs reduction - platform driver will be registered on OF platforms also - unified platform and OF bus_id to phy%i Changes since v1: - removed NO_IRQ - reduced #idefs Laurent, please test this driver under OF. Signed-off-by: Paulius Zaleckas Signed-off-by: David S. Miller --- drivers/net/phy/Kconfig | 5 +- drivers/net/phy/mdio-gpio.c | 232 +++++++++++++++++++++++++++++++------------- include/linux/mdio-gpio.h | 25 +++++ 3 files changed, 191 insertions(+), 71 deletions(-) create mode 100644 include/linux/mdio-gpio.h (limited to 'include') diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 031807751991..c4c5a2f8ec74 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -86,8 +86,11 @@ config MDIO_BITBANG config MDIO_GPIO tristate "Support for GPIO lib-based bitbanged MDIO buses" - depends on MDIO_BITBANG && OF_GPIO + depends on MDIO_BITBANG && GENERIC_GPIO ---help--- Supports GPIO lib-based MDIO busses. + To compile this driver as a module, choose M here: the module + will be called mdio-gpio. + endif # PHYLIB diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c index 2ff97754e574..a439ebeb4319 100644 --- a/drivers/net/phy/mdio-gpio.c +++ b/drivers/net/phy/mdio-gpio.c @@ -1,9 +1,12 @@ /* - * OpenFirmware GPIO based MDIO bitbang driver. + * GPIO based MDIO bitbang driver. + * Supports OpenFirmware. * * Copyright (c) 2008 CSE Semaphore Belgium. * by Laurent Pinchart * + * Copyright (C) 2008, Paulius Zaleckas + * * Based on earlier work by * * Copyright (c) 2003 Intracom S.A. @@ -21,9 +24,14 @@ #include #include #include -#include +#include +#include +#include + +#ifdef CONFIG_OF_GPIO #include #include +#endif struct mdio_gpio_info { struct mdiobb_ctrl ctrl; @@ -41,7 +49,7 @@ static void mdio_dir(struct mdiobb_ctrl *ctrl, int dir) gpio_direction_input(bitbang->mdio); } -static int mdio_read(struct mdiobb_ctrl *ctrl) +static int mdio_get(struct mdiobb_ctrl *ctrl) { struct mdio_gpio_info *bitbang = container_of(ctrl, struct mdio_gpio_info, ctrl); @@ -49,7 +57,7 @@ static int mdio_read(struct mdiobb_ctrl *ctrl) return gpio_get_value(bitbang->mdio); } -static void mdio(struct mdiobb_ctrl *ctrl, int what) +static void mdio_set(struct mdiobb_ctrl *ctrl, int what) { struct mdio_gpio_info *bitbang = container_of(ctrl, struct mdio_gpio_info, ctrl); @@ -57,7 +65,7 @@ static void mdio(struct mdiobb_ctrl *ctrl, int what) gpio_set_value(bitbang->mdio, what); } -static void mdc(struct mdiobb_ctrl *ctrl, int what) +static void mdc_set(struct mdiobb_ctrl *ctrl, int what) { struct mdio_gpio_info *bitbang = container_of(ctrl, struct mdio_gpio_info, ctrl); @@ -67,93 +75,69 @@ static void mdc(struct mdiobb_ctrl *ctrl, int what) static struct mdiobb_ops mdio_gpio_ops = { .owner = THIS_MODULE, - .set_mdc = mdc, + .set_mdc = mdc_set, .set_mdio_dir = mdio_dir, - .set_mdio_data = mdio, - .get_mdio_data = mdio_read, + .set_mdio_data = mdio_set, + .get_mdio_data = mdio_get, }; -static int __devinit mdio_ofgpio_bitbang_init(struct mii_bus *bus, - struct device_node *np) -{ - struct mdio_gpio_info *bitbang = bus->priv; - - bitbang->mdc = of_get_gpio(np, 0); - bitbang->mdio = of_get_gpio(np, 1); - - if (bitbang->mdc < 0 || bitbang->mdio < 0) - return -ENODEV; - - snprintf(bus->id, MII_BUS_ID_SIZE, "%x", bitbang->mdc); - return 0; -} - -static void __devinit add_phy(struct mii_bus *bus, struct device_node *np) +static int __devinit mdio_gpio_bus_init(struct device *dev, + struct mdio_gpio_platform_data *pdata, + int bus_id) { - const u32 *data; - int len, id, irq; - - data = of_get_property(np, "reg", &len); - if (!data || len != 4) - return; - - id = *data; - bus->phy_mask &= ~(1 << id); - - irq = of_irq_to_resource(np, 0, NULL); - if (irq != NO_IRQ) - bus->irq[id] = irq; -} - -static int __devinit mdio_ofgpio_probe(struct of_device *ofdev, - const struct of_device_id *match) -{ - struct device_node *np = NULL; struct mii_bus *new_bus; struct mdio_gpio_info *bitbang; int ret = -ENOMEM; int i; - bitbang = kzalloc(sizeof(struct mdio_gpio_info), GFP_KERNEL); + bitbang = kzalloc(sizeof(*bitbang), GFP_KERNEL); if (!bitbang) goto out; bitbang->ctrl.ops = &mdio_gpio_ops; + bitbang->mdc = pdata->mdc; + bitbang->mdio = pdata->mdio; new_bus = alloc_mdio_bitbang(&bitbang->ctrl); if (!new_bus) goto out_free_bitbang; - new_bus->name = "GPIO Bitbanged MII", + new_bus->name = "GPIO Bitbanged MDIO", - ret = mdio_ofgpio_bitbang_init(new_bus, ofdev->node); - if (ret) - goto out_free_bus; + ret = -ENODEV; - new_bus->phy_mask = ~0; - new_bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); - if (!new_bus->irq) + new_bus->phy_mask = pdata->phy_mask; + new_bus->irq = pdata->irqs; + new_bus->parent = dev; + + if (new_bus->phy_mask == ~0) goto out_free_bus; for (i = 0; i < PHY_MAX_ADDR; i++) - new_bus->irq[i] = -1; + if (!new_bus->irq[i]) + new_bus->irq[i] = PHY_POLL; - while ((np = of_get_next_child(ofdev->node, np))) - if (!strcmp(np->type, "ethernet-phy")) - add_phy(new_bus, np); + snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", bus_id); + + if (gpio_request(bitbang->mdc, "mdc")) + goto out_free_bus; - new_bus->parent = &ofdev->dev; - dev_set_drvdata(&ofdev->dev, new_bus); + if (gpio_request(bitbang->mdio, "mdio")) + goto out_free_mdc; + + dev_set_drvdata(dev, new_bus); ret = mdiobus_register(new_bus); if (ret) - goto out_free_irqs; + goto out_free_all; return 0; -out_free_irqs: - dev_set_drvdata(&ofdev->dev, NULL); - kfree(new_bus->irq); +out_free_all: + dev_set_drvdata(dev, NULL); + gpio_free(bitbang->mdio); +out_free_mdc: + gpio_free(bitbang->mdc); out_free_bus: free_mdio_bitbang(new_bus); out_free_bitbang: @@ -162,16 +146,86 @@ out: return ret; } -static int mdio_ofgpio_remove(struct of_device *ofdev) +static void __devexit mdio_gpio_bus_destroy(struct device *dev) { - struct mii_bus *bus = dev_get_drvdata(&ofdev->dev); + struct mii_bus *bus = dev_get_drvdata(dev); struct mdio_gpio_info *bitbang = bus->priv; mdiobus_unregister(bus); - kfree(bus->irq); free_mdio_bitbang(bus); - dev_set_drvdata(&ofdev->dev, NULL); + dev_set_drvdata(dev, NULL); + gpio_free(bitbang->mdc); + gpio_free(bitbang->mdio); kfree(bitbang); +} + +static int __devinit mdio_gpio_probe(struct platform_device *pdev) +{ + struct mdio_gpio_platform_data *pdata = pdev->dev.platform_data; + + if (!pdata) + return -ENODEV; + + return mdio_gpio_bus_init(&pdev->dev, pdata, pdev->id); +} + +static int __devexit mdio_gpio_remove(struct platform_device *pdev) +{ + mdio_gpio_bus_destroy(&pdev->dev); + + return 0; +} + +#ifdef CONFIG_OF_GPIO +static void __devinit add_phy(struct mdio_gpio_platform_data *pdata, + struct device_node *np) +{ + const u32 *data; + int len, id, irq; + + data = of_get_property(np, "reg", &len); + if (!data || len != 4) + return; + + id = *data; + pdata->phy_mask &= ~(1 << id); + + irq = of_irq_to_resource(np, 0, NULL); + if (irq) + pdata->irqs[id] = irq; +} + +static int __devinit mdio_ofgpio_probe(struct of_device *ofdev, + const struct of_device_id *match) +{ + struct device_node *np = NULL; + struct mdio_gpio_platform_data *pdata; + + pdata = kzalloc(sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return -ENOMEM; + + pdata->mdc = of_get_gpio(ofdev->node, 0); + pdata->mdio = of_get_gpio(ofdev->node, 1); + + if (pdata->mdc < 0 || pdata->mdio < 0) + goto out_free; + + while ((np = of_get_next_child(ofdev->node, np))) + if (!strcmp(np->type, "ethernet-phy")) + add_phy(pdata, np); + + return mdio_gpio_bus_init(&ofdev->dev, pdata, pdata->mdc); + +out_free: + kfree(pdata); + return -ENODEV; +} + +static int __devexit mdio_ofgpio_remove(struct of_device *ofdev) +{ + mdio_gpio_bus_destroy(&ofdev->dev); + kfree(ofdev->dev.platform_data); return 0; } @@ -187,18 +241,56 @@ static struct of_platform_driver mdio_ofgpio_driver = { .name = "mdio-gpio", .match_table = mdio_ofgpio_match, .probe = mdio_ofgpio_probe, - .remove = mdio_ofgpio_remove, + .remove = __devexit_p(mdio_ofgpio_remove), }; -static int mdio_ofgpio_init(void) +static inline int __init mdio_ofgpio_init(void) { return of_register_platform_driver(&mdio_ofgpio_driver); } -static void mdio_ofgpio_exit(void) +static inline void __exit mdio_ofgpio_exit(void) { of_unregister_platform_driver(&mdio_ofgpio_driver); } +#else +static inline int __init mdio_ofgpio_init(void) { return 0; } +static inline void __exit mdio_ofgpio_exit(void) { } +#endif /* CONFIG_OF_GPIO */ + +static struct platform_driver mdio_gpio_driver = { + .probe = mdio_gpio_probe, + .remove = __devexit_p(mdio_gpio_remove), + .driver = { + .name = "mdio-gpio", + .owner = THIS_MODULE, + }, +}; + +static int __init mdio_gpio_init(void) +{ + int ret; + + ret = mdio_ofgpio_init(); + if (ret) + return ret; + + ret = platform_driver_register(&mdio_gpio_driver); + if (ret) + mdio_ofgpio_exit(); + + return ret; +} +module_init(mdio_gpio_init); + +static void __exit mdio_gpio_exit(void) +{ + platform_driver_unregister(&mdio_gpio_driver); + mdio_ofgpio_exit(); +} +module_exit(mdio_gpio_exit); -module_init(mdio_ofgpio_init); -module_exit(mdio_ofgpio_exit); +MODULE_ALIAS("platform:mdio-gpio"); +MODULE_AUTHOR("Laurent Pinchart, Paulius Zaleckas"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Generic driver for MDIO bus emulation using GPIO"); diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h new file mode 100644 index 000000000000..e9d3fdfe41d7 --- /dev/null +++ b/include/linux/mdio-gpio.h @@ -0,0 +1,25 @@ +/* + * MDIO-GPIO bus platform data structures + * + * Copyright (C) 2008, Paulius Zaleckas + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#ifndef __LINUX_MDIO_GPIO_H +#define __LINUX_MDIO_GPIO_H + +#include + +struct mdio_gpio_platform_data { + /* GPIO numbers for bus pins */ + unsigned int mdc; + unsigned int mdio; + + unsigned int phy_mask; + int irqs[PHY_MAX_ADDR]; +}; + +#endif /* __LINUX_MDIO_GPIO_H */ -- cgit v1.2.3 From e8b2dfe9b4501ed0047459b2756ba26e5a940a69 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Sun, 16 Nov 2008 19:32:39 -0800 Subject: TPROXY: implemented IP_RECVORIGDSTADDR socket option In case UDP traffic is redirected to a local UDP socket, the originally addressed destination address/port cannot be recovered with the in-kernel tproxy. This patch adds an IP_RECVORIGDSTADDR sockopt that enables a IP_ORIGDSTADDR ancillary message in recvmsg(). This ancillary message contains the original destination address/port of the packet being received. Signed-off-by: Balazs Scheidler Signed-off-by: David S. Miller --- include/linux/in.h | 4 ++++ net/ipv4/ip_sockglue.c | 40 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 43 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/in.h b/include/linux/in.h index db458beef19d..d60122a3a088 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -80,6 +80,10 @@ struct in_addr { /* BSD compatibility */ #define IP_RECVRETOPTS IP_RETOPTS +/* TProxy original addresses */ +#define IP_ORIGDSTADDR 20 +#define IP_RECVORIGDSTADDR IP_ORIGDSTADDR + /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ #define IP_PMTUDISC_WANT 1 /* Use per route hints */ diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index e976efeb1456..624b534201e2 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -48,6 +48,7 @@ #define IP_CMSG_RECVOPTS 8 #define IP_CMSG_RETOPTS 16 #define IP_CMSG_PASSSEC 32 +#define IP_CMSG_ORIGDSTADDR 64 /* * SOL_IP control messages. @@ -126,6 +127,27 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) security_release_secctx(secdata, seclen); } +void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) +{ + struct sockaddr_in sin; + struct iphdr *iph = ip_hdr(skb); + u16 *ports = (u16 *) skb_transport_header(skb); + + if (skb_transport_offset(skb) + 4 > skb->len) + return; + + /* All current transport protocols have the port numbers in the + * first four bytes of the transport header and this function is + * written with this assumption in mind. + */ + + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = iph->daddr; + sin.sin_port = ports[1]; + memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); + + put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin); +} void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) { @@ -160,6 +182,12 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) if (flags & 1) ip_cmsg_recv_security(msg, skb); + + if ((flags>>=1) == 0) + return; + if (flags & 1) + ip_cmsg_recv_dstaddr(msg, skb); + } int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc) @@ -421,7 +449,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, (1<= sizeof(int)) { if (get_user(val, (int __user *) optval)) return -EFAULT; @@ -509,6 +538,12 @@ static int do_ip_setsockopt(struct sock *sk, int level, else inet->cmsg_flags &= ~IP_CMSG_PASSSEC; break; + case IP_RECVORIGDSTADDR: + if (val) + inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR; + else + inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR; + break; case IP_TOS: /* This sets both TOS and Precedence */ if (sk->sk_type == SOCK_STREAM) { val &= ~3; @@ -1022,6 +1057,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_PASSSEC: val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0; break; + case IP_RECVORIGDSTADDR: + val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0; + break; case IP_TOS: val = inet->tos; break; -- cgit v1.2.3 From bbaffaca4810de1a25e32ecaf836eeaacc7a3d11 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 Nov 2008 19:37:55 -0800 Subject: rcu: Introduce hlist_nulls variant of hlist hlist uses NULL value to finish a chain. hlist_nulls variant use the low order bit set to 1 to signal an end-of-list marker. This allows to store many different end markers, so that some RCU lockless algos (used in TCP/UDP stack for example) can save some memory barriers in fast paths. Two new files are added : include/linux/list_nulls.h - mimics hlist part of include/linux/list.h, derived to hlist_nulls variant include/linux/rculist_nulls.h - mimics hlist part of include/linux/rculist.h, derived to hlist_nulls variant Only four helpers are declared for the moment : hlist_nulls_del_init_rcu(), hlist_nulls_del_rcu(), hlist_nulls_add_head_rcu() and hlist_nulls_for_each_entry_rcu() prefetches() were removed, since an end of list is not anymore NULL value. prefetches() could trigger useless (and possibly dangerous) memory transactions. Example of use (extracted from __udp4_lib_lookup()) struct sock *sk, *result; struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; int score, badness; rcu_read_lock(); begin: result = NULL; badness = -1; sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; } } /* * if the nulls value we got at the end of this lookup is * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. */ if (get_nulls_value(node) != hash) goto begin; if (result) { if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; else if (unlikely(compute_score(result, net, saddr, hnum, sport, daddr, dport, dif) < badness)) { sock_put(result); goto begin; } } rcu_read_unlock(); return result; Signed-off-by: Eric Dumazet Acked-by: Peter Zijlstra Signed-off-by: David S. Miller --- include/linux/list_nulls.h | 94 ++++++++++++++++++++++++++++++++++++ include/linux/rculist_nulls.h | 110 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 204 insertions(+) create mode 100644 include/linux/list_nulls.h create mode 100644 include/linux/rculist_nulls.h (limited to 'include') diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h new file mode 100644 index 000000000000..93150ecf3ea4 --- /dev/null +++ b/include/linux/list_nulls.h @@ -0,0 +1,94 @@ +#ifndef _LINUX_LIST_NULLS_H +#define _LINUX_LIST_NULLS_H + +/* + * Special version of lists, where end of list is not a NULL pointer, + * but a 'nulls' marker, which can have many different values. + * (up to 2^31 different values guaranteed on all platforms) + * + * In the standard hlist, termination of a list is the NULL pointer. + * In this special 'nulls' variant, we use the fact that objects stored in + * a list are aligned on a word (4 or 8 bytes alignment). + * We therefore use the last significant bit of 'ptr' : + * Set to 1 : This is a 'nulls' end-of-list marker (ptr >> 1) + * Set to 0 : This is a pointer to some object (ptr) + */ + +struct hlist_nulls_head { + struct hlist_nulls_node *first; +}; + +struct hlist_nulls_node { + struct hlist_nulls_node *next, **pprev; +}; +#define INIT_HLIST_NULLS_HEAD(ptr, nulls) \ + ((ptr)->first = (struct hlist_nulls_node *) (1UL | (((long)nulls) << 1))) + +#define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member) +/** + * ptr_is_a_nulls - Test if a ptr is a nulls + * @ptr: ptr to be tested + * + */ +static inline int is_a_nulls(const struct hlist_nulls_node *ptr) +{ + return ((unsigned long)ptr & 1); +} + +/** + * get_nulls_value - Get the 'nulls' value of the end of chain + * @ptr: end of chain + * + * Should be called only if is_a_nulls(ptr); + */ +static inline unsigned long get_nulls_value(const struct hlist_nulls_node *ptr) +{ + return ((unsigned long)ptr) >> 1; +} + +static inline int hlist_nulls_unhashed(const struct hlist_nulls_node *h) +{ + return !h->pprev; +} + +static inline int hlist_nulls_empty(const struct hlist_nulls_head *h) +{ + return is_a_nulls(h->first); +} + +static inline void __hlist_nulls_del(struct hlist_nulls_node *n) +{ + struct hlist_nulls_node *next = n->next; + struct hlist_nulls_node **pprev = n->pprev; + *pprev = next; + if (!is_a_nulls(next)) + next->pprev = pprev; +} + +/** + * hlist_nulls_for_each_entry - iterate over list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + * + */ +#define hlist_nulls_for_each_entry(tpos, pos, head, member) \ + for (pos = (head)->first; \ + (!is_a_nulls(pos)) && \ + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * hlist_nulls_for_each_entry_from - iterate over a hlist continuing from current point + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @member: the name of the hlist_node within the struct. + * + */ +#define hlist_nulls_for_each_entry_from(tpos, pos, member) \ + for (; (!is_a_nulls(pos)) && \ + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +#endif diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h new file mode 100644 index 000000000000..f9ddd03961a8 --- /dev/null +++ b/include/linux/rculist_nulls.h @@ -0,0 +1,110 @@ +#ifndef _LINUX_RCULIST_NULLS_H +#define _LINUX_RCULIST_NULLS_H + +#ifdef __KERNEL__ + +/* + * RCU-protected list version + */ +#include +#include + +/** + * hlist_nulls_del_init_rcu - deletes entry from hash list with re-initialization + * @n: the element to delete from the hash list. + * + * Note: hlist_nulls_unhashed() on the node return true after this. It is + * useful for RCU based read lockfree traversal if the writer side + * must know if the list entry is still hashed or already unhashed. + * + * In particular, it means that we can not poison the forward pointers + * that may still be used for walking the hash list and we can only + * zero the pprev pointer so list_unhashed() will return true after + * this. + * + * The caller must take whatever precautions are necessary (such as + * holding appropriate locks) to avoid racing with another + * list-mutation primitive, such as hlist_nulls_add_head_rcu() or + * hlist_nulls_del_rcu(), running on this same list. However, it is + * perfectly legal to run concurrently with the _rcu list-traversal + * primitives, such as hlist_nulls_for_each_entry_rcu(). + */ +static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) +{ + if (!hlist_nulls_unhashed(n)) { + __hlist_nulls_del(n); + n->pprev = NULL; + } +} + +/** + * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization + * @n: the element to delete from the hash list. + * + * Note: hlist_nulls_unhashed() on entry does not return true after this, + * the entry is in an undefined state. It is useful for RCU based + * lockfree traversal. + * + * In particular, it means that we can not poison the forward + * pointers that may still be used for walking the hash list. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() + * or hlist_nulls_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_nulls_for_each_entry(). + */ +static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n) +{ + __hlist_nulls_del(n); + n->pprev = LIST_POISON2; +} + +/** + * hlist_nulls_add_head_rcu + * @n: the element to add to the hash list. + * @h: the list to add to. + * + * Description: + * Adds the specified element to the specified hlist_nulls, + * while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() + * or hlist_nulls_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. Regardless of the type of CPU, the + * list-traversal primitive must be guarded by rcu_read_lock(). + */ +static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, + struct hlist_nulls_head *h) +{ + struct hlist_nulls_node *first = h->first; + + n->next = first; + n->pprev = &h->first; + rcu_assign_pointer(h->first, n); + if (!is_a_nulls(first)) + first->pprev = &n->next; +} +/** + * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_nulls_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_nulls_node within the struct. + * + */ +#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = rcu_dereference((head)->first); \ + (!is_a_nulls(pos)) && \ + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ + pos = rcu_dereference(pos->next)) + +#endif +#endif -- cgit v1.2.3 From 88ab1932eac721c6e7336708558fa5ed02c85c80 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 Nov 2008 19:39:21 -0800 Subject: udp: Use hlist_nulls in UDP RCU code This is a straightforward patch, using hlist_nulls infrastructure. RCUification already done on UDP two weeks ago. Using hlist_nulls permits us to avoid some memory barriers, both at lookup time and delete time. Patch is large because it adds new macros to include/net/sock.h. These macros will be used by TCP & DCCP in next patch. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/rculist.h | 17 --------------- include/net/sock.h | 57 +++++++++++++++++++++++++++++++++++++++---------- include/net/udp.h | 2 +- net/ipv4/udp.c | 47 +++++++++++++++++++--------------------- net/ipv6/udp.c | 26 +++++++++++----------- 5 files changed, 83 insertions(+), 66 deletions(-) (limited to 'include') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 3ba2998b22ba..e649bd3f2c97 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -383,22 +383,5 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ pos = rcu_dereference(pos->next)) -/** - * hlist_for_each_entry_rcu_safenext - iterate over rcu list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the hlist_node within the struct. - * @next: the &struct hlist_node to use as a next cursor - * - * Special version of hlist_for_each_entry_rcu that make sure - * each next pointer is fetched before each iteration. - */ -#define hlist_for_each_entry_rcu_safenext(tpos, pos, head, member, next) \ - for (pos = rcu_dereference((head)->first); \ - pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference(next)) - #endif /* __KERNEL__ */ #endif diff --git a/include/net/sock.h b/include/net/sock.h index 8b2b82131b67..0a638948868d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -42,6 +42,7 @@ #include #include +#include #include #include #include @@ -52,6 +53,7 @@ #include #include +#include #include #include @@ -106,6 +108,7 @@ struct net; * @skc_reuse: %SO_REUSEADDR setting * @skc_bound_dev_if: bound device index if != 0 * @skc_node: main hash linkage for various protocol lookup tables + * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol * @skc_bind_node: bind hash linkage for various protocol lookup tables * @skc_refcnt: reference count * @skc_hash: hash value used with various protocol lookup tables @@ -120,7 +123,10 @@ struct sock_common { volatile unsigned char skc_state; unsigned char skc_reuse; int skc_bound_dev_if; - struct hlist_node skc_node; + union { + struct hlist_node skc_node; + struct hlist_nulls_node skc_nulls_node; + }; struct hlist_node skc_bind_node; atomic_t skc_refcnt; unsigned int skc_hash; @@ -206,6 +212,7 @@ struct sock { #define sk_reuse __sk_common.skc_reuse #define sk_bound_dev_if __sk_common.skc_bound_dev_if #define sk_node __sk_common.skc_node +#define sk_nulls_node __sk_common.skc_nulls_node #define sk_bind_node __sk_common.skc_bind_node #define sk_refcnt __sk_common.skc_refcnt #define sk_hash __sk_common.skc_hash @@ -300,12 +307,30 @@ static inline struct sock *sk_head(const struct hlist_head *head) return hlist_empty(head) ? NULL : __sk_head(head); } +static inline struct sock *__sk_nulls_head(const struct hlist_nulls_head *head) +{ + return hlist_nulls_entry(head->first, struct sock, sk_nulls_node); +} + +static inline struct sock *sk_nulls_head(const struct hlist_nulls_head *head) +{ + return hlist_nulls_empty(head) ? NULL : __sk_nulls_head(head); +} + static inline struct sock *sk_next(const struct sock *sk) { return sk->sk_node.next ? hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL; } +static inline struct sock *sk_nulls_next(const struct sock *sk) +{ + return (!is_a_nulls(sk->sk_nulls_node.next)) ? + hlist_nulls_entry(sk->sk_nulls_node.next, + struct sock, sk_nulls_node) : + NULL; +} + static inline int sk_unhashed(const struct sock *sk) { return hlist_unhashed(&sk->sk_node); @@ -321,6 +346,11 @@ static __inline__ void sk_node_init(struct hlist_node *node) node->pprev = NULL; } +static __inline__ void sk_nulls_node_init(struct hlist_nulls_node *node) +{ + node->pprev = NULL; +} + static __inline__ void __sk_del_node(struct sock *sk) { __hlist_del(&sk->sk_node); @@ -367,18 +397,18 @@ static __inline__ int sk_del_node_init(struct sock *sk) return rc; } -static __inline__ int __sk_del_node_init_rcu(struct sock *sk) +static __inline__ int __sk_nulls_del_node_init_rcu(struct sock *sk) { if (sk_hashed(sk)) { - hlist_del_init_rcu(&sk->sk_node); + hlist_nulls_del_init_rcu(&sk->sk_nulls_node); return 1; } return 0; } -static __inline__ int sk_del_node_init_rcu(struct sock *sk) +static __inline__ int sk_nulls_del_node_init_rcu(struct sock *sk) { - int rc = __sk_del_node_init_rcu(sk); + int rc = __sk_nulls_del_node_init_rcu(sk); if (rc) { /* paranoid for a while -acme */ @@ -399,15 +429,15 @@ static __inline__ void sk_add_node(struct sock *sk, struct hlist_head *list) __sk_add_node(sk, list); } -static __inline__ void __sk_add_node_rcu(struct sock *sk, struct hlist_head *list) +static __inline__ void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { - hlist_add_head_rcu(&sk->sk_node, list); + hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); } -static __inline__ void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) +static __inline__ void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { sock_hold(sk); - __sk_add_node_rcu(sk, list); + __sk_nulls_add_node_rcu(sk, list); } static __inline__ void __sk_del_bind_node(struct sock *sk) @@ -423,11 +453,16 @@ static __inline__ void sk_add_bind_node(struct sock *sk, #define sk_for_each(__sk, node, list) \ hlist_for_each_entry(__sk, node, list, sk_node) -#define sk_for_each_rcu_safenext(__sk, node, list, next) \ - hlist_for_each_entry_rcu_safenext(__sk, node, list, sk_node, next) +#define sk_nulls_for_each(__sk, node, list) \ + hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node) +#define sk_nulls_for_each_rcu(__sk, node, list) \ + hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node) #define sk_for_each_from(__sk, node) \ if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ hlist_for_each_entry_from(__sk, node, sk_node) +#define sk_nulls_for_each_from(__sk, node) \ + if (__sk && ({ node = &(__sk)->sk_nulls_node; 1; })) \ + hlist_nulls_for_each_entry_from(__sk, node, sk_nulls_node) #define sk_for_each_continue(__sk, node) \ if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ hlist_for_each_entry_continue(__sk, node, sk_node) diff --git a/include/net/udp.h b/include/net/udp.h index df2bfe545374..90e6ce56be65 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -51,7 +51,7 @@ struct udp_skb_cb { #define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb)) struct udp_hslot { - struct hlist_head head; + struct hlist_nulls_head head; spinlock_t lock; } __attribute__((aligned(2 * sizeof(long)))); struct udp_table { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 54badc9a019d..fea2d873dd41 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -127,9 +127,9 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, const struct sock *sk2)) { struct sock *sk2; - struct hlist_node *node; + struct hlist_nulls_node *node; - sk_for_each(sk2, node, &hslot->head) + sk_nulls_for_each(sk2, node, &hslot->head) if (net_eq(sock_net(sk2), net) && sk2 != sk && sk2->sk_hash == num && @@ -189,12 +189,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { - /* - * We need that previous write to sk->sk_hash committed - * before write to sk->next done in following add_node() variant - */ - smp_wmb(); - sk_add_node_rcu(sk, &hslot->head); + sk_nulls_add_node_rcu(sk, &hslot->head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } error = 0; @@ -261,7 +256,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, int dif, struct udp_table *udptable) { struct sock *sk, *result; - struct hlist_node *node, *next; + struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; @@ -271,13 +266,7 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, begin: result = NULL; badness = -1; - sk_for_each_rcu_safenext(sk, node, &hslot->head, next) { - /* - * lockless reader, and SLAB_DESTROY_BY_RCU items: - * We must check this item was not moved to another chain - */ - if (udp_hashfn(net, sk->sk_hash) != hash) - goto begin; + sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, saddr, hnum, sport, daddr, dport, dif); if (score > badness) { @@ -285,6 +274,14 @@ begin: badness = score; } } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != hash) + goto begin; + if (result) { if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; @@ -325,11 +322,11 @@ static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, __be16 rmt_port, __be32 rmt_addr, int dif) { - struct hlist_node *node; + struct hlist_nulls_node *node; struct sock *s = sk; unsigned short hnum = ntohs(loc_port); - sk_for_each_from(s, node) { + sk_nulls_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); if (!net_eq(sock_net(s), net) || @@ -977,7 +974,7 @@ void udp_lib_unhash(struct sock *sk) struct udp_hslot *hslot = &udptable->hash[hash]; spin_lock_bh(&hslot->lock); - if (sk_del_node_init_rcu(sk)) { + if (sk_nulls_del_node_init_rcu(sk)) { inet_sk(sk)->num = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } @@ -1130,7 +1127,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, int dif; spin_lock(&hslot->lock); - sk = sk_head(&hslot->head); + sk = sk_nulls_head(&hslot->head); dif = skb->dev->ifindex; sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); if (sk) { @@ -1139,7 +1136,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, do { struct sk_buff *skb1 = skb; - sknext = udp_v4_mcast_next(net, sk_next(sk), uh->dest, + sknext = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr, uh->source, saddr, dif); if (sknext) @@ -1560,10 +1557,10 @@ static struct sock *udp_get_first(struct seq_file *seq, int start) struct net *net = seq_file_net(seq); for (state->bucket = start; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { - struct hlist_node *node; + struct hlist_nulls_node *node; struct udp_hslot *hslot = &state->udp_table->hash[state->bucket]; spin_lock_bh(&hslot->lock); - sk_for_each(sk, node, &hslot->head) { + sk_nulls_for_each(sk, node, &hslot->head) { if (!net_eq(sock_net(sk), net)) continue; if (sk->sk_family == state->family) @@ -1582,7 +1579,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) struct net *net = seq_file_net(seq); do { - sk = sk_next(sk); + sk = sk_nulls_next(sk); } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); if (!sk) { @@ -1753,7 +1750,7 @@ void __init udp_table_init(struct udp_table *table) int i; for (i = 0; i < UDP_HTABLE_SIZE; i++) { - INIT_HLIST_HEAD(&table->hash[i].head); + INIT_HLIST_NULLS_HEAD(&table->hash[i].head, i); spin_lock_init(&table->hash[i].lock); } } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8dafa36b1ba5..fd2d9ad4a8a3 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -98,7 +98,7 @@ static struct sock *__udp6_lib_lookup(struct net *net, int dif, struct udp_table *udptable) { struct sock *sk, *result; - struct hlist_node *node, *next; + struct hlist_nulls_node *node; unsigned short hnum = ntohs(dport); unsigned int hash = udp_hashfn(net, hnum); struct udp_hslot *hslot = &udptable->hash[hash]; @@ -108,19 +108,21 @@ static struct sock *__udp6_lib_lookup(struct net *net, begin: result = NULL; badness = -1; - sk_for_each_rcu_safenext(sk, node, &hslot->head, next) { - /* - * lockless reader, and SLAB_DESTROY_BY_RCU items: - * We must check this item was not moved to another chain - */ - if (udp_hashfn(net, sk->sk_hash) != hash) - goto begin; + sk_nulls_for_each_rcu(sk, node, &hslot->head) { score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); if (score > badness) { result = sk; badness = score; } } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != hash) + goto begin; + if (result) { if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) result = NULL; @@ -374,11 +376,11 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, __be16 rmt_port, struct in6_addr *rmt_addr, int dif) { - struct hlist_node *node; + struct hlist_nulls_node *node; struct sock *s = sk; unsigned short num = ntohs(loc_port); - sk_for_each_from(s, node) { + sk_nulls_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); if (!net_eq(sock_net(s), net)) @@ -423,7 +425,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, int dif; spin_lock(&hslot->lock); - sk = sk_head(&hslot->head); + sk = sk_nulls_head(&hslot->head); dif = inet6_iif(skb); sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); if (!sk) { @@ -432,7 +434,7 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, } sk2 = sk; - while ((sk2 = udp_v6_mcast_next(net, sk_next(sk2), uh->dest, daddr, + while ((sk2 = udp_v6_mcast_next(net, sk_nulls_next(sk2), uh->dest, daddr, uh->source, saddr, dif))) { struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); if (buff) { -- cgit v1.2.3 From 3ab5aee7fe840b5b1b35a8d1ac11c3de5281e611 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 Nov 2008 19:40:17 -0800 Subject: net: Convert TCP & DCCP hash tables to use RCU / hlist_nulls RCU was added to UDP lookups, using a fast infrastructure : - sockets kmem_cache use SLAB_DESTROY_BY_RCU and dont pay the price of call_rcu() at freeing time. - hlist_nulls permits to use few memory barriers. This patch uses same infrastructure for TCP/DCCP established and timewait sockets. Thanks to SLAB_DESTROY_BY_RCU, no slowdown for applications using short lived TCP connections. A followup patch, converting rwlocks to spinlocks will even speedup this case. __inet_lookup_established() is pretty fast now we dont have to dirty a contended cache line (read_lock/read_unlock) Only established and timewait hashtable are converted to RCU (bind table and listen table are still using traditional locking) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 4 +-- include/net/inet_timewait_sock.h | 10 +++--- net/core/sock.c | 4 ++- net/dccp/ipv4.c | 1 + net/dccp/ipv6.c | 1 + net/dccp/proto.c | 4 +-- net/ipv4/inet_diag.c | 7 ++-- net/ipv4/inet_hashtables.c | 78 ++++++++++++++++++++++++++++------------ net/ipv4/inet_timewait_sock.c | 26 ++++++++------ net/ipv4/tcp.c | 4 +-- net/ipv4/tcp_ipv4.c | 25 ++++++------- net/ipv6/inet6_hashtables.c | 70 ++++++++++++++++++++++++------------ net/ipv6/tcp_ipv6.c | 1 + 13 files changed, 151 insertions(+), 84 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index cb31fbf8ae2a..481896045111 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -41,8 +41,8 @@ * I'll experiment with dynamic table growth later. */ struct inet_ehash_bucket { - struct hlist_head chain; - struct hlist_head twchain; + struct hlist_nulls_head chain; + struct hlist_nulls_head twchain; }; /* There are a few simple rules, which allow for local port reuse by diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 80e4977631b8..4b8ece22b8e9 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -110,7 +110,7 @@ struct inet_timewait_sock { #define tw_state __tw_common.skc_state #define tw_reuse __tw_common.skc_reuse #define tw_bound_dev_if __tw_common.skc_bound_dev_if -#define tw_node __tw_common.skc_node +#define tw_node __tw_common.skc_nulls_node #define tw_bind_node __tw_common.skc_bind_node #define tw_refcnt __tw_common.skc_refcnt #define tw_hash __tw_common.skc_hash @@ -137,10 +137,10 @@ struct inet_timewait_sock { struct hlist_node tw_death_node; }; -static inline void inet_twsk_add_node(struct inet_timewait_sock *tw, - struct hlist_head *list) +static inline void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, + struct hlist_nulls_head *list) { - hlist_add_head(&tw->tw_node, list); + hlist_nulls_add_head_rcu(&tw->tw_node, list); } static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, @@ -175,7 +175,7 @@ static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw) } #define inet_twsk_for_each(tw, node, head) \ - hlist_for_each_entry(tw, node, head, tw_node) + hlist_nulls_for_each_entry(tw, node, head, tw_node) #define inet_twsk_for_each_inmate(tw, node, jail) \ hlist_for_each_entry(tw, node, jail, tw_death_node) diff --git a/net/core/sock.c b/net/core/sock.c index ded1eb5d2fd4..38de9c3f563b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2082,7 +2082,9 @@ int proto_register(struct proto *prot, int alloc_slab) prot->twsk_prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name, prot->twsk_prot->twsk_obj_size, - 0, SLAB_HWCACHE_ALIGN, + 0, + SLAB_HWCACHE_ALIGN | + prot->slab_flags, NULL); if (prot->twsk_prot->twsk_slab == NULL) goto out_free_timewait_sock_slab_name; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 528baa2e5be4..d1dd95289b89 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -938,6 +938,7 @@ static struct proto dccp_v4_prot = { .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .rsk_prot = &dccp_request_sock_ops, .twsk_prot = &dccp_timewait_sock_ops, .h.hashinfo = &dccp_hashinfo, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4aa1148cdb20..f033e845bb07 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1140,6 +1140,7 @@ static struct proto dccp_v6_prot = { .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp6_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .rsk_prot = &dccp6_request_sock_ops, .twsk_prot = &dccp6_timewait_sock_ops, .h.hashinfo = &dccp_hashinfo, diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 46cb3490d48e..1117d4d8c8f1 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1090,8 +1090,8 @@ static int __init dccp_init(void) } for (i = 0; i < dccp_hashinfo.ehash_size; i++) { - INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); - INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain); + INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i); + INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i); } if (inet_ehash_locks_alloc(&dccp_hashinfo)) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 564230dabcb8..41b36720e977 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -778,18 +778,19 @@ skip_listen_ht: struct inet_ehash_bucket *head = &hashinfo->ehash[i]; rwlock_t *lock = inet_ehash_lockp(hashinfo, i); struct sock *sk; - struct hlist_node *node; + struct hlist_nulls_node *node; num = 0; - if (hlist_empty(&head->chain) && hlist_empty(&head->twchain)) + if (hlist_nulls_empty(&head->chain) && + hlist_nulls_empty(&head->twchain)) continue; if (i > s_i) s_num = 0; read_lock_bh(lock); - sk_for_each(sk, node, &head->chain) { + sk_nulls_for_each(sk, node, &head->chain) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index be41ebbec4eb..fd269cfef0ec 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -223,35 +223,65 @@ struct sock * __inet_lookup_established(struct net *net, INET_ADDR_COOKIE(acookie, saddr, daddr) const __portpair ports = INET_COMBINED_PORTS(sport, hnum); struct sock *sk; - const struct hlist_node *node; + const struct hlist_nulls_node *node; /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); - struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); - rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); + unsigned int slot = hash & (hashinfo->ehash_size - 1); + struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; - prefetch(head->chain.first); - read_lock(lock); - sk_for_each(sk, node, &head->chain) { + rcu_read_lock(); +begin: + sk_nulls_for_each_rcu(sk, node, &head->chain) { if (INET_MATCH(sk, net, hash, acookie, - saddr, daddr, ports, dif)) - goto hit; /* You sunk my battleship! */ + saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) + goto begintw; + if (unlikely(!INET_MATCH(sk, net, hash, acookie, + saddr, daddr, ports, dif))) { + sock_put(sk); + goto begin; + } + goto out; + } } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != slot) + goto begin; +begintw: /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_for_each(sk, node, &head->twchain) { + sk_nulls_for_each_rcu(sk, node, &head->twchain) { if (INET_TW_MATCH(sk, net, hash, acookie, - saddr, daddr, ports, dif)) - goto hit; + saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { + sk = NULL; + goto out; + } + if (unlikely(!INET_TW_MATCH(sk, net, hash, acookie, + saddr, daddr, ports, dif))) { + sock_put(sk); + goto begintw; + } + goto out; + } } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != slot) + goto begintw; sk = NULL; out: - read_unlock(lock); + rcu_read_unlock(); return sk; -hit: - sock_hold(sk); - goto out; } EXPORT_SYMBOL_GPL(__inet_lookup_established); @@ -272,14 +302,14 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); rwlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; - const struct hlist_node *node; + const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; prefetch(head->chain.first); write_lock(lock); /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &head->twchain) { + sk_nulls_for_each(sk2, node, &head->twchain) { tw = inet_twsk(sk2); if (INET_TW_MATCH(sk2, net, hash, acookie, @@ -293,7 +323,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, tw = NULL; /* And established part... */ - sk_for_each(sk2, node, &head->chain) { + sk_nulls_for_each(sk2, node, &head->chain) { if (INET_MATCH(sk2, net, hash, acookie, saddr, daddr, ports, dif)) goto not_unique; @@ -306,7 +336,7 @@ unique: inet->sport = htons(lport); sk->sk_hash = hash; WARN_ON(!sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); + __sk_nulls_add_node_rcu(sk, &head->chain); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock(lock); @@ -338,7 +368,7 @@ static inline u32 inet_sk_port_offset(const struct sock *sk) void __inet_hash_nolisten(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - struct hlist_head *list; + struct hlist_nulls_head *list; rwlock_t *lock; struct inet_ehash_bucket *head; @@ -350,7 +380,7 @@ void __inet_hash_nolisten(struct sock *sk) lock = inet_ehash_lockp(hashinfo, sk->sk_hash); write_lock(lock); - __sk_add_node(sk, list); + __sk_nulls_add_node_rcu(sk, list); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock(lock); } @@ -400,13 +430,15 @@ void inet_unhash(struct sock *sk) local_bh_disable(); inet_listen_wlock(hashinfo); lock = &hashinfo->lhash_lock; + if (__sk_del_node_init(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } else { lock = inet_ehash_lockp(hashinfo, sk->sk_hash); write_lock_bh(lock); + if (__sk_nulls_del_node_init_rcu(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } - if (__sk_del_node_init(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); write_unlock_bh(lock); out: if (sk->sk_state == TCP_LISTEN) diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 1c5fd38f8824..60689951ecdb 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -23,12 +23,12 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, rwlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); write_lock(lock); - if (hlist_unhashed(&tw->tw_node)) { + if (hlist_nulls_unhashed(&tw->tw_node)) { write_unlock(lock); return; } - __hlist_del(&tw->tw_node); - sk_node_init(&tw->tw_node); + hlist_nulls_del_rcu(&tw->tw_node); + sk_nulls_node_init(&tw->tw_node); write_unlock(lock); /* Disassociate with bind bucket. */ @@ -92,13 +92,17 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, write_lock(lock); - /* Step 2: Remove SK from established hash. */ - if (__sk_del_node_init(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - - /* Step 3: Hash TW into TIMEWAIT chain. */ - inet_twsk_add_node(tw, &ehead->twchain); + /* + * Step 2: Hash TW into TIMEWAIT chain. + * Should be done before removing sk from established chain + * because readers are lockless and search established first. + */ atomic_inc(&tw->tw_refcnt); + inet_twsk_add_node_rcu(tw, &ehead->twchain); + + /* Step 3: Remove SK from established hash. */ + if (__sk_nulls_del_node_init_rcu(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); write_unlock(lock); } @@ -416,7 +420,7 @@ void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, { struct inet_timewait_sock *tw; struct sock *sk; - struct hlist_node *node; + struct hlist_nulls_node *node; int h; local_bh_disable(); @@ -426,7 +430,7 @@ void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, rwlock_t *lock = inet_ehash_lockp(hashinfo, h); restart: write_lock(lock); - sk_for_each(sk, node, &head->twchain) { + sk_nulls_for_each(sk, node, &head->twchain) { tw = inet_twsk(sk); if (!net_eq(twsk_net(tw), net) || diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f60a5917e54d..044224a341eb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2707,8 +2707,8 @@ void __init tcp_init(void) thash_entries ? 0 : 512 * 1024); tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size; for (i = 0; i < tcp_hashinfo.ehash_size; i++) { - INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain); - INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].twchain); + INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i); + INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i); } if (inet_ehash_locks_alloc(&tcp_hashinfo)) panic("TCP: failed to alloc ehash_locks"); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d49233f409b5..b2e3ab2287ba 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1857,16 +1857,16 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock); #ifdef CONFIG_PROC_FS /* Proc filesystem TCP sock list dumping. */ -static inline struct inet_timewait_sock *tw_head(struct hlist_head *head) +static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head) { - return hlist_empty(head) ? NULL : + return hlist_nulls_empty(head) ? NULL : list_entry(head->first, struct inet_timewait_sock, tw_node); } static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) { - return tw->tw_node.next ? - hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; + return !is_a_nulls(tw->tw_node.next) ? + hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; } static void *listening_get_next(struct seq_file *seq, void *cur) @@ -1954,8 +1954,8 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos) static inline int empty_bucket(struct tcp_iter_state *st) { - return hlist_empty(&tcp_hashinfo.ehash[st->bucket].chain) && - hlist_empty(&tcp_hashinfo.ehash[st->bucket].twchain); + return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) && + hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); } static void *established_get_first(struct seq_file *seq) @@ -1966,7 +1966,7 @@ static void *established_get_first(struct seq_file *seq) for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { struct sock *sk; - struct hlist_node *node; + struct hlist_nulls_node *node; struct inet_timewait_sock *tw; rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); @@ -1975,7 +1975,7 @@ static void *established_get_first(struct seq_file *seq) continue; read_lock_bh(lock); - sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { + sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { if (sk->sk_family != st->family || !net_eq(sock_net(sk), net)) { continue; @@ -2004,7 +2004,7 @@ static void *established_get_next(struct seq_file *seq, void *cur) { struct sock *sk = cur; struct inet_timewait_sock *tw; - struct hlist_node *node; + struct hlist_nulls_node *node; struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); @@ -2032,11 +2032,11 @@ get_tw: return NULL; read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); - sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); + sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain); } else - sk = sk_next(sk); + sk = sk_nulls_next(sk); - sk_for_each_from(sk, node) { + sk_nulls_for_each_from(sk, node) { if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) goto found; } @@ -2375,6 +2375,7 @@ struct proto tcp_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .twsk_prot = &tcp_timewait_sock_ops, .rsk_prot = &tcp_request_sock_ops, .h.hashinfo = &tcp_hashinfo, diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 1646a5658255..c1b4d401fd95 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -25,24 +25,28 @@ void __inet6_hash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - struct hlist_head *list; rwlock_t *lock; WARN_ON(!sk_unhashed(sk)); if (sk->sk_state == TCP_LISTEN) { + struct hlist_head *list; + list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; lock = &hashinfo->lhash_lock; inet_listen_wlock(hashinfo); + __sk_add_node(sk, list); } else { unsigned int hash; + struct hlist_nulls_head *list; + sk->sk_hash = hash = inet6_sk_ehashfn(sk); list = &inet_ehash_bucket(hashinfo, hash)->chain; lock = inet_ehash_lockp(hashinfo, hash); write_lock(lock); + __sk_nulls_add_node_rcu(sk, list); } - __sk_add_node(sk, list); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock(lock); } @@ -63,33 +67,53 @@ struct sock *__inet6_lookup_established(struct net *net, const int dif) { struct sock *sk; - const struct hlist_node *node; + const struct hlist_nulls_node *node; const __portpair ports = INET_COMBINED_PORTS(sport, hnum); /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport); - struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); - rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); + unsigned int slot = hash & (hashinfo->ehash_size - 1); + struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; - prefetch(head->chain.first); - read_lock(lock); - sk_for_each(sk, node, &head->chain) { + + rcu_read_lock(); +begin: + sk_nulls_for_each_rcu(sk, node, &head->chain) { /* For IPV6 do the cheaper port and family tests first. */ - if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) - goto hit; /* You sunk my battleship! */ + if (INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) + goto begintw; + if (!INET6_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + sock_put(sk); + goto begin; + } + goto out; + } } + if (get_nulls_value(node) != slot) + goto begin; + +begintw: /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_for_each(sk, node, &head->twchain) { - if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) - goto hit; + sk_nulls_for_each_rcu(sk, node, &head->twchain) { + if (INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { + sk = NULL; + goto out; + } + if (!INET6_TW_MATCH(sk, net, hash, saddr, daddr, ports, dif)) { + sock_put(sk); + goto begintw; + } + goto out; + } } - read_unlock(lock); - return NULL; - -hit: - sock_hold(sk); - read_unlock(lock); + if (get_nulls_value(node) != slot) + goto begintw; + sk = NULL; +out: + rcu_read_unlock(); return sk; } EXPORT_SYMBOL(__inet6_lookup_established); @@ -172,14 +196,14 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); rwlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; - const struct hlist_node *node; + const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; prefetch(head->chain.first); write_lock(lock); /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &head->twchain) { + sk_nulls_for_each(sk2, node, &head->twchain) { tw = inet_twsk(sk2); if (INET6_TW_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) { @@ -192,7 +216,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, tw = NULL; /* And established part... */ - sk_for_each(sk2, node, &head->chain) { + sk_nulls_for_each(sk2, node, &head->chain) { if (INET6_MATCH(sk2, net, hash, saddr, daddr, ports, dif)) goto not_unique; } @@ -203,7 +227,7 @@ unique: inet->num = lport; inet->sport = htons(lport); WARN_ON(!sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); + __sk_nulls_add_node_rcu(sk, &head->chain); sk->sk_hash = hash; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock(lock); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 984276463a8d..b35787056313 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2043,6 +2043,7 @@ struct proto tcpv6_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp6_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .twsk_prot = &tcp6_timewait_sock_ops, .rsk_prot = &tcp6_request_sock_ops, .h.hashinfo = &tcp_hashinfo, -- cgit v1.2.3 From 5635c10d976716ef47ae441998aeae144c7e7387 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 Nov 2008 19:46:36 -0800 Subject: net: make sure struct dst_entry refcount is aligned on 64 bytes As found in the past (commit f1dd9c379cac7d5a76259e7dffcd5f8edc697d17 [NET]: Fix tbench regression in 2.6.25-rc1), it is really important that struct dst_entry refcount is aligned on a cache line. We cannot use __atribute((aligned)), so manually pad the structure for 32 and 64 bit arches. for 32bit : offsetof(truct dst_entry, __refcnt) is 0x80 for 64bit : offsetof(truct dst_entry, __refcnt) is 0xc0 As it is not possible to guess at compile time cache line size, we use a generic value of 64 bytes, that satisfies many current arches. (Using 128 bytes alignment on 64bit arches would waste 64 bytes) Add a BUILD_BUG_ON to catch future updates to "struct dst_entry" dont break this alignment. "tbench 8" is 4.4 % faster on a dual quad core (HP BL460c G1), Intel E5450 @3.00GHz (2350 MB/s instead of 2250 MB/s) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 65a60fab2f82..6c778799bf10 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -61,6 +61,8 @@ struct dst_entry struct hh_cache *hh; #ifdef CONFIG_XFRM struct xfrm_state *xfrm; +#else + void *__pad1; #endif int (*input)(struct sk_buff*); int (*output)(struct sk_buff*); @@ -71,8 +73,20 @@ struct dst_entry #ifdef CONFIG_NET_CLS_ROUTE __u32 tclassid; +#else + __u32 __pad2; #endif + + /* + * Align __refcnt to a 64 bytes alignment + * (L1_CACHE_SIZE would be too much) + */ +#ifdef CONFIG_64BIT + long __pad_to_align_refcnt[2]; +#else + long __pad_to_align_refcnt[1]; +#endif /* * __refcnt wants to be on a different cache line from * input/output/ops or performance tanks badly @@ -157,6 +171,11 @@ dst_metric_locked(struct dst_entry *dst, int metric) static inline void dst_hold(struct dst_entry * dst) { + /* + * If your kernel compilation stops here, please check + * __pad_to_align_refcnt declaration in struct dst_entry + */ + BUILD_BUG_ON(offsetof(struct dst_entry, __refcnt) & 63); atomic_inc(&dst->__refcnt); } -- cgit v1.2.3 From 3f2c31d90327f21d76d296af34aa4ca547932ff4 Mon Sep 17 00:00:00 2001 From: Mark McLoughlin Date: Sun, 16 Nov 2008 22:41:34 -0800 Subject: virtio_net: VIRTIO_NET_F_MSG_RXBUF (imprive rcv buffer allocation) If segmentation offload is enabled by the host, we currently allocate maximum sized packet buffers and pass them to the host. This uses up 20 ring entries, allowing us to supply only 20 packet buffers to the host with a 256 entry ring. This is a huge overhead when receiving small packets, and is most keenly felt when receiving MTU sized packets from off-host. The VIRTIO_NET_F_MRG_RXBUF feature flag is set by hosts which support using receive buffers which are smaller than the maximum packet size. In order to transfer large packets to the guest, the host merges together multiple receive buffers to form a larger logical buffer. The number of merged buffers is returned to the guest via a field in the virtio_net_hdr. Make use of this support by supplying single page receive buffers to the host. On receive, we extract the virtio_net_hdr, copy 128 bytes of the payload to the skb's linear data buffer and adjust the fragment offset to point to the remaining data. This ensures proper alignment and allows us to not use any paged data for small packets. If the payload occupies multiple pages, we simply append those pages as fragments and free the associated skbs. This scheme allows us to be efficient in our use of ring entries while still supporting large packets. Benchmarking using netperf from an external machine to a guest over a 10Gb/s network shows a 100% improvement from ~1Gb/s to ~2Gb/s. With a local host->guest benchmark with GSO disabled on the host side, throughput was seen to increase from 700Mb/s to 1.7Gb/s. Based on a patch from Herbert Xu. Signed-off-by: Mark McLoughlin Signed-off-by: Rusty Russell (use netdev_priv) Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 173 +++++++++++++++++++++++++++++++++++++++------ include/linux/virtio_net.h | 9 +++ 2 files changed, 162 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 27559c987d47..e6b5d6ef9ea8 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -34,6 +34,7 @@ module_param(gso, bool, 0444); /* FIXME: MTU in config. */ #define MAX_PACKET_LEN (ETH_HLEN+ETH_DATA_LEN) +#define GOOD_COPY_LEN 128 struct virtnet_info { @@ -58,6 +59,9 @@ struct virtnet_info /* I like... big packets and I cannot lie! */ bool big_packets; + /* Host will merge rx buffers for big packets (shake it! shake it!) */ + bool mergeable_rx_bufs; + /* Receive & send queues. */ struct sk_buff_head recv; struct sk_buff_head send; @@ -66,16 +70,11 @@ struct virtnet_info struct page *pages; }; -static inline struct virtio_net_hdr *skb_vnet_hdr(struct sk_buff *skb) +static inline void *skb_vnet_hdr(struct sk_buff *skb) { return (struct virtio_net_hdr *)skb->cb; } -static inline void vnet_hdr_to_sg(struct scatterlist *sg, struct sk_buff *skb) -{ - sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr)); -} - static void give_a_page(struct virtnet_info *vi, struct page *page) { page->private = (unsigned long)vi->pages; @@ -121,25 +120,97 @@ static void skb_xmit_done(struct virtqueue *svq) static void receive_skb(struct net_device *dev, struct sk_buff *skb, unsigned len) { + struct virtnet_info *vi = netdev_priv(dev); struct virtio_net_hdr *hdr = skb_vnet_hdr(skb); int err; + int i; if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) { pr_debug("%s: short packet %i\n", dev->name, len); dev->stats.rx_length_errors++; goto drop; } - len -= sizeof(struct virtio_net_hdr); - if (len <= MAX_PACKET_LEN) - trim_pages(netdev_priv(dev), skb); + if (vi->mergeable_rx_bufs) { + struct virtio_net_hdr_mrg_rxbuf *mhdr = skb_vnet_hdr(skb); + unsigned int copy; + char *p = page_address(skb_shinfo(skb)->frags[0].page); - err = pskb_trim(skb, len); - if (err) { - pr_debug("%s: pskb_trim failed %i %d\n", dev->name, len, err); - dev->stats.rx_dropped++; - goto drop; + if (len > PAGE_SIZE) + len = PAGE_SIZE; + len -= sizeof(struct virtio_net_hdr_mrg_rxbuf); + + memcpy(hdr, p, sizeof(*mhdr)); + p += sizeof(*mhdr); + + copy = len; + if (copy > skb_tailroom(skb)) + copy = skb_tailroom(skb); + + memcpy(skb_put(skb, copy), p, copy); + + len -= copy; + + if (!len) { + give_a_page(vi, skb_shinfo(skb)->frags[0].page); + skb_shinfo(skb)->nr_frags--; + } else { + skb_shinfo(skb)->frags[0].page_offset += + sizeof(*mhdr) + copy; + skb_shinfo(skb)->frags[0].size = len; + skb->data_len += len; + skb->len += len; + } + + while (--mhdr->num_buffers) { + struct sk_buff *nskb; + + i = skb_shinfo(skb)->nr_frags; + if (i >= MAX_SKB_FRAGS) { + pr_debug("%s: packet too long %d\n", dev->name, + len); + dev->stats.rx_length_errors++; + goto drop; + } + + nskb = vi->rvq->vq_ops->get_buf(vi->rvq, &len); + if (!nskb) { + pr_debug("%s: rx error: %d buffers missing\n", + dev->name, mhdr->num_buffers); + dev->stats.rx_length_errors++; + goto drop; + } + + __skb_unlink(nskb, &vi->recv); + vi->num--; + + skb_shinfo(skb)->frags[i] = skb_shinfo(nskb)->frags[0]; + skb_shinfo(nskb)->nr_frags = 0; + kfree_skb(nskb); + + if (len > PAGE_SIZE) + len = PAGE_SIZE; + + skb_shinfo(skb)->frags[i].size = len; + skb_shinfo(skb)->nr_frags++; + skb->data_len += len; + skb->len += len; + } + } else { + len -= sizeof(struct virtio_net_hdr); + + if (len <= MAX_PACKET_LEN) + trim_pages(vi, skb); + + err = pskb_trim(skb, len); + if (err) { + pr_debug("%s: pskb_trim failed %i %d\n", dev->name, + len, err); + dev->stats.rx_dropped++; + goto drop; + } } + skb->truesize += skb->data_len; dev->stats.rx_bytes += skb->len; dev->stats.rx_packets++; @@ -198,7 +269,7 @@ drop: dev_kfree_skb(skb); } -static void try_fill_recv(struct virtnet_info *vi) +static void try_fill_recv_maxbufs(struct virtnet_info *vi) { struct sk_buff *skb; struct scatterlist sg[2+MAX_SKB_FRAGS]; @@ -206,12 +277,16 @@ static void try_fill_recv(struct virtnet_info *vi) sg_init_table(sg, 2+MAX_SKB_FRAGS); for (;;) { + struct virtio_net_hdr *hdr; + skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN); if (unlikely(!skb)) break; skb_put(skb, MAX_PACKET_LEN); - vnet_hdr_to_sg(sg, skb); + + hdr = skb_vnet_hdr(skb); + sg_init_one(sg, hdr, sizeof(*hdr)); if (vi->big_packets) { for (i = 0; i < MAX_SKB_FRAGS; i++) { @@ -247,6 +322,54 @@ static void try_fill_recv(struct virtnet_info *vi) vi->rvq->vq_ops->kick(vi->rvq); } +static void try_fill_recv(struct virtnet_info *vi) +{ + struct sk_buff *skb; + struct scatterlist sg[1]; + int err; + + if (!vi->mergeable_rx_bufs) { + try_fill_recv_maxbufs(vi); + return; + } + + for (;;) { + skb_frag_t *f; + + skb = netdev_alloc_skb(vi->dev, GOOD_COPY_LEN + NET_IP_ALIGN); + if (unlikely(!skb)) + break; + + skb_reserve(skb, NET_IP_ALIGN); + + f = &skb_shinfo(skb)->frags[0]; + f->page = get_a_page(vi, GFP_ATOMIC); + if (!f->page) { + kfree_skb(skb); + break; + } + + f->page_offset = 0; + f->size = PAGE_SIZE; + + skb_shinfo(skb)->nr_frags++; + + sg_init_one(sg, page_address(f->page), PAGE_SIZE); + skb_queue_head(&vi->recv, skb); + + err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, 1, skb); + if (err) { + skb_unlink(skb, &vi->recv); + kfree_skb(skb); + break; + } + vi->num++; + } + if (unlikely(vi->num > vi->max)) + vi->max = vi->num; + vi->rvq->vq_ops->kick(vi->rvq); +} + static void skb_recv_done(struct virtqueue *rvq) { struct virtnet_info *vi = rvq->vdev->priv; @@ -325,15 +448,14 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) { int num, err; struct scatterlist sg[2+MAX_SKB_FRAGS]; - struct virtio_net_hdr *hdr; + struct virtio_net_hdr_mrg_rxbuf *mhdr = skb_vnet_hdr(skb); + struct virtio_net_hdr *hdr = skb_vnet_hdr(skb); const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest; sg_init_table(sg, 2+MAX_SKB_FRAGS); pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); - /* Encode metadata header at front. */ - hdr = skb_vnet_hdr(skb); if (skb->ip_summed == CHECKSUM_PARTIAL) { hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; hdr->csum_start = skb->csum_start - skb_headroom(skb); @@ -361,7 +483,14 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) hdr->gso_size = hdr->hdr_len = 0; } - vnet_hdr_to_sg(sg, skb); + mhdr->num_buffers = 0; + + /* Encode metadata header at front. */ + if (vi->mergeable_rx_bufs) + sg_init_one(sg, mhdr, sizeof(*mhdr)); + else + sg_init_one(sg, hdr, sizeof(*hdr)); + num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1; err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb); @@ -551,6 +680,9 @@ static int virtnet_probe(struct virtio_device *vdev) || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN)) vi->big_packets = true; + if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) + vi->mergeable_rx_bufs = true; + /* We expect two virtqueues, receive then send. */ vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done); if (IS_ERR(vi->rvq)) { @@ -643,6 +775,7 @@ static unsigned int features[] = { VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_ECN, /* We don't yet handle UFO input. */ + VIRTIO_NET_F_MRG_RXBUF, VIRTIO_F_NOTIFY_ON_EMPTY, }; diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 5e33761b9b8a..5cdd0aa8bde9 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -20,6 +20,7 @@ #define VIRTIO_NET_F_HOST_TSO6 12 /* Host can handle TSOv6 in. */ #define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO[6] w/ ECN in. */ #define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */ +#define VIRTIO_NET_F_MRG_RXBUF 15 /* Host can merge receive buffers. */ struct virtio_net_config { @@ -44,4 +45,12 @@ struct virtio_net_hdr __u16 csum_start; /* Position to start checksumming from */ __u16 csum_offset; /* Offset after that to place checksum */ }; + +/* This is the version of the header to use when the MRG_RXBUF + * feature has been negotiated. */ +struct virtio_net_hdr_mrg_rxbuf { + struct virtio_net_hdr hdr; + __u16 num_buffers; /* Number of merged rx buffers */ +}; + #endif /* _LINUX_VIRTIO_NET_H */ -- cgit v1.2.3 From 49aebc66d6b896f9c7c5739d85c4548c00015aa7 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 16 Nov 2008 22:51:23 -0800 Subject: dccp: Deprecate old setsockopt framework The previous setsockopt interface, which passed socket options via struct dccp_so_feat, is complicated/difficult to use. Continuing to support it leads to ugly code since the old approach did not distinguish between NN and SP values. This patch removes the old setsockopt interface and replaces it with two new functions to register NN/SP values for feature negotiation. These are essentially wrappers around the internal __feat_register functions, with checking added to avoid * wrong usage (type); * changing values while the connection is in progress. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/linux/dccp.h | 7 ----- net/dccp/feat.c | 72 ++++++++++++++++++++-------------------------------- net/dccp/feat.h | 5 ++-- net/dccp/proto.c | 53 ++------------------------------------ 4 files changed, 32 insertions(+), 105 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index d3ac1bde60b4..6eaaca9b037a 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -193,13 +193,6 @@ enum dccp_feature_numbers { DCCPF_MAX_CCID_SPECIFIC = 255, }; -/* this structure is argument to DCCP_SOCKOPT_CHANGE_X */ -struct dccp_so_feat { - __u8 dccpsf_feat; - __u8 __user *dccpsf_val; - __u8 dccpsf_len; -}; - /* DCCP socket options */ #define DCCP_SOCKOPT_PACKET_SIZE 1 /* XXX deprecated, without effect */ #define DCCP_SOCKOPT_SERVICE 2 diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 4f86a48723f6..47ce9abaf72b 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -364,53 +364,35 @@ static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local, return dccp_feat_push_change(fn, feat, is_local, mandatory, &fval); } -int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature, - u8 *val, u8 len, gfp_t gfp) -{ - struct dccp_opt_pend *opt; - - dccp_feat_debug(type, feature, *val); - - if (len > 3) { - DCCP_WARN("invalid length %d\n", len); +/** + * dccp_feat_register_sp - Register requests to change SP feature values + * @sk: client or listening socket + * @feat: one of %dccp_feature_numbers + * @is_local: whether the local (1) or remote (0) @feat is meant + * @list: array of preferred values, in descending order of preference + * @len: length of @list in bytes + */ +int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, + u8 const *list, u8 len) +{ /* any changes must be registered before establishing the connection */ + if (sk->sk_state != DCCP_CLOSED) + return -EISCONN; + if (dccp_feat_type(feat) != FEAT_SP) return -EINVAL; - } - /* XXX add further sanity checks */ - - /* check if that feature is already being negotiated */ - list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) { - /* ok we found a negotiation for this option already */ - if (opt->dccpop_feat == feature && opt->dccpop_type == type) { - dccp_pr_debug("Replacing old\n"); - /* replace */ - BUG_ON(opt->dccpop_val == NULL); - kfree(opt->dccpop_val); - opt->dccpop_val = val; - opt->dccpop_len = len; - opt->dccpop_conf = 0; - return 0; - } - } - - /* negotiation for a new feature */ - opt = kmalloc(sizeof(*opt), gfp); - if (opt == NULL) - return -ENOMEM; - - opt->dccpop_type = type; - opt->dccpop_feat = feature; - opt->dccpop_len = len; - opt->dccpop_val = val; - opt->dccpop_conf = 0; - opt->dccpop_sc = NULL; - - BUG_ON(opt->dccpop_val == NULL); - - list_add_tail(&opt->dccpop_node, &dmsk->dccpms_pending); - return 0; + return __feat_register_sp(&dccp_sk(sk)->dccps_featneg, feat, is_local, + 0, list, len); } -EXPORT_SYMBOL_GPL(dccp_feat_change); +/* Analogous to dccp_feat_register_sp(), but for non-negotiable values */ +int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val) +{ + /* any changes must be registered before establishing the connection */ + if (sk->sk_state != DCCP_CLOSED) + return -EISCONN; + if (dccp_feat_type(feat) != FEAT_NN) + return -EINVAL; + return __feat_register_nn(&dccp_sk(sk)->dccps_featneg, feat, 0, val); +} /* * Tracking features whose value depend on the choice of CCID @@ -1108,7 +1090,7 @@ int dccp_feat_init(struct sock *sk) /* Ack ratio */ rc = __feat_register_nn(&dp->dccps_featneg, DCCPF_ACK_RATIO, 0, - dmsk->dccpms_ack_ratio); + dp->dccps_l_ack_ratio); out: return rc; } diff --git a/net/dccp/feat.h b/net/dccp/feat.h index 232c653e69c5..4d172822df17 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h @@ -111,8 +111,9 @@ static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val) #define dccp_feat_debug(type, feat, val) #endif /* CONFIG_IP_DCCP_DEBUG */ -extern int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature, - u8 *val, u8 len, gfp_t gfp); +extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, + u8 const *list, u8 len); +extern int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val); extern int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len); extern int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature, diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 1117d4d8c8f1..3090fc40eebd 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -470,44 +470,6 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service, return 0; } -/* byte 1 is feature. the rest is the preference list */ -static int dccp_setsockopt_change(struct sock *sk, int type, - struct dccp_so_feat __user *optval) -{ - struct dccp_so_feat opt; - u8 *val; - int rc; - - if (copy_from_user(&opt, optval, sizeof(opt))) - return -EFAULT; - /* - * rfc4340: 6.1. Change Options - */ - if (opt.dccpsf_len < 1) - return -EINVAL; - - val = kmalloc(opt.dccpsf_len, GFP_KERNEL); - if (!val) - return -ENOMEM; - - if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) { - rc = -EFAULT; - goto out_free_val; - } - - rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat, - val, opt.dccpsf_len, GFP_KERNEL); - if (rc) - goto out_free_val; - -out: - return rc; - -out_free_val: - kfree(val); - goto out; -} - static int do_dccp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { @@ -530,20 +492,9 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, err = 0; break; case DCCP_SOCKOPT_CHANGE_L: - if (optlen != sizeof(struct dccp_so_feat)) - err = -EINVAL; - else - err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L, - (struct dccp_so_feat __user *) - optval); - break; case DCCP_SOCKOPT_CHANGE_R: - if (optlen != sizeof(struct dccp_so_feat)) - err = -EINVAL; - else - err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R, - (struct dccp_so_feat __user *) - optval); + DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n"); + err = 0; break; case DCCP_SOCKOPT_SERVER_TIMEWAIT: if (dp->dccps_role != DCCP_ROLE_SERVER) -- cgit v1.2.3 From 29450559849da7066813601effb7666966869853 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 16 Nov 2008 22:53:48 -0800 Subject: dccp: Feature negotiation for minimum-checksum-coverage This provides feature negotiation for server minimum checksum coverage which so far has been missing. Since sender/receiver coverage values range only from 0...15, their type has also been reduced in size from u16 to u4. Feature-negotiation options are now generated for both sender and receiver coverage, i.e. when the peer has `forgotten' to enable partial coverage then feature negotiation will automatically enable (negotiate) the partial coverage value for this connection. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- include/linux/dccp.h | 4 ++-- net/dccp/proto.c | 53 +++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 42 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 6eaaca9b037a..5a5a89935dbc 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -527,8 +527,8 @@ struct dccp_sock { __u32 dccps_timestamp_time; __u16 dccps_l_ack_ratio; __u16 dccps_r_ack_ratio; - __u16 dccps_pcslen; - __u16 dccps_pcrlen; + __u8 dccps_pcslen:4; + __u8 dccps_pcrlen:4; __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; struct dccp_minisock dccps_minisock; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 3090fc40eebd..c6b4362bb1d7 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -470,6 +470,42 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service, return 0; } +static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx) +{ + u8 *list, len; + int i, rc; + + if (cscov < 0 || cscov > 15) + return -EINVAL; + /* + * Populate a list of permissible values, in the range cscov...15. This + * is necessary since feature negotiation of single values only works if + * both sides incidentally choose the same value. Since the list starts + * lowest-value first, negotiation will pick the smallest shared value. + */ + if (cscov == 0) + return 0; + len = 16 - cscov; + + list = kmalloc(len, GFP_KERNEL); + if (list == NULL) + return -ENOBUFS; + + for (i = 0; i < len; i++) + list[i] = cscov++; + + rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len); + + if (rc == 0) { + if (rx) + dccp_sk(sk)->dccps_pcrlen = cscov; + else + dccp_sk(sk)->dccps_pcslen = cscov; + } + kfree(list); + return rc; +} + static int do_dccp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { @@ -502,20 +538,11 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, else dp->dccps_server_timewait = (val != 0); break; - case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */ - if (val < 0 || val > 15) - err = -EINVAL; - else - dp->dccps_pcslen = val; + case DCCP_SOCKOPT_SEND_CSCOV: + err = dccp_setsockopt_cscov(sk, val, false); break; - case DCCP_SOCKOPT_RECV_CSCOV: /* receiver side, RFC 4340 sec. 9.2.1 */ - if (val < 0 || val > 15) - err = -EINVAL; - else { - dp->dccps_pcrlen = val; - /* FIXME: add feature negotiation, - * ChangeL(MinimumChecksumCoverage, val) */ - } + case DCCP_SOCKOPT_RECV_CSCOV: + err = dccp_setsockopt_cscov(sk, val, true); break; default: err = -ENOPROTOOPT; -- cgit v1.2.3 From dd9c0e363cef32b7d6f23d4c87e8dfe4f91fd1c5 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 16 Nov 2008 22:55:08 -0800 Subject: dccp: Deprecate Ack Ratio sysctl This patch deprecates the Ack Ratio sysctl, since * Ack Ratio is entirely ignored by CCID-3 and CCID-4, * Ack Ratio currently doesn't work in CCID-2 (i.e. is always set to 1); * even if it would work in CCID-2, there is no point for a user to change it: - Ack Ratio is constrained by cwnd (RFC 4341, 6.1.2), - if Ack Ratio > cwnd, the system resorts to spurious RTO timeouts (since waiting for Acks which will never arrive in this window), - cwnd is not a user-configurable value. The only reasonable place for Ack Ratio is to print it for debugging. It is planned to do this later on, as part of e.g. dccp_probe. With this patch Ack Ratio is now under full control of feature negotiation: * Ack Ratio is resolved as a dependency of the selected CCID; * if the chosen CCID supports it (i.e. CCID == CCID-2), Ack Ratio is set to the default of 2, following RFC 4340, 11.3 - "New connections start with Ack Ratio 2 for both endpoints"; * what happens then is part of another patch set, since it concerns the dynamic update of Ack Ratio while the connection is in full flight. Thanks to Tomasz Grobelny for discussion leading up to this patch. Signed-off-by: Gerrit Renker Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- Documentation/networking/dccp.txt | 3 --- include/linux/dccp.h | 2 -- net/dccp/dccp.h | 1 - net/dccp/minisocks.c | 1 - net/dccp/options.c | 1 - net/dccp/sysctl.c | 7 ------- 6 files changed, 15 deletions(-) (limited to 'include') diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt index f0aeb20fa63b..43df4487379b 100644 --- a/Documentation/networking/dccp.txt +++ b/Documentation/networking/dccp.txt @@ -125,9 +125,6 @@ send_ndp = 1 send_ackvec = 1 Whether or not to send Ack Vector options (sec. 11.5). -ack_ratio = 2 - The default Ack Ratio (sec. 11.3) to use. - tx_ccid = 2 Default CCID for the sender-receiver half-connection. diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 5a5a89935dbc..eda389ce04f4 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -368,7 +368,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * @dccpms_ccid - Congestion Control Id (CCID) (section 10) * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2) - * @dccpms_ack_ratio - Ack Ratio Feature (section 11.3) * @dccpms_pending - List of features being negotiated * @dccpms_conf - */ @@ -378,7 +377,6 @@ struct dccp_minisock { __u8 dccpms_tx_ccid; __u8 dccpms_send_ack_vector; __u8 dccpms_send_ndp_count; - __u8 dccpms_ack_ratio; struct list_head dccpms_pending; struct list_head dccpms_conf; }; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index e656dafb5d96..031ce350d3c1 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -98,7 +98,6 @@ extern int sysctl_dccp_retries2; extern int sysctl_dccp_feat_sequence_window; extern int sysctl_dccp_feat_rx_ccid; extern int sysctl_dccp_feat_tx_ccid; -extern int sysctl_dccp_feat_ack_ratio; extern int sysctl_dccp_feat_send_ack_vector; extern int sysctl_dccp_feat_send_ndp_count; extern int sysctl_dccp_tx_qlen; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index afdacbb94d75..ed61bc58e41e 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -47,7 +47,6 @@ void dccp_minisock_init(struct dccp_minisock *dmsk) dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window; dmsk->dccpms_rx_ccid = sysctl_dccp_feat_rx_ccid; dmsk->dccpms_tx_ccid = sysctl_dccp_feat_tx_ccid; - dmsk->dccpms_ack_ratio = sysctl_dccp_feat_ack_ratio; dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector; dmsk->dccpms_send_ndp_count = sysctl_dccp_feat_send_ndp_count; } diff --git a/net/dccp/options.c b/net/dccp/options.c index 67a171a1268c..515ad45013ad 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -26,7 +26,6 @@ int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW; int sysctl_dccp_feat_rx_ccid = DCCPF_INITIAL_CCID; int sysctl_dccp_feat_tx_ccid = DCCPF_INITIAL_CCID; -int sysctl_dccp_feat_ack_ratio = DCCPF_INITIAL_ACK_RATIO; int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR; int sysctl_dccp_feat_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT; diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index 21295993fdb8..f6e54f433e29 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -40,13 +40,6 @@ static struct ctl_table dccp_default_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "ack_ratio", - .data = &sysctl_dccp_feat_ack_ratio, - .maxlen = sizeof(sysctl_dccp_feat_ack_ratio), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "send_ackvec", .data = &sysctl_dccp_feat_send_ack_vector, -- cgit v1.2.3 From 4d24b52ac5085ef8a264d044f1b302b7c029887a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 16 Nov 2008 23:01:49 -0800 Subject: ematch: simpler tcf_em_unregister() Simply delete ops from list and let list debugging do the job. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 +- net/sched/ematch.c | 18 +++--------------- 2 files changed, 4 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index aa9e282db485..d1ca31444644 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -246,7 +246,7 @@ struct tcf_ematch_ops }; extern int tcf_em_register(struct tcf_ematch_ops *); -extern int tcf_em_unregister(struct tcf_ematch_ops *); +extern void tcf_em_unregister(struct tcf_ematch_ops *); extern int tcf_em_tree_validate(struct tcf_proto *, struct nlattr *, struct tcf_ematch_tree *); extern void tcf_em_tree_destroy(struct tcf_proto *, struct tcf_ematch_tree *); diff --git a/net/sched/ematch.c b/net/sched/ematch.c index e82519e548d7..aab59409728b 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -71,7 +71,7 @@ * * static void __exit exit_my_ematch(void) * { - * return tcf_em_unregister(&my_ops); + * tcf_em_unregister(&my_ops); * } * * module_init(init_my_ematch); @@ -154,23 +154,11 @@ EXPORT_SYMBOL(tcf_em_register); * * Returns -ENOENT if no matching ematch was found. */ -int tcf_em_unregister(struct tcf_ematch_ops *ops) +void tcf_em_unregister(struct tcf_ematch_ops *ops) { - int err = 0; - struct tcf_ematch_ops *e; - write_lock(&ematch_mod_lock); - list_for_each_entry(e, &ematch_ops, link) { - if (e == ops) { - list_del(&e->link); - goto out; - } - } - - err = -ENOENT; -out: + list_del(&ops->link); write_unlock(&ematch_mod_lock); - return err; } EXPORT_SYMBOL(tcf_em_unregister); -- cgit v1.2.3 From 1cad1de1b216b355a60d907c103b2daf1a285345 Mon Sep 17 00:00:00 2001 From: Christian Pellegrin Date: Sat, 15 Nov 2008 08:58:16 +0100 Subject: ASoC: UDA134x codec driver Signed-off-by: Christian Pellegrin Signed-off-by: Mark Brown --- include/sound/l3.h | 18 ++ include/sound/uda134x.h | 26 ++ sound/soc/codecs/Kconfig | 8 + sound/soc/codecs/Makefile | 4 + sound/soc/codecs/l3.c | 91 ++++++ sound/soc/codecs/uda134x.c | 656 +++++++++++++++++++++++++++++++++++++++ sound/soc/codecs/uda134x_codec.h | 36 +++ 7 files changed, 839 insertions(+) create mode 100644 include/sound/l3.h create mode 100644 include/sound/uda134x.h create mode 100644 sound/soc/codecs/l3.c create mode 100644 sound/soc/codecs/uda134x.c create mode 100644 sound/soc/codecs/uda134x_codec.h (limited to 'include') diff --git a/include/sound/l3.h b/include/sound/l3.h new file mode 100644 index 000000000000..423a08f0f1b0 --- /dev/null +++ b/include/sound/l3.h @@ -0,0 +1,18 @@ +#ifndef _L3_H_ +#define _L3_H_ 1 + +struct l3_pins { + void (*setdat)(int); + void (*setclk)(int); + void (*setmode)(int); + int data_hold; + int data_setup; + int clock_high; + int mode_hold; + int mode; + int mode_setup; +}; + +int l3_write(struct l3_pins *adap, u8 addr, u8 *data, int len); + +#endif diff --git a/include/sound/uda134x.h b/include/sound/uda134x.h new file mode 100644 index 000000000000..475ef8bb7dcd --- /dev/null +++ b/include/sound/uda134x.h @@ -0,0 +1,26 @@ +/* + * uda134x.h -- UDA134x ALSA SoC Codec driver + * + * Copyright 2007 Dension Audio Systems Ltd. + * Author: Zoltan Devai + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _UDA134X_H +#define _UDA134X_H + +#include + +struct uda134x_platform_data { + struct l3_pins l3; + void (*power) (int); + int model; +#define UDA134X_UDA1340 1 +#define UDA134X_UDA1341 2 +#define UDA134X_UDA1344 3 +}; + +#endif /* _UDA134X_H */ diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 8a84460a6f74..04f49f5c3c3d 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -10,6 +10,7 @@ config SND_SOC_ALL_CODECS select SND_SOC_TLV320AIC26 if SPI_MASTER select SND_SOC_TLV320AIC3X if I2C select SND_SOC_TWL4030 if TWL4030_CORE + select SND_SOC_UDA134X select SND_SOC_UDA1380 if I2C select SND_SOC_WM8510 if (I2C || SPI_MASTER) select SND_SOC_WM8580 if I2C @@ -66,6 +67,9 @@ config SND_SOC_CS4270_VD33_ERRATA bool depends on SND_SOC_CS4270 +config SND_SOC_L3 + tristate + config SND_SOC_SSM2602 tristate @@ -85,6 +89,10 @@ config SND_SOC_TWL4030 tristate depends on TWL4030_CORE +config SND_SOC_UDA134X + tristate + select SND_SOC_L3 + config SND_SOC_UDA1380 tristate diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile index 7ae17a6ea271..de6572356d1b 100644 --- a/sound/soc/codecs/Makefile +++ b/sound/soc/codecs/Makefile @@ -3,11 +3,13 @@ snd-soc-ad1980-objs := ad1980.o snd-soc-ad73311-objs := ad73311.o snd-soc-ak4535-objs := ak4535.o snd-soc-cs4270-objs := cs4270.o +snd-soc-l3-objs := l3.o snd-soc-ssm2602-objs := ssm2602.o snd-soc-tlv320aic23-objs := tlv320aic23.o snd-soc-tlv320aic26-objs := tlv320aic26.o snd-soc-tlv320aic3x-objs := tlv320aic3x.o snd-soc-twl4030-objs := twl4030.o +snd-soc-uda134x-objs := uda134x.o snd-soc-uda1380-objs := uda1380.o snd-soc-wm8510-objs := wm8510.o snd-soc-wm8580-objs := wm8580.o @@ -27,11 +29,13 @@ obj-$(CONFIG_SND_SOC_AD1980) += snd-soc-ad1980.o obj-$(CONFIG_SND_SOC_AD73311) += snd-soc-ad73311.o obj-$(CONFIG_SND_SOC_AK4535) += snd-soc-ak4535.o obj-$(CONFIG_SND_SOC_CS4270) += snd-soc-cs4270.o +obj-$(CONFIG_SND_SOC_L3) += snd-soc-l3.o obj-$(CONFIG_SND_SOC_SSM2602) += snd-soc-ssm2602.o obj-$(CONFIG_SND_SOC_TLV320AIC23) += snd-soc-tlv320aic23.o obj-$(CONFIG_SND_SOC_TLV320AIC26) += snd-soc-tlv320aic26.o obj-$(CONFIG_SND_SOC_TLV320AIC3X) += snd-soc-tlv320aic3x.o obj-$(CONFIG_SND_SOC_TWL4030) += snd-soc-twl4030.o +obj-$(CONFIG_SND_SOC_UDA134X) += snd-soc-uda134x.o obj-$(CONFIG_SND_SOC_UDA1380) += snd-soc-uda1380.o obj-$(CONFIG_SND_SOC_WM8510) += snd-soc-wm8510.o obj-$(CONFIG_SND_SOC_WM8580) += snd-soc-wm8580.o diff --git a/sound/soc/codecs/l3.c b/sound/soc/codecs/l3.c new file mode 100644 index 000000000000..5353af58862c --- /dev/null +++ b/sound/soc/codecs/l3.c @@ -0,0 +1,91 @@ +/* + * L3 code + * + * Copyright (C) 2008, Christian Pellegrin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * + * based on: + * + * L3 bus algorithm module. + * + * Copyright (C) 2001 Russell King, All Rights Reserved. + * + * + */ + +#include +#include +#include + +#include + +/* + * Send one byte of data to the chip. Data is latched into the chip on + * the rising edge of the clock. + */ +static void sendbyte(struct l3_pins *adap, unsigned int byte) +{ + int i; + + for (i = 0; i < 8; i++) { + adap->setclk(0); + udelay(adap->data_hold); + adap->setdat(byte & 1); + udelay(adap->data_setup); + adap->setclk(1); + udelay(adap->clock_high); + byte >>= 1; + } +} + +/* + * Send a set of bytes to the chip. We need to pulse the MODE line + * between each byte, but never at the start nor at the end of the + * transfer. + */ +static void sendbytes(struct l3_pins *adap, const u8 *buf, + int len) +{ + int i; + + for (i = 0; i < len; i++) { + if (i) { + udelay(adap->mode_hold); + adap->setmode(0); + udelay(adap->mode); + } + adap->setmode(1); + udelay(adap->mode_setup); + sendbyte(adap, buf[i]); + } +} + +int l3_write(struct l3_pins *adap, u8 addr, u8 *data, int len) +{ + adap->setclk(1); + adap->setdat(1); + adap->setmode(1); + udelay(adap->mode); + + adap->setmode(0); + udelay(adap->mode_setup); + sendbyte(adap, addr); + udelay(adap->mode_hold); + + sendbytes(adap, data, len); + + adap->setclk(1); + adap->setdat(1); + adap->setmode(0); + + return len; +} +EXPORT_SYMBOL_GPL(l3_write); + +MODULE_DESCRIPTION("L3 bit-banging driver"); +MODULE_AUTHOR("Christian Pellegrin "); +MODULE_LICENSE("GPL"); diff --git a/sound/soc/codecs/uda134x.c b/sound/soc/codecs/uda134x.c new file mode 100644 index 000000000000..04b30da10228 --- /dev/null +++ b/sound/soc/codecs/uda134x.c @@ -0,0 +1,656 @@ +/* + * uda134x.c -- UDA134X ALSA SoC Codec driver + * + * Modifications by Christian Pellegrin + * + * Copyright 2007 Dension Audio Systems Ltd. + * Author: Zoltan Devai + * + * Based on the WM87xx drivers by Liam Girdwood and Richard Purdie + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "uda134x_codec.h" + + +#define POWER_OFF_ON_STANDBY 1 +/* + ALSA SOC usually puts the device in standby mode when it's not used + for sometime. If you define POWER_OFF_ON_STANDBY the driver will + turn off the ADC/DAC when this callback is invoked and turn it back + on when needed. Unfortunately this will result in a very light bump + (it can be audible only with good earphones). If this bothers you + just comment this line, you will have slightly higher power + consumption . Please note that sending the L3 command for ADC is + enough to make the bump, so it doesn't make difference if you + completely take off power from the codec. + */ + +#define UDA134X_RATES SNDRV_PCM_RATE_8000_48000 +#define UDA134X_FORMATS (SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_S16_LE | \ + SNDRV_PCM_FMTBIT_S18_3LE | SNDRV_PCM_FMTBIT_S20_3LE) + +struct uda134x_priv { + int sysclk; + int dai_fmt; + + struct snd_pcm_substream *master_substream; + struct snd_pcm_substream *slave_substream; +}; + +/* In-data addresses are hard-coded into the reg-cache values */ +static const char uda134x_reg[UDA134X_REGS_NUM] = { + /* Extended address registers */ + 0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, + /* Status, data regs */ + 0x00, 0x83, 0x00, 0x40, 0x80, 0x00, +}; + +/* + * The codec has no support for reading its registers except for peak level... + */ +static inline unsigned int uda134x_read_reg_cache(struct snd_soc_codec *codec, + unsigned int reg) +{ + u8 *cache = codec->reg_cache; + + if (reg >= UDA134X_REGS_NUM) + return -1; + return cache[reg]; +} + +/* + * Write the register cache + */ +static inline void uda134x_write_reg_cache(struct snd_soc_codec *codec, + u8 reg, unsigned int value) +{ + u8 *cache = codec->reg_cache; + + if (reg >= UDA134X_REGS_NUM) + return; + cache[reg] = value; +} + +/* + * Write to the uda134x registers + * + */ +static int uda134x_write(struct snd_soc_codec *codec, unsigned int reg, + unsigned int value) +{ + int ret; + u8 addr; + u8 data = value; + struct uda134x_platform_data *pd = codec->control_data; + + pr_debug("%s reg: %02X, value:%02X\n", __func__, reg, value); + + if (reg >= UDA134X_REGS_NUM) { + printk(KERN_ERR "%s unkown register: reg: %d", + __func__, reg); + return -EINVAL; + } + + uda134x_write_reg_cache(codec, reg, value); + + switch (reg) { + case UDA134X_STATUS0: + case UDA134X_STATUS1: + addr = UDA134X_STATUS_ADDR; + break; + case UDA134X_DATA000: + case UDA134X_DATA001: + case UDA134X_DATA010: + addr = UDA134X_DATA0_ADDR; + break; + case UDA134X_DATA1: + addr = UDA134X_DATA1_ADDR; + break; + default: + /* It's an extended address register */ + addr = (reg | UDA134X_EXTADDR_PREFIX); + + ret = l3_write(&pd->l3, + UDA134X_DATA0_ADDR, &addr, 1); + if (ret != 1) + return -EIO; + + addr = UDA134X_DATA0_ADDR; + data = (value | UDA134X_EXTDATA_PREFIX); + break; + } + + ret = l3_write(&pd->l3, + addr, &data, 1); + if (ret != 1) + return -EIO; + + return 0; +} + +static inline void uda134x_reset(struct snd_soc_codec *codec) +{ + u8 reset_reg = uda134x_read_reg_cache(codec, UDA134X_STATUS0); + uda134x_write(codec, UDA134X_STATUS0, reset_reg | (1<<6)); + msleep(1); + uda134x_write(codec, UDA134X_STATUS0, reset_reg & ~(1<<6)); +} + +static int uda134x_mute(struct snd_soc_dai *dai, int mute) +{ + struct snd_soc_codec *codec = dai->codec; + u8 mute_reg = uda134x_read_reg_cache(codec, UDA134X_DATA010); + + pr_debug("%s mute: %d\n", __func__, mute); + + if (mute) + mute_reg |= (1<<2); + else + mute_reg &= ~(1<<2); + + uda134x_write(codec, UDA134X_DATA010, mute_reg & ~(1<<2)); + + return 0; +} + +static int uda134x_startup(struct snd_pcm_substream *substream) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_device *socdev = rtd->socdev; + struct snd_soc_codec *codec = socdev->codec; + struct uda134x_priv *uda134x = codec->private_data; + struct snd_pcm_runtime *master_runtime; + + if (uda134x->master_substream) { + master_runtime = uda134x->master_substream->runtime; + + pr_debug("%s constraining to %d bits at %d\n", __func__, + master_runtime->sample_bits, + master_runtime->rate); + + snd_pcm_hw_constraint_minmax(substream->runtime, + SNDRV_PCM_HW_PARAM_RATE, + master_runtime->rate, + master_runtime->rate); + + snd_pcm_hw_constraint_minmax(substream->runtime, + SNDRV_PCM_HW_PARAM_SAMPLE_BITS, + master_runtime->sample_bits, + master_runtime->sample_bits); + + uda134x->slave_substream = substream; + } else + uda134x->master_substream = substream; + + return 0; +} + +static void uda134x_shutdown(struct snd_pcm_substream *substream) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_device *socdev = rtd->socdev; + struct snd_soc_codec *codec = socdev->codec; + struct uda134x_priv *uda134x = codec->private_data; + + if (uda134x->master_substream == substream) + uda134x->master_substream = uda134x->slave_substream; + + uda134x->slave_substream = NULL; +} + +static int uda134x_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_device *socdev = rtd->socdev; + struct snd_soc_codec *codec = socdev->codec; + struct uda134x_priv *uda134x = codec->private_data; + u8 hw_params; + + if (substream == uda134x->slave_substream) { + pr_debug("%s ignoring hw_params for slave substream\n", + __func__); + return 0; + } + + hw_params = uda134x_read_reg_cache(codec, UDA134X_STATUS0); + hw_params &= STATUS0_SYSCLK_MASK; + hw_params &= STATUS0_DAIFMT_MASK; + + pr_debug("%s sysclk: %d, rate:%d\n", __func__, + uda134x->sysclk, params_rate(params)); + + /* set SYSCLK / fs ratio */ + switch (uda134x->sysclk / params_rate(params)) { + case 512: + break; + case 384: + hw_params |= (1<<4); + break; + case 256: + hw_params |= (1<<5); + break; + default: + printk(KERN_ERR "%s unsupported fs\n", __func__); + return -EINVAL; + } + + pr_debug("%s dai_fmt: %d, params_format:%d\n", __func__, + uda134x->dai_fmt, params_format(params)); + + /* set DAI format and word length */ + switch (uda134x->dai_fmt & SND_SOC_DAIFMT_FORMAT_MASK) { + case SND_SOC_DAIFMT_I2S: + break; + case SND_SOC_DAIFMT_RIGHT_J: + switch (params_format(params)) { + case SNDRV_PCM_FORMAT_S16_LE: + hw_params |= (1<<1); + break; + case SNDRV_PCM_FORMAT_S18_3LE: + hw_params |= (1<<2); + break; + case SNDRV_PCM_FORMAT_S20_3LE: + hw_params |= ((1<<2) | (1<<1)); + break; + default: + printk(KERN_ERR "%s unsupported format (right)\n", + __func__); + return -EINVAL; + } + break; + case SND_SOC_DAIFMT_LEFT_J: + hw_params |= (1<<3); + break; + default: + printk(KERN_ERR "%s unsupported format\n", __func__); + return -EINVAL; + } + + uda134x_write(codec, UDA134X_STATUS0, hw_params); + + return 0; +} + +static int uda134x_set_dai_sysclk(struct snd_soc_dai *codec_dai, + int clk_id, unsigned int freq, int dir) +{ + struct snd_soc_codec *codec = codec_dai->codec; + struct uda134x_priv *uda134x = codec->private_data; + + pr_debug("%s clk_id: %d, freq: %d, dir: %d\n", __func__, + clk_id, freq, dir); + + /* Anything between 256fs*8Khz and 512fs*48Khz should be acceptable + because the codec is slave. Of course limitations of the clock + master (the IIS controller) apply. + We'll error out on set_hw_params if it's not OK */ + if ((freq >= (256 * 8000)) && (freq <= (512 * 48000))) { + uda134x->sysclk = freq; + return 0; + } + + printk(KERN_ERR "%s unsupported sysclk\n", __func__); + return -EINVAL; +} + +static int uda134x_set_dai_fmt(struct snd_soc_dai *codec_dai, + unsigned int fmt) +{ + struct snd_soc_codec *codec = codec_dai->codec; + struct uda134x_priv *uda134x = codec->private_data; + + pr_debug("%s fmt: %08X\n", __func__, fmt); + + /* codec supports only full slave mode */ + if ((fmt & SND_SOC_DAIFMT_MASTER_MASK) != SND_SOC_DAIFMT_CBS_CFS) { + printk(KERN_ERR "%s unsupported slave mode\n", __func__); + return -EINVAL; + } + + /* no support for clock inversion */ + if ((fmt & SND_SOC_DAIFMT_INV_MASK) != SND_SOC_DAIFMT_NB_NF) { + printk(KERN_ERR "%s unsupported clock inversion\n", __func__); + return -EINVAL; + } + + /* We can't setup DAI format here as it depends on the word bit num */ + /* so let's just store the value for later */ + uda134x->dai_fmt = fmt; + + return 0; +} + +static int uda134x_set_bias_level(struct snd_soc_codec *codec, + enum snd_soc_bias_level level) +{ + u8 reg; + struct uda134x_platform_data *pd = codec->control_data; + int i; + u8 *cache = codec->reg_cache; + + pr_debug("%s bias level %d\n", __func__, level); + + switch (level) { + case SND_SOC_BIAS_ON: + /* ADC, DAC on */ + reg = uda134x_read_reg_cache(codec, UDA134X_STATUS1); + uda134x_write(codec, UDA134X_STATUS1, reg | 0x03); + break; + case SND_SOC_BIAS_PREPARE: + /* power on */ + if (pd->power) { + pd->power(1); + /* Sync reg_cache with the hardware */ + for (i = 0; i < ARRAY_SIZE(uda134x_reg); i++) + codec->write(codec, i, *cache++); + } + break; + case SND_SOC_BIAS_STANDBY: + /* ADC, DAC power off */ + reg = uda134x_read_reg_cache(codec, UDA134X_STATUS1); + uda134x_write(codec, UDA134X_STATUS1, reg & ~(0x03)); + break; + case SND_SOC_BIAS_OFF: + /* power off */ + if (pd->power) + pd->power(0); + break; + } + codec->bias_level = level; + return 0; +} + +static const char *uda134x_dsp_setting[] = {"Flat", "Minimum1", + "Minimum2", "Maximum"}; +static const char *uda134x_deemph[] = {"None", "32Khz", "44.1Khz", "48Khz"}; +static const char *uda134x_mixmode[] = {"Differential", "Analog1", + "Analog2", "Both"}; + +static const struct soc_enum uda134x_mixer_enum[] = { +SOC_ENUM_SINGLE(UDA134X_DATA010, 0, 0x04, uda134x_dsp_setting), +SOC_ENUM_SINGLE(UDA134X_DATA010, 3, 0x04, uda134x_deemph), +SOC_ENUM_SINGLE(UDA134X_EA010, 0, 0x04, uda134x_mixmode), +}; + +static const struct snd_kcontrol_new uda1341_snd_controls[] = { +SOC_SINGLE("Master Playback Volume", UDA134X_DATA000, 0, 0x3F, 1), +SOC_SINGLE("Capture Volume", UDA134X_EA010, 2, 0x07, 0), +SOC_SINGLE("Analog1 Volume", UDA134X_EA000, 0, 0x1F, 1), +SOC_SINGLE("Analog2 Volume", UDA134X_EA001, 0, 0x1F, 1), + +SOC_SINGLE("Mic Sensitivity", UDA134X_EA010, 2, 7, 0), +SOC_SINGLE("Mic Volume", UDA134X_EA101, 0, 0x1F, 0), + +SOC_SINGLE("Tone Control - Bass", UDA134X_DATA001, 2, 0xF, 0), +SOC_SINGLE("Tone Control - Treble", UDA134X_DATA001, 0, 3, 0), + +SOC_ENUM("Sound Processing Filter", uda134x_mixer_enum[0]), +SOC_ENUM("PCM Playback De-emphasis", uda134x_mixer_enum[1]), +SOC_ENUM("Input Mux", uda134x_mixer_enum[2]), + +SOC_SINGLE("AGC Switch", UDA134X_EA100, 4, 1, 0), +SOC_SINGLE("AGC Target Volume", UDA134X_EA110, 0, 0x03, 1), +SOC_SINGLE("AGC Timing", UDA134X_EA110, 2, 0x07, 0), + +SOC_SINGLE("DAC +6dB Switch", UDA134X_STATUS1, 6, 1, 0), +SOC_SINGLE("ADC +6dB Switch", UDA134X_STATUS1, 5, 1, 0), +SOC_SINGLE("ADC Polarity Switch", UDA134X_STATUS1, 4, 1, 0), +SOC_SINGLE("DAC Polarity Switch", UDA134X_STATUS1, 3, 1, 0), +SOC_SINGLE("Double Speed Playback Switch", UDA134X_STATUS1, 2, 1, 0), +SOC_SINGLE("DC Filter Enable Switch", UDA134X_STATUS0, 0, 1, 0), +}; + +static const struct snd_kcontrol_new uda1340_snd_controls[] = { +SOC_SINGLE("Master Playback Volume", UDA134X_DATA000, 0, 0x3F, 1), + +SOC_SINGLE("Tone Control - Bass", UDA134X_DATA001, 2, 0xF, 0), +SOC_SINGLE("Tone Control - Treble", UDA134X_DATA001, 0, 3, 0), + +SOC_ENUM("Sound Processing Filter", uda134x_mixer_enum[0]), +SOC_ENUM("PCM Playback De-emphasis", uda134x_mixer_enum[1]), + +SOC_SINGLE("DC Filter Enable Switch", UDA134X_STATUS0, 0, 1, 0), +}; + +static int uda134x_add_controls(struct snd_soc_codec *codec) +{ + int err, i, n; + const struct snd_kcontrol_new *ctrls; + struct uda134x_platform_data *pd = codec->control_data; + + switch (pd->model) { + case UDA134X_UDA1340: + case UDA134X_UDA1344: + n = ARRAY_SIZE(uda1340_snd_controls); + ctrls = uda1340_snd_controls; + break; + case UDA134X_UDA1341: + n = ARRAY_SIZE(uda1341_snd_controls); + ctrls = uda1341_snd_controls; + break; + default: + printk(KERN_ERR "%s unkown codec type: %d", + __func__, pd->model); + return -EINVAL; + } + + for (i = 0; i < n; i++) { + err = snd_ctl_add(codec->card, + snd_soc_cnew(&ctrls[i], + codec, NULL)); + if (err < 0) + return err; + } + + return 0; +} + +struct snd_soc_dai uda134x_dai = { + .name = "UDA134X", + /* playback capabilities */ + .playback = { + .stream_name = "Playback", + .channels_min = 1, + .channels_max = 2, + .rates = UDA134X_RATES, + .formats = UDA134X_FORMATS, + }, + /* capture capabilities */ + .capture = { + .stream_name = "Capture", + .channels_min = 1, + .channels_max = 2, + .rates = UDA134X_RATES, + .formats = UDA134X_FORMATS, + }, + /* pcm operations */ + .ops = { + .startup = uda134x_startup, + .shutdown = uda134x_shutdown, + .hw_params = uda134x_hw_params, + }, + /* DAI operations */ + .dai_ops = { + .digital_mute = uda134x_mute, + .set_sysclk = uda134x_set_dai_sysclk, + .set_fmt = uda134x_set_dai_fmt, + } +}; +EXPORT_SYMBOL(uda134x_dai); + + +static int uda134x_soc_probe(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec; + struct uda134x_priv *uda134x; + void *codec_setup_data = socdev->codec_data; + int ret = -ENOMEM; + struct uda134x_platform_data *pd; + + printk(KERN_INFO "UDA134X SoC Audio Codec\n"); + + if (!codec_setup_data) { + printk(KERN_ERR "UDA134X SoC codec: " + "missing L3 bitbang function\n"); + return -ENODEV; + } + + pd = codec_setup_data; + switch (pd->model) { + case UDA134X_UDA1340: + case UDA134X_UDA1341: + case UDA134X_UDA1344: + break; + default: + printk(KERN_ERR "UDA134X SoC codec: " + "unsupported model %d\n", + pd->model); + return -EINVAL; + } + + socdev->codec = kzalloc(sizeof(struct snd_soc_codec), GFP_KERNEL); + if (socdev->codec == NULL) + return ret; + + codec = socdev->codec; + + uda134x = kzalloc(sizeof(struct uda134x_priv), GFP_KERNEL); + if (uda134x == NULL) + goto priv_err; + codec->private_data = uda134x; + + codec->reg_cache = kmemdup(uda134x_reg, sizeof(uda134x_reg), + GFP_KERNEL); + if (codec->reg_cache == NULL) + goto reg_err; + + mutex_init(&codec->mutex); + + codec->reg_cache_size = sizeof(uda134x_reg); + codec->reg_cache_step = 1; + + codec->name = "UDA134X"; + codec->owner = THIS_MODULE; + codec->dai = &uda134x_dai; + codec->num_dai = 1; + codec->read = uda134x_read_reg_cache; + codec->write = uda134x_write; +#ifdef POWER_OFF_ON_STANDBY + codec->set_bias_level = uda134x_set_bias_level; +#endif + INIT_LIST_HEAD(&codec->dapm_widgets); + INIT_LIST_HEAD(&codec->dapm_paths); + + codec->control_data = codec_setup_data; + + if (pd->power) + pd->power(1); + + uda134x_reset(codec); + + /* register pcms */ + ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1); + if (ret < 0) { + printk(KERN_ERR "UDA134X: failed to register pcms\n"); + goto pcm_err; + } + + ret = uda134x_add_controls(codec); + if (ret < 0) { + printk(KERN_ERR "UDA134X: failed to register controls\n"); + goto pcm_err; + } + + ret = snd_soc_register_card(socdev); + if (ret < 0) { + printk(KERN_ERR "UDA134X: failed to register card\n"); + goto card_err; + } + + return 0; + +card_err: + snd_soc_free_pcms(socdev); + snd_soc_dapm_free(socdev); +pcm_err: + kfree(codec->reg_cache); +reg_err: + kfree(codec->private_data); +priv_err: + kfree(codec); + return ret; +} + +/* power down chip */ +static int uda134x_soc_remove(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec = socdev->codec; + + uda134x_set_bias_level(codec, SND_SOC_BIAS_STANDBY); + uda134x_set_bias_level(codec, SND_SOC_BIAS_OFF); + + snd_soc_free_pcms(socdev); + snd_soc_dapm_free(socdev); + + kfree(codec->private_data); + kfree(codec->reg_cache); + kfree(codec); + + return 0; +} + +#if defined(CONFIG_PM) +static int uda134x_soc_suspend(struct platform_device *pdev, + pm_message_t state) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec = socdev->codec; + + uda134x_set_bias_level(codec, SND_SOC_BIAS_STANDBY); + uda134x_set_bias_level(codec, SND_SOC_BIAS_OFF); + return 0; +} + +static int uda134x_soc_resume(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec = socdev->codec; + + uda134x_set_bias_level(codec, SND_SOC_BIAS_PREPARE); + uda134x_set_bias_level(codec, SND_SOC_BIAS_ON); + return 0; +} +#else +#define uda134x_soc_suspend NULL +#define uda134x_soc_resume NULL +#endif /* CONFIG_PM */ + +struct snd_soc_codec_device soc_codec_dev_uda134x = { + .probe = uda134x_soc_probe, + .remove = uda134x_soc_remove, + .suspend = uda134x_soc_suspend, + .resume = uda134x_soc_resume, +}; +EXPORT_SYMBOL_GPL(soc_codec_dev_uda134x); + +MODULE_DESCRIPTION("UDA134X ALSA soc codec driver"); +MODULE_AUTHOR("Zoltan Devai, Christian Pellegrin "); +MODULE_LICENSE("GPL"); diff --git a/sound/soc/codecs/uda134x_codec.h b/sound/soc/codecs/uda134x_codec.h new file mode 100644 index 000000000000..94f440490b31 --- /dev/null +++ b/sound/soc/codecs/uda134x_codec.h @@ -0,0 +1,36 @@ +#ifndef _UDA134X_CODEC_H +#define _UDA134X_CODEC_H + +#define UDA134X_L3ADDR 5 +#define UDA134X_DATA0_ADDR ((UDA134X_L3ADDR << 2) | 0) +#define UDA134X_DATA1_ADDR ((UDA134X_L3ADDR << 2) | 1) +#define UDA134X_STATUS_ADDR ((UDA134X_L3ADDR << 2) | 2) + +#define UDA134X_EXTADDR_PREFIX 0xC0 +#define UDA134X_EXTDATA_PREFIX 0xE0 + +/* UDA134X registers */ +#define UDA134X_EA000 0 +#define UDA134X_EA001 1 +#define UDA134X_EA010 2 +#define UDA134X_EA011 3 +#define UDA134X_EA100 4 +#define UDA134X_EA101 5 +#define UDA134X_EA110 6 +#define UDA134X_EA111 7 +#define UDA134X_STATUS0 8 +#define UDA134X_STATUS1 9 +#define UDA134X_DATA000 10 +#define UDA134X_DATA001 11 +#define UDA134X_DATA010 12 +#define UDA134X_DATA1 13 + +#define UDA134X_REGS_NUM 14 + +#define STATUS0_DAIFMT_MASK (~(7<<1)) +#define STATUS0_SYSCLK_MASK (~(3<<4)) + +extern struct snd_soc_dai uda134x_dai; +extern struct snd_soc_codec_device soc_codec_dev_uda134x; + +#endif -- cgit v1.2.3 From 7ad933d7a6677c20ce1bdb17425e732cf1ebee8a Mon Sep 17 00:00:00 2001 From: Christian Pellegrin Date: Sat, 15 Nov 2008 08:58:32 +0100 Subject: ASoC: Machine driver for for s3c24xx with uda134x Signed-off-by: Christian Pellegrin Signed-off-by: Mark Brown --- include/sound/s3c24xx_uda134x.h | 14 ++ sound/soc/s3c24xx/Kconfig | 5 + sound/soc/s3c24xx/Makefile | 2 + sound/soc/s3c24xx/s3c24xx_uda134x.c | 374 ++++++++++++++++++++++++++++++++++++ 4 files changed, 395 insertions(+) create mode 100644 include/sound/s3c24xx_uda134x.h create mode 100644 sound/soc/s3c24xx/s3c24xx_uda134x.c (limited to 'include') diff --git a/include/sound/s3c24xx_uda134x.h b/include/sound/s3c24xx_uda134x.h new file mode 100644 index 000000000000..33df4cb909d3 --- /dev/null +++ b/include/sound/s3c24xx_uda134x.h @@ -0,0 +1,14 @@ +#ifndef _S3C24XX_UDA134X_H_ +#define _S3C24XX_UDA134X_H_ 1 + +#include + +struct s3c24xx_uda134x_platform_data { + int l3_clk; + int l3_mode; + int l3_data; + void (*power) (int); + int model; +}; + +#endif diff --git a/sound/soc/s3c24xx/Kconfig b/sound/soc/s3c24xx/Kconfig index b9f2353effeb..fcd03acf10f6 100644 --- a/sound/soc/s3c24xx/Kconfig +++ b/sound/soc/s3c24xx/Kconfig @@ -44,3 +44,8 @@ config SND_S3C24XX_SOC_LN2440SBC_ALC650 Say Y if you want to add support for SoC audio on ln2440sbc with the ALC650. +config SND_S3C24XX_SOC_S3C24XX_UDA134X + tristate "SoC I2S Audio support UDA134X wired to a S3C24XX" + depends on SND_S3C24XX_SOC + select SND_S3C24XX_SOC_I2S + select SND_SOC_UDA134X diff --git a/sound/soc/s3c24xx/Makefile b/sound/soc/s3c24xx/Makefile index 0aa5fb0b9700..96b3f3f617d4 100644 --- a/sound/soc/s3c24xx/Makefile +++ b/sound/soc/s3c24xx/Makefile @@ -13,7 +13,9 @@ obj-$(CONFIG_SND_S3C2412_SOC_I2S) += snd-soc-s3c2412-i2s.o snd-soc-neo1973-wm8753-objs := neo1973_wm8753.o snd-soc-smdk2443-wm9710-objs := smdk2443_wm9710.o snd-soc-ln2440sbc-alc650-objs := ln2440sbc_alc650.o +snd-soc-s3c24xx-uda134x-objs := s3c24xx_uda134x.o obj-$(CONFIG_SND_S3C24XX_SOC_NEO1973_WM8753) += snd-soc-neo1973-wm8753.o obj-$(CONFIG_SND_S3C24XX_SOC_SMDK2443_WM9710) += snd-soc-smdk2443-wm9710.o obj-$(CONFIG_SND_S3C24XX_SOC_LN2440SBC_ALC650) += snd-soc-ln2440sbc-alc650.o +obj-$(CONFIG_SND_S3C24XX_SOC_S3C24XX_UDA134X) += snd-soc-s3c24xx-uda134x.o diff --git a/sound/soc/s3c24xx/s3c24xx_uda134x.c b/sound/soc/s3c24xx/s3c24xx_uda134x.c new file mode 100644 index 000000000000..29a68132f169 --- /dev/null +++ b/sound/soc/s3c24xx/s3c24xx_uda134x.c @@ -0,0 +1,374 @@ +/* + * Modifications by Christian Pellegrin + * + * s3c24xx_uda134x.c -- S3C24XX_UDA134X ALSA SoC Audio board driver + * + * Copyright 2007 Dension Audio Systems Ltd. + * Author: Zoltan Devai + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "s3c24xx-pcm.h" +#include "s3c24xx-i2s.h" +#include "../codecs/uda134x_codec.h" + + +/* #define ENFORCE_RATES 1 */ +/* + Unfortunately the S3C24XX in master mode has a limited capacity of + generating the clock for the codec. If you define this only rates + that are really available will be enforced. But be careful, most + user level application just want the usual sampling frequencies (8, + 11.025, 22.050, 44.1 kHz) and anyway resampling is a costly + operation for embedded systems. So if you aren't very lucky or your + hardware engineer wasn't very forward-looking it's better to leave + this undefined. If you do so an approximate value for the requested + sampling rate in the range -/+ 5% will be chosen. If this in not + possible an error will be returned. +*/ + +static struct clk *xtal; +static struct clk *pclk; +/* this is need because we don't have a place where to keep the + * pointers to the clocks in each substream. We get the clocks only + * when we are actually using them so we don't block stuff like + * frequency change or oscillator power-off */ +static int clk_users; +static DEFINE_MUTEX(clk_lock); + +static unsigned int rates[33 * 2]; +#ifdef ENFORCE_RATES +static struct snd_pcm_hw_constraint_list hw_constraints_rates = { + .count = ARRAY_SIZE(rates), + .list = rates, + .mask = 0, +}; +#endif + +static struct platform_device *s3c24xx_uda134x_snd_device; + +int s3c24xx_uda134x_startup(struct snd_pcm_substream *substream) +{ + int ret = 0; +#ifdef ENFORCE_RATES + struct snd_pcm_runtime *runtime = substream->runtime;; +#endif + + mutex_lock(&clk_lock); + pr_debug("%s %d\n", __func__, clk_users); + if (clk_users == 0) { + xtal = clk_get(&s3c24xx_uda134x_snd_device->dev, "xtal"); + if (!xtal) { + printk(KERN_ERR "%s cannot get xtal\n", __func__); + ret = -EBUSY; + } else { + pclk = clk_get(&s3c24xx_uda134x_snd_device->dev, + "pclk"); + if (!pclk) { + printk(KERN_ERR "%s cannot get pclk\n", + __func__); + clk_put(xtal); + ret = -EBUSY; + } + } + if (!ret) { + int i, j; + + for (i = 0; i < 2; i++) { + int fs = i ? 256 : 384; + + rates[i*33] = clk_get_rate(xtal) / fs; + for (j = 1; j < 33; j++) + rates[i*33 + j] = clk_get_rate(pclk) / + (j * fs); + } + } + } + clk_users += 1; + mutex_unlock(&clk_lock); + if (!ret) { +#ifdef ENFORCE_RATES + ret = snd_pcm_hw_constraint_list(runtime, 0, + SNDRV_PCM_HW_PARAM_RATE, + &hw_constraints_rates); + if (ret < 0) + printk(KERN_ERR "%s cannot set constraints\n", + __func__); +#endif + } + return ret; +} + +void s3c24xx_uda134x_shutdown(struct snd_pcm_substream *substream) +{ + mutex_lock(&clk_lock); + pr_debug("%s %d\n", __func__, clk_users); + clk_users -= 1; + if (clk_users == 0) { + clk_put(xtal); + xtal = NULL; + clk_put(pclk); + pclk = NULL; + } + mutex_unlock(&clk_lock); +} + +static int s3c24xx_uda134x_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_dai *codec_dai = rtd->dai->codec_dai; + struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai; + unsigned int clk = 0; + int ret = 0; + int clk_source, fs_mode; + unsigned long rate = params_rate(params); + long err, cerr; + unsigned int div; + int i, bi; + + err = 999999; + bi = 0; + for (i = 0; i < 2*33; i++) { + cerr = rates[i] - rate; + if (cerr < 0) + cerr = -cerr; + if (cerr < err) { + err = cerr; + bi = i; + } + } + if (bi / 33 == 1) + fs_mode = S3C2410_IISMOD_256FS; + else + fs_mode = S3C2410_IISMOD_384FS; + if (bi % 33 == 0) { + clk_source = S3C24XX_CLKSRC_MPLL; + div = 1; + } else { + clk_source = S3C24XX_CLKSRC_PCLK; + div = bi % 33; + } + pr_debug("%s desired rate %lu, %d\n", __func__, rate, bi); + + clk = (fs_mode == S3C2410_IISMOD_384FS ? 384 : 256) * rate; + pr_debug("%s will use: %s %s %d sysclk %d err %ld\n", __func__, + fs_mode == S3C2410_IISMOD_384FS ? "384FS" : "256FS", + clk_source == S3C24XX_CLKSRC_MPLL ? "MPLLin" : "PCLK", + div, clk, err); + + if ((err * 100 / rate) > 5) { + printk(KERN_ERR "S3C24XX_UDA134X: effective frequency " + "too different from desired (%ld%%)\n", + err * 100 / rate); + return -EINVAL; + } + + ret = codec_dai->dai_ops.set_fmt(codec_dai, SND_SOC_DAIFMT_I2S | + SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBS_CFS); + if (ret < 0) + return ret; + + ret = cpu_dai->dai_ops.set_fmt(cpu_dai, SND_SOC_DAIFMT_I2S | + SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBS_CFS); + if (ret < 0) + return ret; + + ret = cpu_dai->dai_ops.set_sysclk(cpu_dai, clk_source , clk, + SND_SOC_CLOCK_IN); + if (ret < 0) + return ret; + + ret = cpu_dai->dai_ops.set_clkdiv(cpu_dai, S3C24XX_DIV_MCLK, + fs_mode); + if (ret < 0) + return ret; + + ret = cpu_dai->dai_ops.set_clkdiv(cpu_dai, S3C24XX_DIV_BCLK, + S3C2410_IISMOD_32FS); + if (ret < 0) + return ret; + + ret = cpu_dai->dai_ops.set_clkdiv(cpu_dai, S3C24XX_DIV_PRESCALER, + S3C24XX_PRESCALE(div, div)); + if (ret < 0) + return ret; + + /* set the codec system clock for DAC and ADC */ + ret = codec_dai->dai_ops.set_sysclk(codec_dai, 0, clk, + SND_SOC_CLOCK_OUT); + if (ret < 0) + return ret; + + return 0; +} + +static struct snd_soc_ops s3c24xx_uda134x_ops = { + .startup = s3c24xx_uda134x_startup, + .shutdown = s3c24xx_uda134x_shutdown, + .hw_params = s3c24xx_uda134x_hw_params, +}; + +static struct snd_soc_dai_link s3c24xx_uda134x_dai_link = { + .name = "UDA134X", + .stream_name = "UDA134X", + .codec_dai = &uda134x_dai, + .cpu_dai = &s3c24xx_i2s_dai, + .ops = &s3c24xx_uda134x_ops, +}; + +static struct snd_soc_machine snd_soc_machine_s3c24xx_uda134x = { + .name = "S3C24XX_UDA134X", + .dai_link = &s3c24xx_uda134x_dai_link, + .num_links = 1, +}; + +static struct s3c24xx_uda134x_platform_data *s3c24xx_uda134x_l3_pins; + +static void setdat(int v) +{ + gpio_set_value(s3c24xx_uda134x_l3_pins->l3_data, v > 0); +} + +static void setclk(int v) +{ + gpio_set_value(s3c24xx_uda134x_l3_pins->l3_clk, v > 0); +} + +static void setmode(int v) +{ + gpio_set_value(s3c24xx_uda134x_l3_pins->l3_mode, v > 0); +} + +static struct uda134x_platform_data s3c24xx_uda134x = { + .l3 = { + .setdat = setdat, + .setclk = setclk, + .setmode = setmode, + .data_hold = 1, + .data_setup = 1, + .clock_high = 1, + .mode_hold = 1, + .mode = 1, + .mode_setup = 1, + }, +}; + +static struct snd_soc_device s3c24xx_uda134x_snd_devdata = { + .machine = &snd_soc_machine_s3c24xx_uda134x, + .platform = &s3c24xx_soc_platform, + .codec_dev = &soc_codec_dev_uda134x, + .codec_data = &s3c24xx_uda134x, +}; + +static int s3c24xx_uda134x_setup_pin(int pin, char *fun) +{ + if (gpio_request(pin, "s3c24xx_uda134x") < 0) { + printk(KERN_ERR "S3C24XX_UDA134X SoC Audio: " + "l3 %s pin already in use", fun); + return -EBUSY; + } + gpio_direction_output(pin, 0); + return 0; +} + +static int s3c24xx_uda134x_probe(struct platform_device *pdev) +{ + int ret; + + printk(KERN_INFO "S3C24XX_UDA134X SoC Audio driver\n"); + + s3c24xx_uda134x_l3_pins = pdev->dev.platform_data; + if (s3c24xx_uda134x_l3_pins == NULL) { + printk(KERN_ERR "S3C24XX_UDA134X SoC Audio: " + "unable to find platform data\n"); + return -ENODEV; + } + s3c24xx_uda134x.power = s3c24xx_uda134x_l3_pins->power; + s3c24xx_uda134x.model = s3c24xx_uda134x_l3_pins->model; + + if (s3c24xx_uda134x_setup_pin(s3c24xx_uda134x_l3_pins->l3_data, + "data") < 0) + return -EBUSY; + if (s3c24xx_uda134x_setup_pin(s3c24xx_uda134x_l3_pins->l3_clk, + "clk") < 0) { + gpio_free(s3c24xx_uda134x_l3_pins->l3_data); + return -EBUSY; + } + if (s3c24xx_uda134x_setup_pin(s3c24xx_uda134x_l3_pins->l3_mode, + "mode") < 0) { + gpio_free(s3c24xx_uda134x_l3_pins->l3_data); + gpio_free(s3c24xx_uda134x_l3_pins->l3_clk); + return -EBUSY; + } + + s3c24xx_uda134x_snd_device = platform_device_alloc("soc-audio", -1); + if (!s3c24xx_uda134x_snd_device) { + printk(KERN_ERR "S3C24XX_UDA134X SoC Audio: " + "Unable to register\n"); + return -ENOMEM; + } + + platform_set_drvdata(s3c24xx_uda134x_snd_device, + &s3c24xx_uda134x_snd_devdata); + s3c24xx_uda134x_snd_devdata.dev = &s3c24xx_uda134x_snd_device->dev; + ret = platform_device_add(s3c24xx_uda134x_snd_device); + if (ret) { + printk(KERN_ERR "S3C24XX_UDA134X SoC Audio: Unable to add\n"); + platform_device_put(s3c24xx_uda134x_snd_device); + } + + return ret; +} + +static int s3c24xx_uda134x_remove(struct platform_device *pdev) +{ + platform_device_unregister(s3c24xx_uda134x_snd_device); + gpio_free(s3c24xx_uda134x_l3_pins->l3_data); + gpio_free(s3c24xx_uda134x_l3_pins->l3_clk); + gpio_free(s3c24xx_uda134x_l3_pins->l3_mode); + return 0; +} + +static struct platform_driver s3c24xx_uda134x_driver = { + .probe = s3c24xx_uda134x_probe, + .remove = s3c24xx_uda134x_remove, + .driver = { + .name = "s3c24xx_uda134x", + .owner = THIS_MODULE, + }, +}; + +static int __init s3c24xx_uda134x_init(void) +{ + return platform_driver_register(&s3c24xx_uda134x_driver); +} + +static void __exit s3c24xx_uda134x_exit(void) +{ + platform_driver_unregister(&s3c24xx_uda134x_driver); +} + + +module_init(s3c24xx_uda134x_init); +module_exit(s3c24xx_uda134x_exit); + +MODULE_AUTHOR("Zoltan Devai, Christian Pellegrin "); +MODULE_DESCRIPTION("S3C24XX_UDA134X ALSA SoC audio driver"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 4dc06f9633444f426ef9960c53426f2d2ded64ac Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 17 Nov 2008 16:01:42 +0100 Subject: netfilter: nf_conntrack: connection tracking helper name persistent aliases This patch adds the macro MODULE_ALIAS_NFCT_HELPER that defines a way to provide generic and persistent aliases for the connection tracking helpers. This next patch requires this patch. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 3 +++ net/netfilter/nf_conntrack_amanda.c | 1 + net/netfilter/nf_conntrack_ftp.c | 1 + net/netfilter/nf_conntrack_h323_main.c | 1 + net/netfilter/nf_conntrack_irc.c | 1 + net/netfilter/nf_conntrack_netbios_ns.c | 1 + net/netfilter/nf_conntrack_pptp.c | 1 + net/netfilter/nf_conntrack_sane.c | 1 + net/netfilter/nf_conntrack_sip.c | 1 + net/netfilter/nf_conntrack_tftp.c | 1 + 10 files changed, 12 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index b76a8685b5b5..f11255e1ea35 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -298,5 +298,8 @@ do { \ local_bh_enable(); \ } while (0) +#define MODULE_ALIAS_NFCT_HELPER(helper) \ + MODULE_ALIAS("nfct-helper-" helper) + #endif /* __KERNEL__ */ #endif /* _NF_CONNTRACK_H */ diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index 38aedeeaf4e1..4f8fcf498545 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -30,6 +30,7 @@ MODULE_AUTHOR("Brian J. Murrell "); MODULE_DESCRIPTION("Amanda connection tracking module"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_conntrack_amanda"); +MODULE_ALIAS_NFCT_HELPER("amanda"); module_param(master_timeout, uint, 0600); MODULE_PARM_DESC(master_timeout, "timeout for the master connection"); diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 629500901bd4..703a4378074a 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -29,6 +29,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Rusty Russell "); MODULE_DESCRIPTION("ftp connection tracking helper"); MODULE_ALIAS("ip_conntrack_ftp"); +MODULE_ALIAS_NFCT_HELPER("ftp"); /* This is slow, but it's simple. --RR */ static char *ftp_buffer; diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 99bc803d1dd1..687bd633c3d7 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -1827,3 +1827,4 @@ MODULE_AUTHOR("Jing Min Zhao "); MODULE_DESCRIPTION("H.323 connection tracking helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_conntrack_h323"); +MODULE_ALIAS_NFCT_HELPER("h323"); diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 4d681a04447e..409c8be58e7c 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -41,6 +41,7 @@ MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("IRC (DCC) connection tracking helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_conntrack_irc"); +MODULE_ALIAS_NFCT_HELPER("irc"); module_param_array(ports, ushort, &ports_c, 0400); MODULE_PARM_DESC(ports, "port numbers of IRC servers"); diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 08404e6755fb..5af4273b4668 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -37,6 +37,7 @@ MODULE_AUTHOR("Patrick McHardy "); MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_conntrack_netbios_ns"); +MODULE_ALIAS_NFCT_HELPER("netbios_ns"); static unsigned int timeout __read_mostly = 3; module_param(timeout, uint, 0400); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 1bc3001d1827..9e169ef2e854 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -37,6 +37,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP"); MODULE_ALIAS("ip_conntrack_pptp"); +MODULE_ALIAS_NFCT_HELPER("pptp"); static DEFINE_SPINLOCK(nf_pptp_lock); diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index a94294b2b23c..dcfecbb81c46 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -30,6 +30,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Michal Schmidt "); MODULE_DESCRIPTION("SANE connection tracking helper"); +MODULE_ALIAS_NFCT_HELPER("sane"); static char *sane_buffer; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 6813f1c8863f..4b572163784b 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -28,6 +28,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Christian Hentschel "); MODULE_DESCRIPTION("SIP connection tracking helper"); MODULE_ALIAS("ip_conntrack_sip"); +MODULE_ALIAS_NFCT_HELPER("sip"); #define MAX_PORTS 8 static unsigned short ports[MAX_PORTS]; diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index f57f6e7a71ee..46e646b2e9b9 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -22,6 +22,7 @@ MODULE_AUTHOR("Magnus Boden "); MODULE_DESCRIPTION("TFTP connection tracking helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_conntrack_tftp"); +MODULE_ALIAS_NFCT_HELPER("tftp"); #define MAX_PORTS 8 static unsigned short ports[MAX_PORTS]; -- cgit v1.2.3 From 0231022cc32d5f2e7f3c06b75691dda0ad6aec33 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 17 Nov 2008 03:22:41 +0100 Subject: tracing/function-return-tracer: add the overrun field Impact: help to find the better depth of trace We decided to arbitrary define the depth of function return trace as "20". Perhaps this is not enough. To help finding an optimal depth, we measure now the overrun: the number of functions that have been missed for the current thread. By default this is not displayed, we have to do set a particular flag on the return tracer: echo overrun > /debug/tracing/trace_options And the overrun will be printed on the right. As the trace shows below, the current 20 depth is not enough. update_wall_time+0x37f/0x8c0 -> update_xtime_cache (345 ns) (Overruns: 2838) update_wall_time+0x384/0x8c0 -> clocksource_get_next (1141 ns) (Overruns: 2838) do_timer+0x23/0x100 -> update_wall_time (3882 ns) (Overruns: 2838) tick_do_update_jiffies64+0xbf/0x160 -> do_timer (5339 ns) (Overruns: 2838) tick_sched_timer+0x6a/0xf0 -> tick_do_update_jiffies64 (7209 ns) (Overruns: 2838) vgacon_set_cursor_size+0x98/0x120 -> native_io_delay (2613 ns) (Overruns: 274) vgacon_cursor+0x16e/0x1d0 -> vgacon_set_cursor_size (33151 ns) (Overruns: 274) set_cursor+0x5f/0x80 -> vgacon_cursor (36432 ns) (Overruns: 274) con_flush_chars+0x34/0x40 -> set_cursor (38790 ns) (Overruns: 274) release_console_sem+0x1ec/0x230 -> up (721 ns) (Overruns: 274) release_console_sem+0x225/0x230 -> wake_up_klogd (316 ns) (Overruns: 274) con_flush_chars+0x39/0x40 -> release_console_sem (2996 ns) (Overruns: 274) con_write+0x22/0x30 -> con_flush_chars (46067 ns) (Overruns: 274) n_tty_write+0x1cc/0x360 -> con_write (292670 ns) (Overruns: 274) smp_apic_timer_interrupt+0x2a/0x90 -> native_apic_mem_write (330 ns) (Overruns: 274) irq_enter+0x17/0x70 -> idle_cpu (413 ns) (Overruns: 274) smp_apic_timer_interrupt+0x2f/0x90 -> irq_enter (1525 ns) (Overruns: 274) ktime_get_ts+0x40/0x70 -> getnstimeofday (465 ns) (Overruns: 274) ktime_get_ts+0x60/0x70 -> set_normalized_timespec (436 ns) (Overruns: 274) ktime_get+0x16/0x30 -> ktime_get_ts (2501 ns) (Overruns: 274) hrtimer_interrupt+0x77/0x1a0 -> ktime_get (3439 ns) (Overruns: 274) Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/include/asm/thread_info.h | 7 +++++++ arch/x86/kernel/ftrace.c | 10 ++++++--- include/linux/ftrace.h | 2 ++ include/linux/sched.h | 1 + kernel/trace/trace.c | 1 + kernel/trace/trace.h | 1 + kernel/trace/trace_functions_return.c | 38 +++++++++++++++++++++++++++++------ 7 files changed, 51 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index a71158369fd4..e90e81ef6ab9 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -21,6 +21,7 @@ struct task_struct; struct exec_domain; #include #include +#include struct thread_info { struct task_struct *task; /* main task structure */ @@ -45,6 +46,11 @@ struct thread_info { int curr_ret_stack; /* Stack of return addresses for return function tracing */ struct ftrace_ret_stack ret_stack[FTRACE_RET_STACK_SIZE]; + /* + * Number of functions that haven't been traced + * because of depth overrun. + */ + atomic_t trace_overrun; #endif }; @@ -61,6 +67,7 @@ struct thread_info { .fn = do_no_restart_syscall, \ }, \ .curr_ret_stack = -1,\ + .trace_overrun = ATOMIC_INIT(0) \ } #else #define INIT_THREAD_INFO(tsk) \ diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 924153edd973..356bb1eb6e9a 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -353,8 +353,10 @@ static int push_return_trace(unsigned long ret, unsigned long long time, struct thread_info *ti = current_thread_info(); /* The return trace stack is full */ - if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1) + if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1) { + atomic_inc(&ti->trace_overrun); return -EBUSY; + } index = ++ti->curr_ret_stack; barrier(); @@ -367,7 +369,7 @@ static int push_return_trace(unsigned long ret, unsigned long long time, /* Retrieve a function return address to the trace stack on thread info.*/ static void pop_return_trace(unsigned long *ret, unsigned long long *time, - unsigned long *func) + unsigned long *func, unsigned long *overrun) { int index; @@ -376,6 +378,7 @@ static void pop_return_trace(unsigned long *ret, unsigned long long *time, *ret = ti->ret_stack[index].ret; *func = ti->ret_stack[index].func; *time = ti->ret_stack[index].calltime; + *overrun = atomic_read(&ti->trace_overrun); ti->curr_ret_stack--; } @@ -386,7 +389,8 @@ static void pop_return_trace(unsigned long *ret, unsigned long long *time, unsigned long ftrace_return_to_handler(void) { struct ftrace_retfunc trace; - pop_return_trace(&trace.ret, &trace.calltime, &trace.func); + pop_return_trace(&trace.ret, &trace.calltime, &trace.func, + &trace.overrun); trace.rettime = cpu_clock(raw_smp_processor_id()); ftrace_function_return(&trace); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f1af1aab00e6..f7ba4ea5e128 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -318,6 +318,8 @@ struct ftrace_retfunc { unsigned long func; /* Current function */ unsigned long long calltime; unsigned long long rettime; + /* Number of functions that overran the depth limit for current task */ + unsigned long overrun; }; #ifdef CONFIG_FUNCTION_RET_TRACER diff --git a/include/linux/sched.h b/include/linux/sched.h index 61c8cc36028a..c8e0db464206 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2016,6 +2016,7 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct * used. */ task_thread_info(p)->curr_ret_stack = -1; + atomic_set(&task_thread_info(p)->trace_overrun, 0); #endif } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9531fddcfb8d..e97c29a6e7b0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -853,6 +853,7 @@ static void __trace_function_return(struct trace_array *tr, entry->parent_ip = trace->ret; entry->rettime = trace->rettime; entry->calltime = trace->calltime; + entry->overrun = trace->overrun; ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); } #endif diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 9d22618bf99f..2cb12fd98f6b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -60,6 +60,7 @@ struct ftrace_ret_entry { unsigned long parent_ip; unsigned long long calltime; unsigned long long rettime; + unsigned long overrun; }; extern struct tracer boot_tracer; diff --git a/kernel/trace/trace_functions_return.c b/kernel/trace/trace_functions_return.c index a68564af022b..e00d64509c9c 100644 --- a/kernel/trace/trace_functions_return.c +++ b/kernel/trace/trace_functions_return.c @@ -14,6 +14,19 @@ #include "trace.h" +#define TRACE_RETURN_PRINT_OVERRUN 0x1 +static struct tracer_opt trace_opts[] = { + /* Display overruns or not */ + { TRACER_OPT(overrun, TRACE_RETURN_PRINT_OVERRUN) }, + { } /* Empty entry */ +}; + +static struct tracer_flags tracer_flags = { + .val = 0, /* Don't display overruns by default */ + .opts = trace_opts +}; + + static int return_trace_init(struct trace_array *tr) { int cpu; @@ -42,26 +55,39 @@ print_return_function(struct trace_iterator *iter) ret = trace_seq_printf(s, "%pF -> ", (void *)field->parent_ip); if (!ret) return TRACE_TYPE_PARTIAL_LINE; + ret = seq_print_ip_sym(s, field->ip, trace_flags & TRACE_ITER_SYM_MASK); if (!ret) return TRACE_TYPE_PARTIAL_LINE; - ret = trace_seq_printf(s, " (%llu ns)\n", + + ret = trace_seq_printf(s, " (%llu ns)", field->rettime - field->calltime); if (!ret) return TRACE_TYPE_PARTIAL_LINE; - else - return TRACE_TYPE_HANDLED; + + if (tracer_flags.val & TRACE_RETURN_PRINT_OVERRUN) { + ret = trace_seq_printf(s, " (Overruns: %lu)", + field->overrun); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + ret = trace_seq_printf(s, "\n"); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; } return TRACE_TYPE_UNHANDLED; } -static struct tracer return_trace __read_mostly = -{ +static struct tracer return_trace __read_mostly = { .name = "return", .init = return_trace_init, .reset = return_trace_reset, - .print_line = print_return_function + .print_line = print_return_function, + .flags = &tracer_flags, }; static __init int init_return_trace(void) -- cgit v1.2.3 From 226c0c0ef2abdf91b8d9cce1aaf7d4635a5e5926 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 18 Nov 2008 11:54:05 +0100 Subject: netfilter: ctnetlink: helper modules load-on-demand support This patch adds module loading for helpers via ctnetlink. * Creation path: We support explicit and implicit helper assignation. For the explicit case, we try to load the module. If the module is correctly loaded and the helper is present, we return EAGAIN to re-start the creation. Otherwise, we return EOPNOTSUPP. * Update path: release the spin lock, load the module and check. If it is present, then return EAGAIN to re-start the update. This patch provides a refactorized function to lookup-and-set the connection tracking helper. The function removes the exported symbol __nf_ct_helper_find as it has not clients anymore. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_helper.h | 5 +-- net/netfilter/nf_conntrack_core.c | 28 +----------- net/netfilter/nf_conntrack_helper.c | 32 ++++++++++++- net/netfilter/nf_conntrack_netlink.c | 70 +++++++++++++++++++++++++---- 4 files changed, 95 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index f8060ab5a083..66d65a7caa39 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -38,9 +38,6 @@ struct nf_conntrack_helper unsigned int expect_class_max; }; -extern struct nf_conntrack_helper * -__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple); - extern struct nf_conntrack_helper * __nf_conntrack_helper_find_byname(const char *name); @@ -49,6 +46,8 @@ extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp); +extern int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags); + static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct) { return nf_ct_ext_find(ct, NF_CT_EXT_HELPER); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 622d7c671cb7..1e649fb9e0df 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -588,14 +588,7 @@ init_conntrack(struct net *net, nf_conntrack_get(&ct->master->ct_general); NF_CT_STAT_INC(net, expect_new); } else { - struct nf_conntrack_helper *helper; - - helper = __nf_ct_helper_find(&repl_tuple); - if (helper) { - help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); - if (help) - rcu_assign_pointer(help->helper, helper); - } + __nf_ct_try_assign_helper(ct, GFP_ATOMIC); NF_CT_STAT_INC(net, new); } @@ -772,7 +765,6 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, const struct nf_conntrack_tuple *newreply) { struct nf_conn_help *help = nfct_help(ct); - struct nf_conntrack_helper *helper; /* Should be unconfirmed, so not in hash table yet */ NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); @@ -785,23 +777,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, return; rcu_read_lock(); - helper = __nf_ct_helper_find(newreply); - if (helper == NULL) { - if (help) - rcu_assign_pointer(help->helper, NULL); - goto out; - } - - if (help == NULL) { - help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); - if (help == NULL) - goto out; - } else { - memset(&help->help, 0, sizeof(help->help)); - } - - rcu_assign_pointer(help->helper, helper); -out: + __nf_ct_try_assign_helper(ct, GFP_ATOMIC); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 9c06b9f86ad4..9e4b74b95ce8 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -44,7 +44,7 @@ static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple) (__force __u16)tuple->src.u.all) % nf_ct_helper_hsize; } -struct nf_conntrack_helper * +static struct nf_conntrack_helper * __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_helper *helper; @@ -62,7 +62,6 @@ __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) } return NULL; } -EXPORT_SYMBOL_GPL(__nf_ct_helper_find); struct nf_conntrack_helper * __nf_conntrack_helper_find_byname(const char *name) @@ -94,6 +93,35 @@ struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) } EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add); +int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags) +{ + int ret = 0; + struct nf_conntrack_helper *helper; + struct nf_conn_help *help = nfct_help(ct); + + helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + if (helper == NULL) { + if (help) + rcu_assign_pointer(help->helper, NULL); + goto out; + } + + if (help == NULL) { + help = nf_ct_helper_ext_add(ct, flags); + if (help == NULL) { + ret = -ENOMEM; + goto out; + } + } else { + memset(&help->help, 0, sizeof(help->help)); + } + + rcu_assign_pointer(help->helper, helper); +out: + return ret; +} +EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper); + static inline int unhelp(struct nf_conntrack_tuple_hash *i, const struct nf_conntrack_helper *me) { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 49a04fa0becc..4f6486cfd337 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -917,8 +917,22 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[]) } helper = __nf_conntrack_helper_find_byname(helpname); - if (helper == NULL) + if (helper == NULL) { +#ifdef CONFIG_MODULES + spin_unlock_bh(&nf_conntrack_lock); + + if (request_module("nfct-helper-%s", helpname) < 0) { + spin_lock_bh(&nf_conntrack_lock); + return -EOPNOTSUPP; + } + + spin_lock_bh(&nf_conntrack_lock); + helper = __nf_conntrack_helper_find_byname(helpname); + if (helper) + return -EAGAIN; +#endif return -EOPNOTSUPP; + } if (help) { if (help->helper == helper) @@ -1082,7 +1096,6 @@ ctnetlink_create_conntrack(struct nlattr *cda[], { struct nf_conn *ct; int err = -EINVAL; - struct nf_conn_help *help; struct nf_conntrack_helper *helper; ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_KERNEL); @@ -1097,16 +1110,55 @@ ctnetlink_create_conntrack(struct nlattr *cda[], ct->status |= IPS_CONFIRMED; rcu_read_lock(); - helper = __nf_ct_helper_find(rtuple); - if (helper) { - help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); - if (help == NULL) { + if (cda[CTA_HELP]) { + char *helpname; + + err = ctnetlink_parse_help(cda[CTA_HELP], &helpname); + if (err < 0) { + rcu_read_unlock(); + goto err; + } + + helper = __nf_conntrack_helper_find_byname(helpname); + if (helper == NULL) { + rcu_read_unlock(); +#ifdef CONFIG_MODULES + if (request_module("nfct-helper-%s", helpname) < 0) { + err = -EOPNOTSUPP; + goto err; + } + + rcu_read_lock(); + helper = __nf_conntrack_helper_find_byname(helpname); + if (helper) { + rcu_read_unlock(); + err = -EAGAIN; + goto err; + } + rcu_read_unlock(); +#endif + err = -EOPNOTSUPP; + goto err; + } else { + struct nf_conn_help *help; + + help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); + if (help == NULL) { + rcu_read_unlock(); + err = -ENOMEM; + goto err; + } + + /* not in hash table yet so not strictly necessary */ + rcu_assign_pointer(help->helper, helper); + } + } else { + /* try an implicit helper assignation */ + err = __nf_ct_try_assign_helper(ct, GFP_ATOMIC); + if (err < 0) { rcu_read_unlock(); - err = -ENOMEM; goto err; } - /* not in hash table yet so not strictly necessary */ - rcu_assign_pointer(help->helper, helper); } if (cda[CTA_STATUS]) { -- cgit v1.2.3 From 19abb7b090a6bce88d4e9b2914a0367f4f684432 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 18 Nov 2008 11:56:20 +0100 Subject: netfilter: ctnetlink: deliver events for conntracks changed from userspace As for now, the creation and update of conntracks via ctnetlink do not propagate an event to userspace. This can result in inconsistent situations if several userspace processes modify the connection tracking table by means of ctnetlink at the same time. Specifically, using the conntrack command line tool and conntrackd at the same time can trigger unconsistencies. This patch also modifies the event cache infrastructure to pass the process PID and the ECHO flag to nfnetlink_send() to report back to userspace if the process that triggered the change needs so. Based on a suggestion from Patrick McHardy. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 2 +- include/net/netfilter/nf_conntrack_ecache.h | 57 +++++++++++++++++-- include/net/netfilter/nf_conntrack_expect.h | 2 + net/netfilter/nf_conntrack_core.c | 25 ++++++-- net/netfilter/nf_conntrack_ecache.c | 14 ++++- net/netfilter/nf_conntrack_expect.c | 43 +++++++++++--- net/netfilter/nf_conntrack_netlink.c | 88 ++++++++++++++++++++++++----- 7 files changed, 197 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index f11255e1ea35..2e0c53641cbe 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -199,7 +199,7 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple); extern void nf_conntrack_hash_insert(struct nf_conn *ct); -extern void nf_conntrack_flush(struct net *net); +extern void nf_conntrack_flush(struct net *net, u32 pid, int report); extern bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, u_int16_t l3num, diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 1285ff26a014..0ff0dc69ca4a 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -17,6 +17,13 @@ struct nf_conntrack_ecache { unsigned int events; }; +/* This structure is passed to event handler */ +struct nf_ct_event { + struct nf_conn *ct; + u32 pid; + int report; +}; + extern struct atomic_notifier_head nf_conntrack_chain; extern int nf_conntrack_register_notifier(struct notifier_block *nb); extern int nf_conntrack_unregister_notifier(struct notifier_block *nb); @@ -39,22 +46,56 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) local_bh_enable(); } -static inline void nf_conntrack_event(enum ip_conntrack_events event, - struct nf_conn *ct) +static inline void +nf_conntrack_event_report(enum ip_conntrack_events event, + struct nf_conn *ct, + u32 pid, + int report) { + struct nf_ct_event item = { + .ct = ct, + .pid = pid, + .report = report + }; if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) - atomic_notifier_call_chain(&nf_conntrack_chain, event, ct); + atomic_notifier_call_chain(&nf_conntrack_chain, event, &item); } +static inline void +nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) +{ + nf_conntrack_event_report(event, ct, 0, 0); +} + +struct nf_exp_event { + struct nf_conntrack_expect *exp; + u32 pid; + int report; +}; + extern struct atomic_notifier_head nf_ct_expect_chain; extern int nf_ct_expect_register_notifier(struct notifier_block *nb); extern int nf_ct_expect_unregister_notifier(struct notifier_block *nb); +static inline void +nf_ct_expect_event_report(enum ip_conntrack_expect_events event, + struct nf_conntrack_expect *exp, + u32 pid, + int report) +{ + struct nf_exp_event item = { + .exp = exp, + .pid = pid, + .report = report + }; + atomic_notifier_call_chain(&nf_ct_expect_chain, event, &item); +} + static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event, struct nf_conntrack_expect *exp) { - atomic_notifier_call_chain(&nf_ct_expect_chain, event, exp); + nf_ct_expect_event_report(event, exp, 0, 0); } extern int nf_conntrack_ecache_init(struct net *net); @@ -66,9 +107,17 @@ static inline void nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) {} static inline void nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) {} +static inline void nf_conntrack_event_report(enum ip_conntrack_events event, + struct nf_conn *ct, + u32 pid, + int report) {} static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {} static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event, struct nf_conntrack_expect *exp) {} +static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e, + struct nf_conntrack_expect *exp, + u32 pid, + int report) {} static inline void nf_ct_event_cache_flush(struct net *net) {} static inline int nf_conntrack_ecache_init(struct net *net) diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 37a7fc1164b0..ab17a159ac66 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -100,6 +100,8 @@ void nf_ct_expect_init(struct nf_conntrack_expect *, unsigned int, u_int8_t, u_int8_t, const __be16 *, const __be16 *); void nf_ct_expect_put(struct nf_conntrack_expect *exp); int nf_ct_expect_related(struct nf_conntrack_expect *expect); +int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, + u32 pid, int report); #endif /*_NF_CONNTRACK_EXPECT_H*/ diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 1e649fb9e0df..dc3fea09f3fc 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -181,7 +181,8 @@ destroy_conntrack(struct nf_conntrack *nfct) NF_CT_ASSERT(atomic_read(&nfct->use) == 0); NF_CT_ASSERT(!timer_pending(&ct->timeout)); - nf_conntrack_event(IPCT_DESTROY, ct); + if (!test_bit(IPS_DYING_BIT, &ct->status)) + nf_conntrack_event(IPCT_DESTROY, ct); set_bit(IPS_DYING_BIT, &ct->status); /* To make sure we don't get any weird locking issues here: @@ -972,8 +973,20 @@ void nf_ct_iterate_cleanup(struct net *net, } EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup); +struct __nf_ct_flush_report { + u32 pid; + int report; +}; + static int kill_all(struct nf_conn *i, void *data) { + struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data; + + /* get_next_corpse sets the dying bit for us */ + nf_conntrack_event_report(IPCT_DESTROY, + i, + fr->pid, + fr->report); return 1; } @@ -987,9 +1000,13 @@ void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int s } EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); -void nf_conntrack_flush(struct net *net) +void nf_conntrack_flush(struct net *net, u32 pid, int report) { - nf_ct_iterate_cleanup(net, kill_all, NULL); + struct __nf_ct_flush_report fr = { + .pid = pid, + .report = report, + }; + nf_ct_iterate_cleanup(net, kill_all, &fr); } EXPORT_SYMBOL_GPL(nf_conntrack_flush); @@ -1005,7 +1022,7 @@ static void nf_conntrack_cleanup_net(struct net *net) nf_ct_event_cache_flush(net); nf_conntrack_ecache_fini(net); i_see_dead_people: - nf_conntrack_flush(net); + nf_conntrack_flush(net, 0, 0); if (atomic_read(&net->ct.count) != 0) { schedule(); goto i_see_dead_people; diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index a5f5e2e65d13..dee4190209cc 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -35,9 +35,17 @@ static inline void __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache) { if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct) - && ecache->events) - atomic_notifier_call_chain(&nf_conntrack_chain, ecache->events, - ecache->ct); + && ecache->events) { + struct nf_ct_event item = { + .ct = ecache->ct, + .pid = 0, + .report = 0 + }; + + atomic_notifier_call_chain(&nf_conntrack_chain, + ecache->events, + &item); + } ecache->events = 0; nf_ct_put(ecache->ct); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 37a703bc3b8e..3a8a34a6d37c 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -362,7 +362,7 @@ static inline int refresh_timer(struct nf_conntrack_expect *i) return 1; } -int nf_ct_expect_related(struct nf_conntrack_expect *expect) +static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) { const struct nf_conntrack_expect_policy *p; struct nf_conntrack_expect *i; @@ -371,11 +371,8 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect) struct net *net = nf_ct_exp_net(expect); struct hlist_node *n; unsigned int h; - int ret; - - NF_CT_ASSERT(master_help); + int ret = 0; - spin_lock_bh(&nf_conntrack_lock); if (!master_help->helper) { ret = -ESHUTDOWN; goto out; @@ -409,18 +406,50 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect) printk(KERN_WARNING "nf_conntrack: expectation table full\n"); ret = -EMFILE; - goto out; } +out: + return ret; +} + +int nf_ct_expect_related(struct nf_conntrack_expect *expect) +{ + int ret; + + spin_lock_bh(&nf_conntrack_lock); + ret = __nf_ct_expect_check(expect); + if (ret < 0) + goto out; nf_ct_expect_insert(expect); + atomic_inc(&expect->use); + spin_unlock_bh(&nf_conntrack_lock); nf_ct_expect_event(IPEXP_NEW, expect); - ret = 0; + nf_ct_expect_put(expect); + return ret; out: spin_unlock_bh(&nf_conntrack_lock); return ret; } EXPORT_SYMBOL_GPL(nf_ct_expect_related); +int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, + u32 pid, int report) +{ + int ret; + + spin_lock_bh(&nf_conntrack_lock); + ret = __nf_ct_expect_check(expect); + if (ret < 0) + goto out; + nf_ct_expect_insert(expect); +out: + spin_unlock_bh(&nf_conntrack_lock); + if (ret == 0) + nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report); + return ret; +} +EXPORT_SYMBOL_GPL(nf_ct_expect_related_report); + #ifdef CONFIG_PROC_FS struct ct_expect_iter_state { struct seq_net_private p; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 4f6486cfd337..ccc5ef1d7573 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -410,7 +410,8 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct nlattr *nest_parms; - struct nf_conn *ct = (struct nf_conn *)ptr; + struct nf_ct_event *item = (struct nf_ct_event *)ptr; + struct nf_conn *ct = item->ct; struct sk_buff *skb; unsigned int type; sk_buff_data_t b; @@ -443,7 +444,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, b = skb->tail; type |= NFNL_SUBSYS_CTNETLINK << 8; - nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg)); + nlh = NLMSG_PUT(skb, item->pid, 0, type, sizeof(struct nfgenmsg)); nfmsg = NLMSG_DATA(nlh); nlh->nlmsg_flags = flags; @@ -511,7 +512,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, rcu_read_unlock(); nlh->nlmsg_len = skb->tail - b; - nfnetlink_send(skb, 0, group, 0); + nfnetlink_send(skb, item->pid, group, item->report); return NOTIFY_DONE; nla_put_failure: @@ -722,7 +723,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); else { /* Flush the whole table */ - nf_conntrack_flush(&init_net); + nf_conntrack_flush(&init_net, + NETLINK_CB(skb).pid, + nlmsg_report(nlh)); return 0; } @@ -743,6 +746,14 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, } } + nf_conntrack_event_report(IPCT_DESTROY, + ct, + NETLINK_CB(skb).pid, + nlmsg_report(nlh)); + + /* death_by_timeout would report the event again */ + set_bit(IPS_DYING_BIT, &ct->status); + nf_ct_kill(ct); nf_ct_put(ct); @@ -1088,11 +1099,35 @@ ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[]) return 0; } +static inline void +ctnetlink_event_report(struct nf_conn *ct, u32 pid, int report) +{ + unsigned int events = 0; + + if (test_bit(IPS_EXPECTED_BIT, &ct->status)) + events |= IPCT_RELATED; + else + events |= IPCT_NEW; + + nf_conntrack_event_report(IPCT_STATUS | + IPCT_HELPER | + IPCT_REFRESH | + IPCT_PROTOINFO | + IPCT_NATSEQADJ | + IPCT_MARK | + events, + ct, + pid, + report); +} + static int ctnetlink_create_conntrack(struct nlattr *cda[], struct nf_conntrack_tuple *otuple, struct nf_conntrack_tuple *rtuple, - struct nf_conn *master_ct) + struct nf_conn *master_ct, + u32 pid, + int report) { struct nf_conn *ct; int err = -EINVAL; @@ -1198,9 +1233,12 @@ ctnetlink_create_conntrack(struct nlattr *cda[], ct->master = master_ct; } + nf_conntrack_get(&ct->ct_general); add_timer(&ct->timeout); nf_conntrack_hash_insert(ct); rcu_read_unlock(); + ctnetlink_event_report(ct, pid, report); + nf_ct_put(ct); return 0; @@ -1265,7 +1303,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, err = ctnetlink_create_conntrack(cda, &otuple, &rtuple, - master_ct); + master_ct, + NETLINK_CB(skb).pid, + nlmsg_report(nlh)); if (err < 0 && master_ct) nf_ct_put(master_ct); @@ -1277,6 +1317,8 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, * so there's no need to increase the refcount */ err = -EEXIST; if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + /* we only allow nat config for new conntracks */ if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { err = -EOPNOTSUPP; @@ -1287,8 +1329,19 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, err = -EOPNOTSUPP; goto out_unlock; } - err = ctnetlink_change_conntrack(nf_ct_tuplehash_to_ctrack(h), - cda); + + err = ctnetlink_change_conntrack(ct, cda); + if (err == 0) { + nf_conntrack_get(&ct->ct_general); + spin_unlock_bh(&nf_conntrack_lock); + ctnetlink_event_report(ct, + NETLINK_CB(skb).pid, + nlmsg_report(nlh)); + nf_ct_put(ct); + } else + spin_unlock_bh(&nf_conntrack_lock); + + return err; } out_unlock: @@ -1423,7 +1476,8 @@ static int ctnetlink_expect_event(struct notifier_block *this, { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - struct nf_conntrack_expect *exp = (struct nf_conntrack_expect *)ptr; + struct nf_exp_event *item = (struct nf_exp_event *)ptr; + struct nf_conntrack_expect *exp = item->exp; struct sk_buff *skb; unsigned int type; sk_buff_data_t b; @@ -1445,7 +1499,7 @@ static int ctnetlink_expect_event(struct notifier_block *this, b = skb->tail; type |= NFNL_SUBSYS_CTNETLINK_EXP << 8; - nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg)); + nlh = NLMSG_PUT(skb, item->pid, 0, type, sizeof(struct nfgenmsg)); nfmsg = NLMSG_DATA(nlh); nlh->nlmsg_flags = flags; @@ -1459,7 +1513,7 @@ static int ctnetlink_expect_event(struct notifier_block *this, rcu_read_unlock(); nlh->nlmsg_len = skb->tail - b; - nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0); + nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report); return NOTIFY_DONE; nla_put_failure: @@ -1673,7 +1727,7 @@ ctnetlink_change_expect(struct nf_conntrack_expect *x, struct nlattr *cda[]) } static int -ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3) +ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3, u32 pid, int report) { struct nf_conntrack_tuple tuple, mask, master_tuple; struct nf_conntrack_tuple_hash *h = NULL; @@ -1720,7 +1774,7 @@ ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3) memcpy(&exp->mask.src.u3, &mask.src.u3, sizeof(exp->mask.src.u3)); exp->mask.src.u.all = mask.src.u.all; - err = nf_ct_expect_related(exp); + err = nf_ct_expect_related_report(exp, pid, report); nf_ct_expect_put(exp); out: @@ -1753,8 +1807,12 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, if (!exp) { spin_unlock_bh(&nf_conntrack_lock); err = -ENOENT; - if (nlh->nlmsg_flags & NLM_F_CREATE) - err = ctnetlink_create_expect(cda, u3); + if (nlh->nlmsg_flags & NLM_F_CREATE) { + err = ctnetlink_create_expect(cda, + u3, + NETLINK_CB(skb).pid, + nlmsg_report(nlh)); + } return err; } -- cgit v1.2.3 From d9e150071d18b5c87ba7a097af4063a5ad0c6a0c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 18 Nov 2008 12:16:52 +0100 Subject: netfilter: nfnetlink_log: fix warning and prototype mismatch net/netfilter/nfnetlink_log.c:537:1: warning: symbol 'nfulnl_log_packet' was not declared. Should it be static? Including the proper header also revealed an incorrect prototype. Signed-off-by: Patrick McHardy --- include/net/netfilter/nfnetlink_log.h | 2 +- net/netfilter/nfnetlink_log.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nfnetlink_log.h b/include/net/netfilter/nfnetlink_log.h index 9b67f948a8d7..b0569ff0775e 100644 --- a/include/net/netfilter/nfnetlink_log.h +++ b/include/net/netfilter/nfnetlink_log.h @@ -2,7 +2,7 @@ #define _KER_NFNETLINK_LOG_H void -nfulnl_log_packet(unsigned int pf, +nfulnl_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index a51892b3f01a..2770b4e57ea0 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -30,6 +30,7 @@ #include #include #include +#include #include -- cgit v1.2.3 From ca3ea02e90d63a6a91c1c2a445d6d71f9031a44a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 18 Nov 2008 20:40:36 +0000 Subject: ASoC: Remove unused snd_soc_machine_config declaration Signed-off-by: Mark Brown --- include/sound/soc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index fa1b99b45893..077dfe4e51f0 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -228,7 +228,6 @@ struct snd_soc_dai_mode; struct snd_soc_pcm_runtime; struct snd_soc_dai; struct snd_soc_codec; -struct snd_soc_machine_config; struct soc_enum; struct snd_soc_ac97_ops; struct snd_soc_clock_info; -- cgit v1.2.3 From 1e291b14c8f1101b9093434489bd4dc0e03f3d0f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 12 Nov 2008 18:54:42 +0000 Subject: of: Add helpers for finding device nodes which have a given property This commit adds a routine for finding a device node which has a certain property. The contents of the property are not taken into account, merely the presence or absence of the property. Based on that routine, we add a for_each_ macro for iterating over all nodes that have a certain property. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- drivers/of/base.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/of.h | 6 ++++++ 2 files changed, 41 insertions(+) (limited to 'include') diff --git a/drivers/of/base.c b/drivers/of/base.c index 7c79e94a35ea..4f884a358a7b 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -328,6 +328,41 @@ struct device_node *of_find_compatible_node(struct device_node *from, } EXPORT_SYMBOL(of_find_compatible_node); +/** + * of_find_node_with_property - Find a node which has a property with + * the given name. + * @from: The node to start searching from or NULL, the node + * you pass will not be searched, only the next one + * will; typically, you pass what the previous call + * returned. of_node_put() will be called on it + * @prop_name: The name of the property to look for. + * + * Returns a node pointer with refcount incremented, use + * of_node_put() on it when done. + */ +struct device_node *of_find_node_with_property(struct device_node *from, + const char *prop_name) +{ + struct device_node *np; + struct property *pp; + + read_lock(&devtree_lock); + np = from ? from->allnext : allnodes; + for (; np; np = np->allnext) { + for (pp = np->properties; pp != 0; pp = pp->next) { + if (of_prop_cmp(pp->name, prop_name) == 0) { + of_node_get(np); + goto out; + } + } + } +out: + of_node_put(from); + read_unlock(&devtree_lock); + return np; +} +EXPORT_SYMBOL(of_find_node_with_property); + /** * of_match_node - Tell if an device_node has a matching of_match structure * @matches: array of of device match structures to search in diff --git a/include/linux/of.h b/include/linux/of.h index e2488f5e7cb2..6a7efa242f5e 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -57,6 +57,12 @@ extern struct device_node *of_get_next_child(const struct device_node *node, for (child = of_get_next_child(parent, NULL); child != NULL; \ child = of_get_next_child(parent, child)) +extern struct device_node *of_find_node_with_property( + struct device_node *from, const char *prop_name); +#define for_each_node_with_property(dn, prop_name) \ + for (dn = of_find_node_with_property(NULL, prop_name); dn; \ + dn = of_find_node_with_property(dn, prop_name)) + extern struct property *of_find_property(const struct device_node *np, const char *name, int *lenp); -- cgit v1.2.3 From 07f0757a6808f2f36a0e58c3a54867ccffdb8dc9 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 19 Nov 2008 15:44:53 -0800 Subject: include/net net/ - csum_partial - remove unnecessary casts The first argument to csum_partial is const void * casts to char/u8 * are not necessary Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/checksum.h | 2 +- include/net/ip_vs.h | 6 +++--- net/core/netpoll.c | 2 +- net/ipv4/inet_lro.c | 4 ++-- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv6/icmp.c | 4 ++-- net/ipv6/mcast.c | 2 +- net/ipv6/ndisc.c | 4 ++-- net/ipv6/tcp_ipv6.c | 6 +++--- net/unix/af_unix.c | 4 ++-- 10 files changed, 19 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/net/checksum.h b/include/net/checksum.h index 07602b7fa218..ba55d8b8c87c 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -98,7 +98,7 @@ static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) { __be32 diff[] = { ~from, to }; - *sum = csum_fold(csum_partial((char *)diff, sizeof(diff), ~csum_unfold(*sum))); + *sum = csum_fold(csum_partial(diff, sizeof(diff), ~csum_unfold(*sum))); } static inline void csum_replace2(__sum16 *sum, __be16 from, __be16 to) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 8f6abf4883e3..ab9b003ab671 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -913,7 +913,7 @@ static inline __wsum ip_vs_check_diff4(__be32 old, __be32 new, __wsum oldsum) { __be32 diff[2] = { ~old, new }; - return csum_partial((char *) diff, sizeof(diff), oldsum); + return csum_partial(diff, sizeof(diff), oldsum); } #ifdef CONFIG_IP_VS_IPV6 @@ -923,7 +923,7 @@ static inline __wsum ip_vs_check_diff16(const __be32 *old, const __be32 *new, __be32 diff[8] = { ~old[3], ~old[2], ~old[1], ~old[0], new[3], new[2], new[1], new[0] }; - return csum_partial((char *) diff, sizeof(diff), oldsum); + return csum_partial(diff, sizeof(diff), oldsum); } #endif @@ -931,7 +931,7 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) { __be16 diff[2] = { ~old, new }; - return csum_partial((char *) diff, sizeof(diff), oldsum); + return csum_partial(diff, sizeof(diff), oldsum); } #endif /* __KERNEL__ */ diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 34f5d072f168..fc4e28e23b89 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -343,7 +343,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) udph->check = csum_tcpudp_magic(htonl(np->local_ip), htonl(np->remote_ip), udp_len, IPPROTO_UDP, - csum_partial((unsigned char *)udph, udp_len, 0)); + csum_partial(udph, udp_len, 0)); if (udph->check == 0) udph->check = CSUM_MANGLED_0; diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index cfd034a2b96e..6a667dae315e 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -120,7 +120,7 @@ static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc) iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl); tcph->check = 0; - tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), 0); + tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0); lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum); tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, lro_desc->ip_tot_len - @@ -135,7 +135,7 @@ static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len) __wsum tcp_ps_hdr_csum; tcp_csum = ~csum_unfold(tcph->check); - tcp_hdr_csum = csum_partial((u8 *)tcph, TCP_HDR_LEN(tcph), tcp_csum); + tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum); tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, len + TCP_HDR_LEN(tcph), diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b2e3ab2287ba..5559fea61e87 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -492,7 +492,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) skb->csum_offset = offsetof(struct tcphdr, check); } else { th->check = tcp_v4_check(len, inet->saddr, inet->daddr, - csum_partial((char *)th, + csum_partial(th, th->doff << 2, skb->csum)); } @@ -726,7 +726,7 @@ static int __tcp_v4_send_synack(struct sock *sk, struct request_sock *req, th->check = tcp_v4_check(skb->len, ireq->loc_addr, ireq->rmt_addr, - csum_partial((char *)th, skb->len, + csum_partial(th, skb->len, skb->csum)); err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index be351009fd03..a77b8d103804 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -233,7 +233,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6h->icmp6_cksum = 0; if (skb_queue_len(&sk->sk_write_queue) == 1) { - skb->csum = csum_partial((char *)icmp6h, + skb->csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), skb->csum); icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, @@ -246,7 +246,7 @@ static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct tmp_csum = csum_add(tmp_csum, skb->csum); } - tmp_csum = csum_partial((char *)icmp6h, + tmp_csum = csum_partial(icmp6h, sizeof(struct icmp6hdr), tmp_csum); icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst, diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index a76199ecad23..870a1d64605a 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1817,7 +1817,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) hdr->icmp6_cksum = csum_ipv6_magic(saddr, snd_addr, len, IPPROTO_ICMPV6, - csum_partial((__u8 *) hdr, len, 0)); + csum_partial(hdr, len, 0)); idev = in6_dev_get(skb->dev); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index fbf451c0d77a..af6705f03b5c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -491,7 +491,7 @@ struct sk_buff *ndisc_build_skb(struct net_device *dev, hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len, IPPROTO_ICMPV6, - csum_partial((__u8 *) hdr, + csum_partial(hdr, len, 0)); return skb; @@ -1612,7 +1612,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr, len, IPPROTO_ICMPV6, - csum_partial((u8 *) icmph, len, 0)); + csum_partial(icmph, len, 0)); buff->dst = dst; idev = in6_dev_get(dst->dev); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b35787056313..a5d750acd793 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -501,7 +501,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) th->check = tcp_v6_check(th, skb->len, &treq->loc_addr, &treq->rmt_addr, - csum_partial((char *)th, skb->len, skb->csum)); + csum_partial(th, skb->len, skb->csum)); ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); err = ip6_xmit(sk, skb, &fl, opt, 0); @@ -915,7 +915,7 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) skb->csum_offset = offsetof(struct tcphdr, check); } else { th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, - csum_partial((char *)th, th->doff<<2, + csum_partial(th, th->doff<<2, skb->csum)); } } @@ -997,7 +997,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, } #endif - buff->csum = csum_partial((char *)t1, tot_len, 0); + buff->csum = csum_partial(t1, tot_len, 0); memset(&fl, 0, sizeof(fl)); ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f2cf3f583f62..ebc4a9a4b1f7 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -216,7 +216,7 @@ static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned *hashp) return len; } - *hashp = unix_hash_fold(csum_partial((char *)sunaddr, len, 0)); + *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0)); return len; } @@ -686,7 +686,7 @@ static int unix_autobind(struct socket *sock) retry: addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short); - addr->hash = unix_hash_fold(csum_partial((void *)addr->name, addr->len, 0)); + addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0)); spin_lock(&unix_table_lock); ordernum = (ordernum+1)&0xFFFFF; -- cgit v1.2.3 From d314774cf2cd5dfeb39a00d37deee65d4c627927 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 19 Nov 2008 21:32:24 -0800 Subject: netdev: network device operations infrastructure This patch changes the network device internal API to move adminstrative operations out of the network device structure and into a separate structure. This patch involves some hackery to maintain compatablity between the new and old model, so all 300+ drivers don't have to be changed at once. For drivers that aren't converted yet, the netdevice_ops virt function list still resides in the net_device structure. For old protocols, the new net_device_ops are copied out to the old net_device pointers. After the transistion is completed the nag message can be changed to an WARN_ON, and the compatiablity code can be made configurable. Some function pointers aren't moved: * destructor can't be in net_device_ops because it may need to be referenced after the module is unloaded. * neighbor setup is manipulated in a couple of places that need special consideration * hard_start_xmit is in the fast path for transmit. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 232 +++++++++++++++++++++++++++++++++------------- net/Kconfig | 3 + net/core/dev.c | 109 +++++++++++++++------- net/core/netpoll.c | 7 +- net/core/rtnetlink.c | 9 +- net/sched/sch_generic.c | 4 +- 6 files changed, 259 insertions(+), 105 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 12d7f4469dc9..9060f5f3517a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -451,6 +451,131 @@ struct netdev_queue { struct Qdisc *qdisc_sleeping; } ____cacheline_aligned_in_smp; + +/* + * This structure defines the management hooks for network devices. + * The following hooks can bed defined and are optonal (can be null) + * unless otherwise noted. + * + * int (*ndo_init)(struct net_device *dev); + * This function is called once when network device is registered. + * The network device can use this to any late stage initializaton + * or semantic validattion. It can fail with an error code which will + * be propogated back to register_netdev + * + * void (*ndo_uninit)(struct net_device *dev); + * This function is called when device is unregistered or when registration + * fails. It is not called if init fails. + * + * int (*ndo_open)(struct net_device *dev); + * This function is called when network device transistions to the up + * state. + * + * int (*ndo_stop)(struct net_device *dev); + * This function is called when network device transistions to the down + * state. + * + * void (*ndo_change_rx_flags)(struct net_device *dev, int flags); + * This function is called to allow device receiver to make + * changes to configuration when multicast or promiscious is enabled. + * + * void (*ndo_set_rx_mode)(struct net_device *dev); + * This function is called device changes address list filtering. + * + * void (*ndo_set_multicast_list)(struct net_device *dev); + * This function is called when the multicast address list changes. + * + * int (*ndo_set_mac_address)(struct net_device *dev, void *addr); + * This function is called when the Media Access Control address + * needs to be changed. If not this interface is not defined, the + * mac address can not be changed. + * + * int (*ndo_validate_addr)(struct net_device *dev); + * Test if Media Access Control address is valid for the device. + * + * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); + * Called when a user request an ioctl which can't be handled by + * the generic interface code. If not defined ioctl's return + * not supported error code. + * + * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); + * Used to set network devices bus interface parameters. This interface + * is retained for legacy reason, new devices should use the bus + * interface (PCI) for low level management. + * + * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); + * Called when a user wants to change the Maximum Transfer Unit + * of a device. If not defined, any request to change MTU will + * will return an error. + * + * void (*ndo_tx_timeout) (struct net_device *dev); + * Callback uses when the transmitter has not made any progress + * for dev->watchdog ticks. + * + * struct net_device_stats* (*get_stats)(struct net_device *dev); + * Called when a user wants to get the network device usage + * statistics. If not defined, the counters in dev->stats will + * be used. + * + * void (*ndo_vlan_rx_register)(struct net_device *dev, struct vlan_group *grp); + * If device support VLAN receive accleration + * (ie. dev->features & NETIF_F_HW_VLAN_RX), then this function is called + * when vlan groups for the device changes. Note: grp is NULL + * if no vlan's groups are being used. + * + * void (*ndo_vlan_rx_add_vid)(struct net_device *dev, unsigned short vid); + * If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER) + * this function is called when a VLAN id is registered. + * + * void (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid); + * If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER) + * this function is called when a VLAN id is unregistered. + * + * void (*ndo_poll_controller)(struct net_device *dev); + */ +struct net_device_ops { + int (*ndo_init)(struct net_device *dev); + void (*ndo_uninit)(struct net_device *dev); + int (*ndo_open)(struct net_device *dev); + int (*ndo_stop)(struct net_device *dev); +#define HAVE_CHANGE_RX_FLAGS + void (*ndo_change_rx_flags)(struct net_device *dev, + int flags); +#define HAVE_SET_RX_MODE + void (*ndo_set_rx_mode)(struct net_device *dev); +#define HAVE_MULTICAST + void (*ndo_set_multicast_list)(struct net_device *dev); +#define HAVE_SET_MAC_ADDR + int (*ndo_set_mac_address)(struct net_device *dev, + void *addr); +#define HAVE_VALIDATE_ADDR + int (*ndo_validate_addr)(struct net_device *dev); +#define HAVE_PRIVATE_IOCTL + int (*ndo_do_ioctl)(struct net_device *dev, + struct ifreq *ifr, int cmd); +#define HAVE_SET_CONFIG + int (*ndo_set_config)(struct net_device *dev, + struct ifmap *map); +#define HAVE_CHANGE_MTU + int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); + +#define HAVE_TX_TIMEOUT + void (*ndo_tx_timeout) (struct net_device *dev); + + struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); + + void (*ndo_vlan_rx_register)(struct net_device *dev, + struct vlan_group *grp); + void (*ndo_vlan_rx_add_vid)(struct net_device *dev, + unsigned short vid); + void (*ndo_vlan_rx_kill_vid)(struct net_device *dev, + unsigned short vid); +#ifdef CONFIG_NET_POLL_CONTROLLER +#define HAVE_NETDEV_POLL + void (*ndo_poll_controller)(struct net_device *dev); +#endif +}; + /* * The DEVICE structure. * Actually, this whole structure is a big mistake. It mixes I/O @@ -498,11 +623,6 @@ struct net_device #ifdef CONFIG_NETPOLL struct list_head napi_list; #endif - - /* The device initialization function. Called only once. */ - int (*init)(struct net_device *dev); - - /* ------- Fields preinitialized in Space.c finish here ------- */ /* Net device features */ unsigned long features; @@ -546,15 +666,13 @@ struct net_device * for all in netdev_increment_features. */ #define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \ - NETIF_F_SG | NETIF_F_HIGHDMA | \ + NETIF_F_SG | NETIF_F_HIGHDMA | \ NETIF_F_FRAGLIST) /* Interface index. Unique device identifier */ int ifindex; int iflink; - - struct net_device_stats* (*get_stats)(struct net_device *dev); struct net_device_stats stats; #ifdef CONFIG_WIRELESS_EXT @@ -564,18 +682,13 @@ struct net_device /* Instance data managed by the core of Wireless Extensions. */ struct iw_public_data * wireless_data; #endif + /* Management operations */ + const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; /* Hardware header description */ const struct header_ops *header_ops; - /* - * This marks the end of the "visible" part of the structure. All - * fields hereafter are internal to the system, and may change at - * will (read: may be cleaned up at will). - */ - - unsigned int flags; /* interface flags (a la BSD) */ unsigned short gflags; unsigned short priv_flags; /* Like 'flags' but invisible to userspace. */ @@ -634,7 +747,7 @@ struct net_device unsigned long last_rx; /* Time of last Rx */ /* Interface address info used in eth_type_trans() */ unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address, (before bcast - because most packets are unicast) */ + because most packets are unicast) */ unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ @@ -648,6 +761,10 @@ struct net_device /* Number of TX queues currently active in device */ unsigned int real_num_tx_queues; + /* Map buffer to appropriate transmit queue */ + u16 (*select_queue)(struct net_device *dev, + struct sk_buff *skb); + unsigned long tx_queue_len; /* Max frames per queue allowed */ spinlock_t tx_global_lock; /* @@ -662,9 +779,6 @@ struct net_device int watchdog_timeo; /* used by dev_watchdog() */ struct timer_list watchdog_timer; -/* - * refcnt is a very hot point, so align it on SMP - */ /* Number of references to this device */ atomic_t refcnt ____cacheline_aligned_in_smp; @@ -683,56 +797,14 @@ struct net_device NETREG_RELEASED, /* called free_netdev */ } reg_state; - /* Called after device is detached from network. */ - void (*uninit)(struct net_device *dev); - /* Called after last user reference disappears. */ - void (*destructor)(struct net_device *dev); + /* Called from unregister, can be used to call free_netdev */ + void (*destructor)(struct net_device *dev); - /* Pointers to interface service routines. */ - int (*open)(struct net_device *dev); - int (*stop)(struct net_device *dev); -#define HAVE_NETDEV_POLL -#define HAVE_CHANGE_RX_FLAGS - void (*change_rx_flags)(struct net_device *dev, - int flags); -#define HAVE_SET_RX_MODE - void (*set_rx_mode)(struct net_device *dev); -#define HAVE_MULTICAST - void (*set_multicast_list)(struct net_device *dev); -#define HAVE_SET_MAC_ADDR - int (*set_mac_address)(struct net_device *dev, - void *addr); -#define HAVE_VALIDATE_ADDR - int (*validate_addr)(struct net_device *dev); -#define HAVE_PRIVATE_IOCTL - int (*do_ioctl)(struct net_device *dev, - struct ifreq *ifr, int cmd); -#define HAVE_SET_CONFIG - int (*set_config)(struct net_device *dev, - struct ifmap *map); -#define HAVE_CHANGE_MTU - int (*change_mtu)(struct net_device *dev, int new_mtu); + int (*neigh_setup)(struct net_device *dev, struct neigh_parms *); -#define HAVE_TX_TIMEOUT - void (*tx_timeout) (struct net_device *dev); - - void (*vlan_rx_register)(struct net_device *dev, - struct vlan_group *grp); - void (*vlan_rx_add_vid)(struct net_device *dev, - unsigned short vid); - void (*vlan_rx_kill_vid)(struct net_device *dev, - unsigned short vid); - - int (*neigh_setup)(struct net_device *dev, struct neigh_parms *); #ifdef CONFIG_NETPOLL struct netpoll_info *npinfo; #endif -#ifdef CONFIG_NET_POLL_CONTROLLER - void (*poll_controller)(struct net_device *dev); -#endif - - u16 (*select_queue)(struct net_device *dev, - struct sk_buff *skb); #ifdef CONFIG_NET_NS /* Network namespace this network device is inside */ @@ -763,6 +835,38 @@ struct net_device /* for setting kernel sock attribute on TCP connection setup */ #define GSO_MAX_SIZE 65536 unsigned int gso_max_size; + +#ifdef CONFIG_COMPAT_NET_DEV_OPS + struct { + int (*init)(struct net_device *dev); + void (*uninit)(struct net_device *dev); + int (*open)(struct net_device *dev); + int (*stop)(struct net_device *dev); + void (*change_rx_flags)(struct net_device *dev, + int flags); + void (*set_rx_mode)(struct net_device *dev); + void (*set_multicast_list)(struct net_device *dev); + int (*set_mac_address)(struct net_device *dev, + void *addr); + int (*validate_addr)(struct net_device *dev); + int (*do_ioctl)(struct net_device *dev, + struct ifreq *ifr, int cmd); + int (*set_config)(struct net_device *dev, + struct ifmap *map); + int (*change_mtu)(struct net_device *dev, int new_mtu); + void (*tx_timeout) (struct net_device *dev); + struct net_device_stats* (*get_stats)(struct net_device *dev); + void (*vlan_rx_register)(struct net_device *dev, + struct vlan_group *grp); + void (*vlan_rx_add_vid)(struct net_device *dev, + unsigned short vid); + void (*vlan_rx_kill_vid)(struct net_device *dev, + unsigned short vid); +#ifdef CONFIG_NET_POLL_CONTROLLER + void (*poll_controller)(struct net_device *dev); +#endif +#endif + }; }; #define to_net_dev(d) container_of(d, struct net_device, dev) diff --git a/net/Kconfig b/net/Kconfig index 8c3d97ca0d96..4e2e40ba8ba6 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -32,6 +32,9 @@ config NET_NS Allow user space to create what appear to be multiple instances of the network stack. +config COMPAT_NET_DEV_OPS + def_bool y + source "net/packet/Kconfig" source "net/unix/Kconfig" source "net/xfrm/Kconfig" diff --git a/net/core/dev.c b/net/core/dev.c index e08c0fcd603b..ca14ab407b33 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1059,6 +1059,7 @@ void dev_load(struct net *net, const char *name) */ int dev_open(struct net_device *dev) { + const struct net_device_ops *ops = dev->netdev_ops; int ret = 0; ASSERT_RTNL(); @@ -1081,11 +1082,11 @@ int dev_open(struct net_device *dev) */ set_bit(__LINK_STATE_START, &dev->state); - if (dev->validate_addr) - ret = dev->validate_addr(dev); + if (ops->ndo_validate_addr) + ret = ops->ndo_validate_addr(dev); - if (!ret && dev->open) - ret = dev->open(dev); + if (!ret && ops->ndo_open) + ret = ops->ndo_open(dev); /* * If it went open OK then: @@ -1129,6 +1130,7 @@ int dev_open(struct net_device *dev) */ int dev_close(struct net_device *dev) { + const struct net_device_ops *ops = dev->netdev_ops; ASSERT_RTNL(); might_sleep(); @@ -1161,8 +1163,8 @@ int dev_close(struct net_device *dev) * We allow it to be called even after a DETACH hot-plug * event. */ - if (dev->stop) - dev->stop(dev); + if (ops->ndo_stop) + ops->ndo_stop(dev); /* * Device is now down. @@ -2930,8 +2932,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) static void dev_change_rx_flags(struct net_device *dev, int flags) { - if (dev->flags & IFF_UP && dev->change_rx_flags) - dev->change_rx_flags(dev, flags); + const struct net_device_ops *ops = dev->netdev_ops; + + if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags) + ops->ndo_change_rx_flags(dev, flags); } static int __dev_set_promiscuity(struct net_device *dev, int inc) @@ -3051,6 +3055,8 @@ int dev_set_allmulti(struct net_device *dev, int inc) */ void __dev_set_rx_mode(struct net_device *dev) { + const struct net_device_ops *ops = dev->netdev_ops; + /* dev_open will call this function so the list will stay sane. */ if (!(dev->flags&IFF_UP)) return; @@ -3058,8 +3064,8 @@ void __dev_set_rx_mode(struct net_device *dev) if (!netif_device_present(dev)) return; - if (dev->set_rx_mode) - dev->set_rx_mode(dev); + if (ops->ndo_set_rx_mode) + ops->ndo_set_rx_mode(dev); else { /* Unicast addresses changes may only happen under the rtnl, * therefore calling __dev_set_promiscuity here is safe. @@ -3072,8 +3078,8 @@ void __dev_set_rx_mode(struct net_device *dev) dev->uc_promisc = 0; } - if (dev->set_multicast_list) - dev->set_multicast_list(dev); + if (ops->ndo_set_multicast_list) + ops->ndo_set_multicast_list(dev); } } @@ -3432,6 +3438,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) */ int dev_set_mtu(struct net_device *dev, int new_mtu) { + const struct net_device_ops *ops = dev->netdev_ops; int err; if (new_mtu == dev->mtu) @@ -3445,10 +3452,11 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) return -ENODEV; err = 0; - if (dev->change_mtu) - err = dev->change_mtu(dev, new_mtu); + if (ops->ndo_change_mtu) + err = ops->ndo_change_mtu(dev, new_mtu); else dev->mtu = new_mtu; + if (!err && dev->flags & IFF_UP) call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); return err; @@ -3463,15 +3471,16 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) */ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) { + const struct net_device_ops *ops = dev->netdev_ops; int err; - if (!dev->set_mac_address) + if (!ops->ndo_set_mac_address) return -EOPNOTSUPP; if (sa->sa_family != dev->type) return -EINVAL; if (!netif_device_present(dev)) return -ENODEV; - err = dev->set_mac_address(dev, sa); + err = ops->ndo_set_mac_address(dev, sa); if (!err) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); return err; @@ -3551,6 +3560,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) { int err; struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); + const struct net_device_ops *ops = dev->netdev_ops; if (!dev) return -ENODEV; @@ -3578,15 +3588,15 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) return 0; case SIOCSIFMAP: - if (dev->set_config) { + if (ops->ndo_set_config) { if (!netif_device_present(dev)) return -ENODEV; - return dev->set_config(dev, &ifr->ifr_map); + return ops->ndo_set_config(dev, &ifr->ifr_map); } return -EOPNOTSUPP; case SIOCADDMULTI: - if ((!dev->set_multicast_list && !dev->set_rx_mode) || + if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) @@ -3595,7 +3605,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) dev->addr_len, 1); case SIOCDELMULTI: - if ((!dev->set_multicast_list && !dev->set_rx_mode) || + if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) @@ -3633,10 +3643,9 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) cmd == SIOCBRDELIF || cmd == SIOCWANDEV) { err = -EOPNOTSUPP; - if (dev->do_ioctl) { + if (ops->ndo_do_ioctl) { if (netif_device_present(dev)) - err = dev->do_ioctl(dev, ifr, - cmd); + err = ops->ndo_do_ioctl(dev, ifr, cmd); else err = -ENODEV; } @@ -3897,8 +3906,8 @@ static void rollback_registered(struct net_device *dev) */ dev_addr_discard(dev); - if (dev->uninit) - dev->uninit(dev); + if (dev->netdev_ops->ndo_uninit) + dev->netdev_ops->ndo_uninit(dev); /* Notifier chain MUST detach us from master device. */ WARN_ON(dev->master); @@ -3988,7 +3997,7 @@ int register_netdevice(struct net_device *dev) struct hlist_head *head; struct hlist_node *p; int ret; - struct net *net; + struct net *net = dev_net(dev); BUG_ON(dev_boot_phase); ASSERT_RTNL(); @@ -3997,8 +4006,7 @@ int register_netdevice(struct net_device *dev) /* When net_device's are persistent, this will be fatal. */ BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); - BUG_ON(!dev_net(dev)); - net = dev_net(dev); + BUG_ON(!net); spin_lock_init(&dev->addr_list_lock); netdev_set_addr_lockdep_class(dev); @@ -4006,9 +4014,46 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; +#ifdef CONFIG_COMPAT_NET_DEV_OPS + /* Netdevice_ops API compatiability support. + * This is temporary until all network devices are converted. + */ + if (dev->netdev_ops) { + const struct net_device_ops *ops = dev->netdev_ops; + + dev->init = ops->ndo_init; + dev->uninit = ops->ndo_uninit; + dev->open = ops->ndo_open; + dev->change_rx_flags = ops->ndo_change_rx_flags; + dev->set_rx_mode = ops->ndo_set_rx_mode; + dev->set_multicast_list = ops->ndo_set_multicast_list; + dev->set_mac_address = ops->ndo_set_mac_address; + dev->validate_addr = ops->ndo_validate_addr; + dev->do_ioctl = ops->ndo_do_ioctl; + dev->set_config = ops->ndo_set_config; + dev->change_mtu = ops->ndo_change_mtu; + dev->tx_timeout = ops->ndo_tx_timeout; + dev->get_stats = ops->ndo_get_stats; + dev->vlan_rx_register = ops->ndo_vlan_rx_register; + dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid; + dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid; +#ifdef CONFIG_NET_POLL_CONTROLLER + dev->poll_controller = ops->ndo_poll_controller; +#endif + } else { + char drivername[64]; + pr_info("%s (%s): not using net_device_ops yet\n", + dev->name, netdev_drivername(dev, drivername, 64)); + + /* This works only because net_device_ops and the + compatiablity structure are the same. */ + dev->netdev_ops = (void *) &(dev->init); + } +#endif + /* Init, if this function is available */ - if (dev->init) { - ret = dev->init(dev); + if (dev->netdev_ops->ndo_init) { + ret = dev->netdev_ops->ndo_init(dev); if (ret) { if (ret > 0) ret = -EIO; @@ -4086,8 +4131,8 @@ out: return ret; err_uninit: - if (dev->uninit) - dev->uninit(dev); + if (dev->netdev_ops->ndo_uninit) + dev->netdev_ops->ndo_uninit(dev); goto out; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index fc4e28e23b89..630df6034444 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -172,12 +172,13 @@ static void service_arp_queue(struct netpoll_info *npi) void netpoll_poll(struct netpoll *np) { struct net_device *dev = np->dev; + const struct net_device_ops *ops = dev->netdev_ops; - if (!dev || !netif_running(dev) || !dev->poll_controller) + if (!dev || !netif_running(dev) || !ops->ndo_poll_controller) return; /* Process pending work on NIC */ - dev->poll_controller(dev); + ops->ndo_poll_controller(dev); poll_napi(dev); @@ -694,7 +695,7 @@ int netpoll_setup(struct netpoll *np) atomic_inc(&npinfo->refcnt); } - if (!ndev->poll_controller) { + if (!ndev->netdev_ops->ndo_poll_controller) { printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n", np->name, np->dev_name); err = -ENOTSUPP; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4dfb6b4d4559..6f8e0778e565 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -762,6 +762,7 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct nlattr **tb, char *ifname, int modified) { + const struct net_device_ops *ops = dev->netdev_ops; int send_addr_notify = 0; int err; @@ -783,7 +784,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct rtnl_link_ifmap *u_map; struct ifmap k_map; - if (!dev->set_config) { + if (!ops->ndo_set_config) { err = -EOPNOTSUPP; goto errout; } @@ -801,7 +802,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, k_map.dma = (unsigned char) u_map->dma; k_map.port = (unsigned char) u_map->port; - err = dev->set_config(dev, &k_map); + err = ops->ndo_set_config(dev, &k_map); if (err < 0) goto errout; @@ -812,7 +813,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, struct sockaddr *sa; int len; - if (!dev->set_mac_address) { + if (!ops->ndo_set_mac_address) { err = -EOPNOTSUPP; goto errout; } @@ -831,7 +832,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, sa->sa_family = dev->type; memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), dev->addr_len); - err = dev->set_mac_address(dev, sa); + err = ops->ndo_set_mac_address(dev, sa); kfree(sa); if (err) goto errout; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 80c8f3dbbea1..95ab55c064f1 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -224,7 +224,7 @@ static void dev_watchdog(unsigned long arg) char drivername[64]; WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n", dev->name, netdev_drivername(dev, drivername, 64)); - dev->tx_timeout(dev); + dev->netdev_ops->ndo_tx_timeout(dev); } if (!mod_timer(&dev->watchdog_timer, round_jiffies(jiffies + @@ -239,7 +239,7 @@ static void dev_watchdog(unsigned long arg) void __netdev_watchdog_up(struct net_device *dev) { - if (dev->tx_timeout) { + if (dev->netdev_ops->ndo_tx_timeout) { if (dev->watchdog_timeo <= 0) dev->watchdog_timeo = 5*HZ; if (!mod_timer(&dev->watchdog_timer, -- cgit v1.2.3 From eeda3fd64f75bcbfaa70ce946513abaf3f23b8e0 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 19 Nov 2008 21:40:23 -0800 Subject: netdev: introduce dev_get_stats() In order for the network device ops get_stats call to be immutable, the handling of the default internal network device stats block has to be changed. Add a new helper function which replaces the old use of internal_get_stats. Note: change return code to make it clear that the caller should not go changing the returned statistics. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- arch/s390/appldata/appldata_net_sum.c | 4 ++-- drivers/net/bonding/bond_main.c | 5 +++-- drivers/net/sfc/ethtool.c | 2 +- drivers/parisc/led.c | 4 ++-- include/linux/netdevice.h | 4 +++- net/core/dev.c | 23 ++++++++++++++++++----- net/core/net-sysfs.c | 3 +-- net/core/rtnetlink.c | 6 +++--- 8 files changed, 33 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c index 3b746556e1a3..fa741f84c5b9 100644 --- a/arch/s390/appldata/appldata_net_sum.c +++ b/arch/s390/appldata/appldata_net_sum.c @@ -67,7 +67,6 @@ static void appldata_get_net_sum_data(void *data) int i; struct appldata_net_sum_data *net_data; struct net_device *dev; - struct net_device_stats *stats; unsigned long rx_packets, tx_packets, rx_bytes, tx_bytes, rx_errors, tx_errors, rx_dropped, tx_dropped, collisions; @@ -86,7 +85,8 @@ static void appldata_get_net_sum_data(void *data) collisions = 0; read_lock(&dev_base_lock); for_each_netdev(&init_net, dev) { - stats = dev->get_stats(dev); + const struct net_device_stats *stats = dev_get_stats(dev); + rx_packets += stats->rx_packets; tx_packets += stats->tx_packets; rx_bytes += stats->rx_bytes; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index a08ea4808056..db5f5c24a250 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3899,7 +3899,7 @@ static int bond_close(struct net_device *bond_dev) static struct net_device_stats *bond_get_stats(struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); - struct net_device_stats *stats = &(bond->stats), *sstats; + struct net_device_stats *stats = &bond->stats; struct net_device_stats local_stats; struct slave *slave; int i; @@ -3909,7 +3909,8 @@ static struct net_device_stats *bond_get_stats(struct net_device *bond_dev) read_lock_bh(&bond->lock); bond_for_each_slave(bond, slave, i) { - sstats = slave->dev->get_stats(slave->dev); + const struct net_device_stats *sstats = dev_get_stats(slave->dev); + local_stats.rx_packets += sstats->rx_packets; local_stats.rx_bytes += sstats->rx_bytes; local_stats.rx_errors += sstats->rx_errors; diff --git a/drivers/net/sfc/ethtool.c b/drivers/net/sfc/ethtool.c index abd8fcd6e62d..cd92c4d8dbc5 100644 --- a/drivers/net/sfc/ethtool.c +++ b/drivers/net/sfc/ethtool.c @@ -426,7 +426,7 @@ static void efx_ethtool_get_stats(struct net_device *net_dev, EFX_BUG_ON_PARANOID(stats->n_stats != EFX_ETHTOOL_NUM_STATS); /* Update MAC and NIC statistics */ - net_dev->get_stats(net_dev); + dev_get_stats(net_dev); /* Fill detailed statistics buffer */ for (i = 0; i < EFX_ETHTOOL_NUM_STATS; i++) { diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c index f9b12664f9fb..454b6532e409 100644 --- a/drivers/parisc/led.c +++ b/drivers/parisc/led.c @@ -360,13 +360,13 @@ static __inline__ int led_get_net_activity(void) read_lock(&dev_base_lock); rcu_read_lock(); for_each_netdev(&init_net, dev) { - struct net_device_stats *stats; + const struct net_device_stats *stats; struct in_device *in_dev = __in_dev_get_rcu(dev); if (!in_dev || !in_dev->ifa_list) continue; if (ipv4_is_loopback(in_dev->ifa_list->ifa_local)) continue; - stats = dev->get_stats(dev); + stats = dev_get_stats(dev); rx_total += stats->rx_packets; tx_total += stats->tx_packets; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9060f5f3517a..981a089d5149 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -864,9 +864,9 @@ struct net_device unsigned short vid); #ifdef CONFIG_NET_POLL_CONTROLLER void (*poll_controller)(struct net_device *dev); -#endif #endif }; +#endif }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -1780,6 +1780,8 @@ extern void netdev_features_change(struct net_device *dev); /* Load a device via the kmod */ extern void dev_load(struct net *net, const char *name); extern void dev_mcast_init(void); +extern const struct net_device_stats *dev_get_stats(struct net_device *dev); + extern int netdev_max_backlog; extern int weight_p; extern int netdev_set_master(struct net_device *dev, struct net_device *master); diff --git a/net/core/dev.c b/net/core/dev.c index ca14ab407b33..8843f4e3f5e1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2620,7 +2620,7 @@ void dev_seq_stop(struct seq_file *seq, void *v) static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { - struct net_device_stats *stats = dev->get_stats(dev); + const struct net_device_stats *stats = dev_get_stats(dev); seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", @@ -4288,10 +4288,24 @@ void netdev_run_todo(void) } } -static struct net_device_stats *internal_stats(struct net_device *dev) -{ - return &dev->stats; +/** + * dev_get_stats - get network device statistics + * @dev: device to get statistics from + * + * Get network statistics from device. The device driver may provide + * its own method by setting dev->netdev_ops->get_stats; otherwise + * the internal statistics structure is used. + */ +const struct net_device_stats *dev_get_stats(struct net_device *dev) + { + const struct net_device_ops *ops = dev->netdev_ops; + + if (ops->ndo_get_stats) + return ops->ndo_get_stats(dev); + else + return &dev->stats; } +EXPORT_SYMBOL(dev_get_stats); static void netdev_init_one_queue(struct net_device *dev, struct netdev_queue *queue, @@ -4370,7 +4384,6 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, netdev_init_queues(dev); - dev->get_stats = internal_stats; netpoll_netdev_init(dev); setup(dev); strcpy(dev->name, name); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 146dcfeb060e..afd42d717320 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -270,7 +270,6 @@ static ssize_t netstat_show(const struct device *d, unsigned long offset) { struct net_device *dev = to_net_dev(d); - struct net_device_stats *stats; ssize_t ret = -EINVAL; WARN_ON(offset > sizeof(struct net_device_stats) || @@ -278,7 +277,7 @@ static ssize_t netstat_show(const struct device *d, read_lock(&dev_base_lock); if (dev_isalive(dev)) { - stats = dev->get_stats(dev); + const struct net_device_stats *stats = dev_get_stats(dev); ret = sprintf(buf, fmt_ulong, *(unsigned long *)(((u8 *) stats) + offset)); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 6f8e0778e565..790dd205bb5d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -551,7 +551,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition) } static void copy_rtnl_link_stats(struct rtnl_link_stats *a, - struct net_device_stats *b) + const struct net_device_stats *b) { a->rx_packets = b->rx_packets; a->tx_packets = b->tx_packets; @@ -609,7 +609,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq; struct ifinfomsg *ifm; struct nlmsghdr *nlh; - struct net_device_stats *stats; + const struct net_device_stats *stats; struct nlattr *attr; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); @@ -666,7 +666,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (attr == NULL) goto nla_put_failure; - stats = dev->get_stats(dev); + stats = dev_get_stats(dev); copy_rtnl_link_stats(nla_data(attr), stats); if (dev->rtnl_link_ops) { -- cgit v1.2.3 From ccad637b0c57de1825ffd34c311bf71487545ac2 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 19 Nov 2008 22:42:31 -0800 Subject: netdev: expose ethernet address primitives When ethernet devices are converted, the function pointer setup by eth_setup() need to be done during intialization. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/usb/gadget/u_ether.c | 4 ++-- include/linux/etherdevice.h | 4 ++++ net/ethernet/eth.c | 13 ++++++++----- 3 files changed, 14 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c index 00fa5239879d..d9739d52f8f5 100644 --- a/drivers/usb/gadget/u_ether.c +++ b/drivers/usb/gadget/u_ether.c @@ -146,7 +146,7 @@ static inline int qlen(struct usb_gadget *gadget) /* NETWORK DRIVER HOOKUP (to the layer above this driver) */ -static int eth_change_mtu(struct net_device *net, int new_mtu) +static int ueth_change_mtu(struct net_device *net, int new_mtu) { struct eth_dev *dev = netdev_priv(net); unsigned long flags; @@ -764,7 +764,7 @@ int __init gether_setup(struct usb_gadget *g, u8 ethaddr[ETH_ALEN]) if (ethaddr) memcpy(ethaddr, dev->host_mac, ETH_ALEN); - net->change_mtu = eth_change_mtu; + net->change_mtu = ueth_change_mtu; net->hard_start_xmit = eth_start_xmit; net->open = eth_open; net->stop = eth_stop; diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 25d62e6e3290..0e5e97060034 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -41,6 +41,10 @@ extern int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh); extern void eth_header_cache_update(struct hh_cache *hh, const struct net_device *dev, const unsigned char *haddr); +extern int eth_mac_addr(struct net_device *dev, void *p); +extern int eth_change_mtu(struct net_device *dev, int new_mtu); +extern int eth_validate_addr(struct net_device *dev); + extern struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index b9d85af2dd31..a87a171d9914 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -282,7 +282,7 @@ EXPORT_SYMBOL(eth_header_cache_update); * This doesn't change hardware matching, so needs to be overridden * for most real devices. */ -static int eth_mac_addr(struct net_device *dev, void *p) +int eth_mac_addr(struct net_device *dev, void *p) { struct sockaddr *addr = p; @@ -293,6 +293,7 @@ static int eth_mac_addr(struct net_device *dev, void *p) memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); return 0; } +EXPORT_SYMBOL(eth_mac_addr); /** * eth_change_mtu - set new MTU size @@ -302,21 +303,23 @@ static int eth_mac_addr(struct net_device *dev, void *p) * Allow changing MTU size. Needs to be overridden for devices * supporting jumbo frames. */ -static int eth_change_mtu(struct net_device *dev, int new_mtu) +int eth_change_mtu(struct net_device *dev, int new_mtu) { if (new_mtu < 68 || new_mtu > ETH_DATA_LEN) return -EINVAL; dev->mtu = new_mtu; return 0; } +EXPORT_SYMBOL(eth_change_mtu); -static int eth_validate_addr(struct net_device *dev) +int eth_validate_addr(struct net_device *dev) { if (!is_valid_ether_addr(dev->dev_addr)) return -EADDRNOTAVAIL; return 0; } +EXPORT_SYMBOL(eth_validate_addr); const struct header_ops eth_header_ops ____cacheline_aligned = { .create = eth_header, @@ -334,11 +337,11 @@ const struct header_ops eth_header_ops ____cacheline_aligned = { void ether_setup(struct net_device *dev) { dev->header_ops = ð_header_ops; - +#ifdef CONFIG_COMPAT_NET_DEV_OPS dev->change_mtu = eth_change_mtu; dev->set_mac_address = eth_mac_addr; dev->validate_addr = eth_validate_addr; - +#endif dev->type = ARPHRD_ETHER; dev->hard_header_len = ETH_HLEN; dev->mtu = ETH_DATA_LEN; -- cgit v1.2.3 From 5caea4ea7088e80ac5410d04660346094608b909 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Nov 2008 00:40:07 -0800 Subject: net: listening_hash get a spinlock per bucket This patch prepares RCU migration of listening_hash table for TCP/DCCP protocols. listening_hash table being small (32 slots per protocol), we add a spinlock for each slot, instead of a single rwlock for whole table. This should reduce hold time of readers, and writers concurrency. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 45 ++++++++-------------- net/dccp/proto.c | 8 +--- net/ipv4/inet_diag.c | 12 +++--- net/ipv4/inet_hashtables.c | 86 ++++++++++++++++--------------------------- net/ipv4/tcp_ipv4.c | 24 ++++++------ net/ipv6/inet6_hashtables.c | 23 ++++++------ 6 files changed, 79 insertions(+), 119 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 481896045111..62d2dd0d7860 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -99,6 +99,11 @@ struct inet_bind_hashbucket { struct hlist_head chain; }; +struct inet_listen_hashbucket { + spinlock_t lock; + struct hlist_head head; +}; + /* This is for listening sockets, thus all sockets which possess wildcards. */ #define INET_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ @@ -123,22 +128,21 @@ struct inet_hashinfo { unsigned int bhash_size; /* Note : 4 bytes padding on 64 bit arches */ - /* All sockets in TCP_LISTEN state will be in here. This is the only - * table where wildcard'd TCP sockets can exist. Hash function here - * is just local port number. - */ - struct hlist_head listening_hash[INET_LHTABLE_SIZE]; + struct kmem_cache *bind_bucket_cachep; /* All the above members are written once at bootup and * never written again _or_ are predominantly read-access. * * Now align to a new cache line as all the following members - * are often dirty. + * might be often dirty. + */ + /* All sockets in TCP_LISTEN state will be in here. This is the only + * table where wildcard'd TCP sockets can exist. Hash function here + * is just local port number. */ - rwlock_t lhash_lock ____cacheline_aligned; - atomic_t lhash_users; - wait_queue_head_t lhash_wait; - struct kmem_cache *bind_bucket_cachep; + struct inet_listen_hashbucket listening_hash[INET_LHTABLE_SIZE] + ____cacheline_aligned_in_smp; + }; static inline struct inet_ehash_bucket *inet_ehash_bucket( @@ -236,26 +240,7 @@ extern void __inet_inherit_port(struct sock *sk, struct sock *child); extern void inet_put_port(struct sock *sk); -extern void inet_listen_wlock(struct inet_hashinfo *hashinfo); - -/* - * - We may sleep inside this lock. - * - If sleeping is not required (or called from BH), - * use plain read_(un)lock(&inet_hashinfo.lhash_lock). - */ -static inline void inet_listen_lock(struct inet_hashinfo *hashinfo) -{ - /* read_lock synchronizes to candidates to writers */ - read_lock(&hashinfo->lhash_lock); - atomic_inc(&hashinfo->lhash_users); - read_unlock(&hashinfo->lhash_lock); -} - -static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo) -{ - if (atomic_dec_and_test(&hashinfo->lhash_users)) - wake_up(&hashinfo->lhash_wait); -} +void inet_hashinfo_init(struct inet_hashinfo *h); extern void __inet_hash_nolisten(struct sock *sk); extern void inet_hash(struct sock *sk); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index bdf784c422b5..8b63394ec24c 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -44,12 +44,7 @@ atomic_t dccp_orphan_count = ATOMIC_INIT(0); EXPORT_SYMBOL_GPL(dccp_orphan_count); -struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { - .lhash_lock = RW_LOCK_UNLOCKED, - .lhash_users = ATOMIC_INIT(0), - .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), -}; - +struct inet_hashinfo dccp_hashinfo; EXPORT_SYMBOL_GPL(dccp_hashinfo); /* the maximum queue length for tx in packets. 0 is no limit */ @@ -1030,6 +1025,7 @@ static int __init dccp_init(void) BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); + inet_hashinfo_init(&dccp_hashinfo); dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", sizeof(struct inet_bind_bucket), 0, diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 41b36720e977..1cb154ed75ad 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -718,13 +718,15 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!(r->idiag_states & (TCPF_LISTEN | TCPF_SYN_RECV))) goto skip_listen_ht; - inet_listen_lock(hashinfo); for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct sock *sk; struct hlist_node *node; + struct inet_listen_hashbucket *ilb; num = 0; - sk_for_each(sk, node, &hashinfo->listening_hash[i]) { + ilb = &hashinfo->listening_hash[i]; + spin_lock_bh(&ilb->lock); + sk_for_each(sk, node, &ilb->head) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) { @@ -742,7 +744,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) goto syn_recv; if (inet_csk_diag_dump(sk, skb, cb) < 0) { - inet_listen_unlock(hashinfo); + spin_unlock_bh(&ilb->lock); goto done; } @@ -751,7 +753,7 @@ syn_recv: goto next_listen; if (inet_diag_dump_reqs(skb, sk, cb) < 0) { - inet_listen_unlock(hashinfo); + spin_unlock_bh(&ilb->lock); goto done; } @@ -760,12 +762,12 @@ next_listen: cb->args[4] = 0; ++num; } + spin_unlock_bh(&ilb->lock); s_num = 0; cb->args[3] = 0; cb->args[4] = 0; } - inet_listen_unlock(hashinfo); skip_listen_ht: cb->args[0] = 1; s_i = num = s_num = 0; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fd269cfef0ec..377d004e5723 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -110,35 +110,6 @@ void __inet_inherit_port(struct sock *sk, struct sock *child) EXPORT_SYMBOL_GPL(__inet_inherit_port); -/* - * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. - * Look, when several writers sleep and reader wakes them up, all but one - * immediately hit write lock and grab all the cpus. Exclusive sleep solves - * this, _but_ remember, it adds useless work on UP machines (wake up each - * exclusive lock release). It should be ifdefed really. - */ -void inet_listen_wlock(struct inet_hashinfo *hashinfo) - __acquires(hashinfo->lhash_lock) -{ - write_lock(&hashinfo->lhash_lock); - - if (atomic_read(&hashinfo->lhash_users)) { - DEFINE_WAIT(wait); - - for (;;) { - prepare_to_wait_exclusive(&hashinfo->lhash_wait, - &wait, TASK_UNINTERRUPTIBLE); - if (!atomic_read(&hashinfo->lhash_users)) - break; - write_unlock_bh(&hashinfo->lhash_lock); - schedule(); - write_lock_bh(&hashinfo->lhash_lock); - } - - finish_wait(&hashinfo->lhash_wait, &wait); - } -} - /* * Don't inline this cruft. Here are some nice properties to exploit here. The * BSD API does not allow a listening sock to specify the remote port nor the @@ -191,25 +162,25 @@ struct sock *__inet_lookup_listener(struct net *net, const int dif) { struct sock *sk = NULL; - const struct hlist_head *head; + struct inet_listen_hashbucket *ilb; - read_lock(&hashinfo->lhash_lock); - head = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; - if (!hlist_empty(head)) { - const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); + ilb = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; + spin_lock(&ilb->lock); + if (!hlist_empty(&ilb->head)) { + const struct inet_sock *inet = inet_sk((sk = __sk_head(&ilb->head))); if (inet->num == hnum && !sk->sk_node.next && (!inet->rcv_saddr || inet->rcv_saddr == daddr) && (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && !sk->sk_bound_dev_if && net_eq(sock_net(sk), net)) goto sherry_cache; - sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif); + sk = inet_lookup_listener_slow(net, &ilb->head, daddr, hnum, dif); } if (sk) { sherry_cache: sock_hold(sk); } - read_unlock(&hashinfo->lhash_lock); + spin_unlock(&ilb->lock); return sk; } EXPORT_SYMBOL_GPL(__inet_lookup_listener); @@ -389,8 +360,7 @@ EXPORT_SYMBOL_GPL(__inet_hash_nolisten); static void __inet_hash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - struct hlist_head *list; - rwlock_t *lock; + struct inet_listen_hashbucket *ilb; if (sk->sk_state != TCP_LISTEN) { __inet_hash_nolisten(sk); @@ -398,14 +368,12 @@ static void __inet_hash(struct sock *sk) } WARN_ON(!sk_unhashed(sk)); - list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - lock = &hashinfo->lhash_lock; + ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - inet_listen_wlock(hashinfo); - __sk_add_node(sk, list); + spin_lock(&ilb->lock); + __sk_add_node(sk, &ilb->head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); - wake_up(&hashinfo->lhash_wait); + spin_unlock(&ilb->lock); } void inet_hash(struct sock *sk) @@ -420,29 +388,27 @@ EXPORT_SYMBOL_GPL(inet_hash); void inet_unhash(struct sock *sk) { - rwlock_t *lock; struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; if (sk_unhashed(sk)) - goto out; + return; if (sk->sk_state == TCP_LISTEN) { - local_bh_disable(); - inet_listen_wlock(hashinfo); - lock = &hashinfo->lhash_lock; + struct inet_listen_hashbucket *ilb; + + ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + spin_lock_bh(&ilb->lock); if (__sk_del_node_init(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + spin_unlock_bh(&ilb->lock); } else { - lock = inet_ehash_lockp(hashinfo, sk->sk_hash); + rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); + write_lock_bh(lock); if (__sk_nulls_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + write_unlock_bh(lock); } - - write_unlock_bh(lock); -out: - if (sk->sk_state == TCP_LISTEN) - wake_up(&hashinfo->lhash_wait); } EXPORT_SYMBOL_GPL(inet_unhash); @@ -556,3 +522,13 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, } EXPORT_SYMBOL_GPL(inet_hash_connect); + +void inet_hashinfo_init(struct inet_hashinfo *h) +{ + int i; + + for (i = 0; i < INET_LHTABLE_SIZE; i++) + spin_lock_init(&h->listening_hash[i].lock); +} + +EXPORT_SYMBOL_GPL(inet_hashinfo_init); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5559fea61e87..330b08a12274 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -97,11 +97,7 @@ struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) } #endif -struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { - .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock), - .lhash_users = ATOMIC_INIT(0), - .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), -}; +struct inet_hashinfo tcp_hashinfo; static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) { @@ -1874,15 +1870,18 @@ static void *listening_get_next(struct seq_file *seq, void *cur) struct inet_connection_sock *icsk; struct hlist_node *node; struct sock *sk = cur; + struct inet_listen_hashbucket *ilb; struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); if (!sk) { st->bucket = 0; - sk = sk_head(&tcp_hashinfo.listening_hash[0]); + ilb = &tcp_hashinfo.listening_hash[0]; + spin_lock_bh(&ilb->lock); + sk = sk_head(&ilb->head); goto get_sk; } - + ilb = &tcp_hashinfo.listening_hash[st->bucket]; ++st->num; if (st->state == TCP_SEQ_STATE_OPENREQ) { @@ -1932,8 +1931,11 @@ start_req: } read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } + spin_unlock_bh(&ilb->lock); if (++st->bucket < INET_LHTABLE_SIZE) { - sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]); + ilb = &tcp_hashinfo.listening_hash[st->bucket]; + spin_lock_bh(&ilb->lock); + sk = sk_head(&ilb->head); goto get_sk; } cur = NULL; @@ -2066,12 +2068,10 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) void *rc; struct tcp_iter_state *st = seq->private; - inet_listen_lock(&tcp_hashinfo); st->state = TCP_SEQ_STATE_LISTENING; rc = listening_get_idx(seq, &pos); if (!rc) { - inet_listen_unlock(&tcp_hashinfo); st->state = TCP_SEQ_STATE_ESTABLISHED; rc = established_get_idx(seq, pos); } @@ -2103,7 +2103,6 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) case TCP_SEQ_STATE_LISTENING: rc = listening_get_next(seq, v); if (!rc) { - inet_listen_unlock(&tcp_hashinfo); st->state = TCP_SEQ_STATE_ESTABLISHED; rc = established_get_first(seq); } @@ -2130,7 +2129,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) } case TCP_SEQ_STATE_LISTENING: if (v != SEQ_START_TOKEN) - inet_listen_unlock(&tcp_hashinfo); + spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); break; case TCP_SEQ_STATE_TIME_WAIT: case TCP_SEQ_STATE_ESTABLISHED: @@ -2405,6 +2404,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = { void __init tcp_v4_init(void) { + inet_hashinfo_init(&tcp_hashinfo); if (register_pernet_device(&tcp_sk_ops)) panic("Failed to create the TCP control socket.\n"); } diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index c1b4d401fd95..21544b9be259 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -25,30 +25,30 @@ void __inet6_hash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - rwlock_t *lock; WARN_ON(!sk_unhashed(sk)); if (sk->sk_state == TCP_LISTEN) { - struct hlist_head *list; + struct inet_listen_hashbucket *ilb; - list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - lock = &hashinfo->lhash_lock; - inet_listen_wlock(hashinfo); - __sk_add_node(sk, list); + ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + spin_lock(&ilb->lock); + __sk_add_node(sk, &ilb->head); + spin_unlock(&ilb->lock); } else { unsigned int hash; struct hlist_nulls_head *list; + rwlock_t *lock; sk->sk_hash = hash = inet6_sk_ehashfn(sk); list = &inet_ehash_bucket(hashinfo, hash)->chain; lock = inet_ehash_lockp(hashinfo, hash); write_lock(lock); __sk_nulls_add_node_rcu(sk, list); + write_unlock(lock); } sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); } EXPORT_SYMBOL(__inet6_hash); @@ -126,10 +126,11 @@ struct sock *inet6_lookup_listener(struct net *net, const struct hlist_node *node; struct sock *result = NULL; int score, hiscore = 0; + struct inet_listen_hashbucket *ilb; - read_lock(&hashinfo->lhash_lock); - sk_for_each(sk, node, - &hashinfo->listening_hash[inet_lhashfn(net, hnum)]) { + ilb = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; + spin_lock(&ilb->lock); + sk_for_each(sk, node, &ilb->head) { if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); @@ -157,7 +158,7 @@ struct sock *inet6_lookup_listener(struct net *net, } if (result) sock_hold(result); - read_unlock(&hashinfo->lhash_lock); + spin_unlock(&ilb->lock); return result; } -- cgit v1.2.3 From d214c7537bbf2f247991fb65b3420b0b3d712c67 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 20 Nov 2008 00:49:27 -0800 Subject: filter: add SKF_AD_NLATTR_NEST to look for nested attributes SKF_AD_NLATTR allows us to find the first matching attribute in a stream of netlink attributes from one offset to the end of the netlink message. This is not suitable to look for a specific matching inside a set of nested attributes. For example, in ctnetlink messages, if we look for the CTA_V6_SRC attribute in a message that talks about an IPv4 connection, SKF_AD_NLATTR returns the offset of CTA_STATUS which has the same value of CTA_V6_SRC but outside the nest. To differenciate CTA_STATUS and CTA_V6_SRC, we would have to make assumptions on the size of the attribute and the usual offset, resulting in horrible BSF code. This patch adds SKF_AD_NLATTR_NEST, which is a variant of SKF_AD_NLATTR, that looks for an attribute inside the limits of a nested attributes, but not further. This patch validates that we have enough room to look for the nested attributes - based on a suggestion from Patrick McHardy. Signed-off-by: Pablo Neira Ayuso Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/filter.h | 3 ++- net/core/filter.c | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index b6ea9aa9e853..1354aaf6abbe 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -122,7 +122,8 @@ struct sock_fprog /* Required for SO_ATTACH_FILTER. */ #define SKF_AD_PKTTYPE 4 #define SKF_AD_IFINDEX 8 #define SKF_AD_NLATTR 12 -#define SKF_AD_MAX 16 +#define SKF_AD_NLATTR_NEST 16 +#define SKF_AD_MAX 20 #define SKF_NET_OFF (-0x100000) #define SKF_LL_OFF (-0x200000) diff --git a/net/core/filter.c b/net/core/filter.c index df3744355839..d1d779ca096d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -319,6 +319,25 @@ load_b: A = 0; continue; } + case SKF_AD_NLATTR_NEST: { + struct nlattr *nla; + + if (skb_is_nonlinear(skb)) + return 0; + if (A > skb->len - sizeof(struct nlattr)) + return 0; + + nla = (struct nlattr *)&skb->data[A]; + if (nla->nla_len > A - skb->len) + return 0; + + nla = nla_find_nested(nla, X); + if (nla) + A = (void *)nla - (void *)skb->data; + else + A = 0; + continue; + } default: return 0; } -- cgit v1.2.3 From 0c19b0adb8dd33dbd10ff48e41971231c486855c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 20 Nov 2008 04:08:29 -0800 Subject: netlink: avoid memset of 0 bytes sparse warning A netlink attribute padding of zero triggers this sparse warning: include/linux/netlink.h:245:8: warning: memset with byte count of 0 Avoid the memset when the size parameter is constant and requires no padding. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netlink.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 9ff1b54908f3..51b09a1f46c3 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -242,7 +242,8 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags) nlh->nlmsg_flags = flags; nlh->nlmsg_pid = pid; nlh->nlmsg_seq = seq; - memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size); + if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0) + memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size); return nlh; } -- cgit v1.2.3 From 13d2a1d2b032de08d7dcab6a1edcd47802681f96 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 20 Nov 2008 04:10:00 -0800 Subject: pkt_sched: add DRR scheduler Add classful DRR scheduler as a more flexible replacement for SFQ. The main difference to the algorithm described in "Efficient Fair Queueing using Deficit Round Robin" is that this implementation doesn't drop packets from the longest queue on overrun because its classful and limits are handled by each individual child qdisc. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/pkt_sched.h | 16 ++ net/sched/Kconfig | 11 + net/sched/Makefile | 1 + net/sched/sch_drr.c | 505 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 533 insertions(+) create mode 100644 net/sched/sch_drr.c (limited to 'include') diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h index 5d921fa91a5b..e3f133adba78 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -500,4 +500,20 @@ struct tc_netem_corrupt #define NETEM_DIST_SCALE 8192 +/* DRR */ + +enum +{ + TCA_DRR_UNSPEC, + TCA_DRR_QUANTUM, + __TCA_DRR_MAX +}; + +#define TCA_DRR_MAX (__TCA_DRR_MAX - 1) + +struct tc_drr_stats +{ + u32 deficit; +}; + #endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 36543b6fcef3..4f7ef0db302b 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -194,6 +194,17 @@ config NET_SCH_NETEM If unsure, say N. +config NET_SCH_DRR + tristate "Deficit Round Robin scheduler (DRR)" + help + Say Y here if you want to use the Deficit Round Robin (DRR) packet + scheduling algorithm. + + To compile this driver as a module, choose M here: the module + will be called sch_drr. + + If unsure, say N. + config NET_SCH_INGRESS tristate "Ingress Qdisc" depends on NET_CLS_ACT diff --git a/net/sched/Makefile b/net/sched/Makefile index 70b35f8708c3..54d950cd4b8d 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -30,6 +30,7 @@ obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o +obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o obj-$(CONFIG_NET_CLS_FW) += cls_fw.o diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c new file mode 100644 index 000000000000..e71a5de23e23 --- /dev/null +++ b/net/sched/sch_drr.c @@ -0,0 +1,505 @@ +/* + * net/sched/sch_drr.c Deficit Round Robin scheduler + * + * Copyright (c) 2008 Patrick McHardy + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct drr_class { + struct Qdisc_class_common common; + unsigned int refcnt; + unsigned int filter_cnt; + + struct gnet_stats_basic bstats; + struct gnet_stats_queue qstats; + struct gnet_stats_rate_est rate_est; + struct list_head alist; + struct Qdisc *qdisc; + + u32 quantum; + u32 deficit; +}; + +struct drr_sched { + struct list_head active; + struct tcf_proto *filter_list; + struct Qdisc_class_hash clhash; +}; + +static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid) +{ + struct drr_sched *q = qdisc_priv(sch); + struct Qdisc_class_common *clc; + + clc = qdisc_class_find(&q->clhash, classid); + if (clc == NULL) + return NULL; + return container_of(clc, struct drr_class, common); +} + +static void drr_purge_queue(struct drr_class *cl) +{ + unsigned int len = cl->qdisc->q.qlen; + + qdisc_reset(cl->qdisc); + qdisc_tree_decrease_qlen(cl->qdisc, len); +} + +static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = { + [TCA_DRR_QUANTUM] = { .type = NLA_U32 }, +}; + +static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, + struct nlattr **tca, unsigned long *arg) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl = (struct drr_class *)*arg; + struct nlattr *tb[TCA_DRR_MAX + 1]; + u32 quantum; + int err; + + err = nla_parse_nested(tb, TCA_DRR_MAX, tca[TCA_OPTIONS], drr_policy); + if (err < 0) + return err; + + if (tb[TCA_DRR_QUANTUM]) { + quantum = nla_get_u32(tb[TCA_DRR_QUANTUM]); + if (quantum == 0) + return -EINVAL; + } else + quantum = psched_mtu(qdisc_dev(sch)); + + if (cl != NULL) { + sch_tree_lock(sch); + if (tb[TCA_DRR_QUANTUM]) + cl->quantum = quantum; + sch_tree_unlock(sch); + + if (tca[TCA_RATE]) + gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + return 0; + } + + cl = kzalloc(sizeof(struct drr_class), GFP_KERNEL); + if (cl == NULL) + return -ENOBUFS; + + cl->refcnt = 1; + cl->common.classid = classid; + cl->quantum = quantum; + cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, classid); + if (cl->qdisc == NULL) + cl->qdisc = &noop_qdisc; + + if (tca[TCA_RATE]) + gen_replace_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE]); + + sch_tree_lock(sch); + qdisc_class_hash_insert(&q->clhash, &cl->common); + sch_tree_unlock(sch); + + qdisc_class_hash_grow(sch, &q->clhash); + + *arg = (unsigned long)cl; + return 0; +} + +static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl) +{ + gen_kill_estimator(&cl->bstats, &cl->rate_est); + qdisc_destroy(cl->qdisc); + kfree(cl); +} + +static int drr_delete_class(struct Qdisc *sch, unsigned long arg) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl = (struct drr_class *)arg; + + if (cl->filter_cnt > 0) + return -EBUSY; + + sch_tree_lock(sch); + + drr_purge_queue(cl); + qdisc_class_hash_remove(&q->clhash, &cl->common); + + if (--cl->refcnt == 0) + drr_destroy_class(sch, cl); + + sch_tree_unlock(sch); + return 0; +} + +static unsigned long drr_get_class(struct Qdisc *sch, u32 classid) +{ + struct drr_class *cl = drr_find_class(sch, classid); + + if (cl != NULL) + cl->refcnt++; + + return (unsigned long)cl; +} + +static void drr_put_class(struct Qdisc *sch, unsigned long arg) +{ + struct drr_class *cl = (struct drr_class *)arg; + + if (--cl->refcnt == 0) + drr_destroy_class(sch, cl); +} + +static struct tcf_proto **drr_tcf_chain(struct Qdisc *sch, unsigned long cl) +{ + struct drr_sched *q = qdisc_priv(sch); + + if (cl) + return NULL; + + return &q->filter_list; +} + +static unsigned long drr_bind_tcf(struct Qdisc *sch, unsigned long parent, + u32 classid) +{ + struct drr_class *cl = drr_find_class(sch, classid); + + if (cl != NULL) + cl->filter_cnt++; + + return (unsigned long)cl; +} + +static void drr_unbind_tcf(struct Qdisc *sch, unsigned long arg) +{ + struct drr_class *cl = (struct drr_class *)arg; + + cl->filter_cnt--; +} + +static int drr_graft_class(struct Qdisc *sch, unsigned long arg, + struct Qdisc *new, struct Qdisc **old) +{ + struct drr_class *cl = (struct drr_class *)arg; + + if (new == NULL) { + new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + &pfifo_qdisc_ops, cl->common.classid); + if (new == NULL) + new = &noop_qdisc; + } + + sch_tree_lock(sch); + drr_purge_queue(cl); + *old = xchg(&cl->qdisc, new); + sch_tree_unlock(sch); + return 0; +} + +static struct Qdisc *drr_class_leaf(struct Qdisc *sch, unsigned long arg) +{ + struct drr_class *cl = (struct drr_class *)arg; + + return cl->qdisc; +} + +static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg) +{ + struct drr_class *cl = (struct drr_class *)arg; + + if (cl->qdisc->q.qlen == 0) + list_del(&cl->alist); +} + +static int drr_dump_class(struct Qdisc *sch, unsigned long arg, + struct sk_buff *skb, struct tcmsg *tcm) +{ + struct drr_class *cl = (struct drr_class *)arg; + struct nlattr *nest; + + tcm->tcm_parent = TC_H_ROOT; + tcm->tcm_handle = cl->common.classid; + tcm->tcm_info = cl->qdisc->handle; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (nest == NULL) + goto nla_put_failure; + NLA_PUT_U32(skb, TCA_DRR_QUANTUM, cl->quantum); + return nla_nest_end(skb, nest); + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg, + struct gnet_dump *d) +{ + struct drr_class *cl = (struct drr_class *)arg; + struct tc_drr_stats xstats; + + memset(&xstats, 0, sizeof(xstats)); + if (cl->qdisc->q.qlen) + xstats.deficit = cl->deficit; + + if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || + gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || + gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0) + return -1; + + return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); +} + +static void drr_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct hlist_node *n; + unsigned int i; + + if (arg->stop) + return; + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + if (arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(sch, (unsigned long)cl, arg) < 0) { + arg->stop = 1; + return; + } + arg->count++; + } + } +} + +static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, + int *qerr) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct tcf_result res; + int result; + + if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) { + cl = drr_find_class(sch, skb->priority); + if (cl != NULL) + return cl; + } + + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + result = tc_classify(skb, q->filter_list, &res); + if (result >= 0) { +#ifdef CONFIG_NET_CLS_ACT + switch (result) { + case TC_ACT_QUEUED: + case TC_ACT_STOLEN: + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + case TC_ACT_SHOT: + return NULL; + } +#endif + cl = (struct drr_class *)res.class; + if (cl == NULL) + cl = drr_find_class(sch, res.classid); + return cl; + } + return NULL; +} + +static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + unsigned int len; + int err; + + cl = drr_classify(skb, sch, &err); + if (cl == NULL) { + if (err & __NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb(skb); + return err; + } + + len = qdisc_pkt_len(skb); + err = qdisc_enqueue(skb, cl->qdisc); + if (unlikely(err != NET_XMIT_SUCCESS)) { + if (net_xmit_drop_count(err)) { + cl->qstats.drops++; + sch->qstats.drops++; + } + return err; + } + + if (cl->qdisc->q.qlen == 1) { + list_add_tail(&cl->alist, &q->active); + cl->deficit = cl->quantum; + } + + cl->bstats.packets++; + cl->bstats.bytes += len; + sch->bstats.packets++; + sch->bstats.bytes += len; + + sch->q.qlen++; + return err; +} + +static struct sk_buff *drr_dequeue(struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct sk_buff *skb; + unsigned int len; + + while (!list_empty(&q->active)) { + cl = list_first_entry(&q->active, struct drr_class, alist); + skb = cl->qdisc->ops->peek(cl->qdisc); + if (skb == NULL) + goto skip; + + len = qdisc_pkt_len(skb); + if (len <= cl->deficit) { + cl->deficit -= len; + skb = qdisc_dequeue_peeked(cl->qdisc); + if (cl->qdisc->q.qlen == 0) + list_del(&cl->alist); + sch->q.qlen--; + return skb; + } + + cl->deficit += cl->quantum; +skip: + list_move_tail(&cl->alist, &q->active); + } + return NULL; +} + +static unsigned int drr_drop(struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + unsigned int len; + + list_for_each_entry(cl, &q->active, alist) { + if (cl->qdisc->ops->drop) { + len = cl->qdisc->ops->drop(cl->qdisc); + if (len > 0) { + if (cl->qdisc->q.qlen == 0) + list_del(&cl->alist); + return len; + } + } + } + return 0; +} + +static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt) +{ + struct drr_sched *q = qdisc_priv(sch); + int err; + + err = qdisc_class_hash_init(&q->clhash); + if (err < 0) + return err; + INIT_LIST_HEAD(&q->active); + return 0; +} + +static void drr_reset_qdisc(struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct hlist_node *n; + unsigned int i; + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { + if (cl->qdisc->q.qlen) + list_del(&cl->alist); + qdisc_reset(cl->qdisc); + } + } + sch->q.qlen = 0; +} + +static void drr_destroy_qdisc(struct Qdisc *sch) +{ + struct drr_sched *q = qdisc_priv(sch); + struct drr_class *cl; + struct hlist_node *n, *next; + unsigned int i; + + tcf_destroy_chain(&q->filter_list); + + for (i = 0; i < q->clhash.hashsize; i++) { + hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], + common.hnode) + drr_destroy_class(sch, cl); + } + qdisc_class_hash_destroy(&q->clhash); +} + +static const struct Qdisc_class_ops drr_class_ops = { + .change = drr_change_class, + .delete = drr_delete_class, + .get = drr_get_class, + .put = drr_put_class, + .tcf_chain = drr_tcf_chain, + .bind_tcf = drr_bind_tcf, + .unbind_tcf = drr_unbind_tcf, + .graft = drr_graft_class, + .leaf = drr_class_leaf, + .qlen_notify = drr_qlen_notify, + .dump = drr_dump_class, + .dump_stats = drr_dump_class_stats, + .walk = drr_walk, +}; + +static struct Qdisc_ops drr_qdisc_ops __read_mostly = { + .cl_ops = &drr_class_ops, + .id = "drr", + .priv_size = sizeof(struct drr_sched), + .enqueue = drr_enqueue, + .dequeue = drr_dequeue, + .peek = qdisc_peek_dequeued, + .drop = drr_drop, + .init = drr_init_qdisc, + .reset = drr_reset_qdisc, + .destroy = drr_destroy_qdisc, + .owner = THIS_MODULE, +}; + +static int __init drr_init(void) +{ + return register_qdisc(&drr_qdisc_ops); +} + +static void __exit drr_exit(void) +{ + unregister_qdisc(&drr_qdisc_ops); +} + +module_init(drr_init); +module_exit(drr_exit); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 018a7bf1e55000dd792194238c9043918d24d3dd Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 20 Nov 2008 15:59:56 +0100 Subject: netfilter: ip{,6}t_policy.h should include xp_policy.h It seems that all of the include/netfilter_{ipv4,ipv6}/{ipt,ip6t}_*.h which share constants include the corresponding include/netfilter/xp_*.h files. Neither ipt_policy.h not ip6t_policy.h do. Make these consistant with the norm. Signed-off-by: Andy Whitcroft Signed-off-by: Patrick McHardy --- include/linux/netfilter_ipv4/ipt_policy.h | 2 ++ include/linux/netfilter_ipv6/ip6t_policy.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ipt_policy.h b/include/linux/netfilter_ipv4/ipt_policy.h index b9478a255301..1037fb2cd206 100644 --- a/include/linux/netfilter_ipv4/ipt_policy.h +++ b/include/linux/netfilter_ipv4/ipt_policy.h @@ -1,6 +1,8 @@ #ifndef _IPT_POLICY_H #define _IPT_POLICY_H +#include + #define IPT_POLICY_MAX_ELEM XT_POLICY_MAX_ELEM /* ipt_policy_flags */ diff --git a/include/linux/netfilter_ipv6/ip6t_policy.h b/include/linux/netfilter_ipv6/ip6t_policy.h index 6bab3163d2fb..b1c449d7ec89 100644 --- a/include/linux/netfilter_ipv6/ip6t_policy.h +++ b/include/linux/netfilter_ipv6/ip6t_policy.h @@ -1,6 +1,8 @@ #ifndef _IP6T_POLICY_H #define _IP6T_POLICY_H +#include + #define IP6T_POLICY_MAX_ELEM XT_POLICY_MAX_ELEM /* ip6t_policy_flags */ -- cgit v1.2.3 From 008298231abbeb91bc7be9e8b078607b816d1a4a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 20 Nov 2008 20:14:53 -0800 Subject: netdev: add more functions to netdevice ops This patch moves neigh_setup and hard_start_xmit into the network device ops structure. For bisection, fix all the previously converted drivers as well. Bonding driver took the biggest hit on this. Added a prefetch of the hard_start_xmit in the fast path to try and reduce any impact this would have. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/8139cp.c | 2 +- drivers/net/8139too.c | 2 +- drivers/net/acenic.c | 4 ++- drivers/net/atl1e/atl1e_main.c | 3 ++- drivers/net/atlx/atl1.c | 4 +-- drivers/net/atlx/atl2.c | 2 +- drivers/net/bonding/bond_main.c | 56 +++++++++++++++++++++++++++++++++-------- drivers/net/chelsio/cxgb2.c | 6 ++--- drivers/net/cxgb3/cxgb3_main.c | 1 - drivers/net/e100.c | 2 +- drivers/net/e1000/e1000_main.c | 2 +- drivers/net/e1000e/netdev.c | 2 +- drivers/net/enic/enic_main.c | 2 +- drivers/net/forcedeth.c | 22 +++++++++++++--- drivers/net/ifb.c | 4 +-- drivers/net/igb/igb_main.c | 2 +- drivers/net/ixgb/ixgb_main.c | 2 +- drivers/net/ixgbe/ixgbe_main.c | 2 +- drivers/net/loopback.c | 2 +- drivers/net/macvlan.c | 4 +-- drivers/net/niu.c | 2 +- drivers/net/ppp_generic.c | 5 ++-- drivers/net/r8169.c | 2 +- drivers/net/skge.c | 2 +- drivers/net/sky2.c | 3 ++- drivers/net/tg3.c | 45 ++++++++++++++++++++++----------- drivers/net/tun.c | 4 +-- drivers/net/veth.c | 2 +- drivers/net/via-velocity.c | 2 +- include/linux/netdevice.h | 39 ++++++++++++++++++---------- net/bridge/br_device.c | 10 ++++---- net/bridge/br_if.c | 2 +- net/core/dev.c | 12 ++++++--- net/core/neighbour.c | 6 ++--- net/core/netpoll.c | 6 +++-- net/core/pktgen.c | 8 +++--- 36 files changed, 183 insertions(+), 93 deletions(-) (limited to 'include') diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c index 13f75b67872d..f6d9d1353dd5 100644 --- a/drivers/net/8139cp.c +++ b/drivers/net/8139cp.c @@ -1824,6 +1824,7 @@ static const struct net_device_ops cp_netdev_ops = { .ndo_set_multicast_list = cp_set_rx_mode, .ndo_get_stats = cp_get_stats, .ndo_do_ioctl = cp_ioctl, + .ndo_start_xmit = cp_start_xmit, .ndo_tx_timeout = cp_tx_timeout, #if CP_VLAN_TAG_USED .ndo_vlan_rx_register = cp_vlan_rx_register, @@ -1949,7 +1950,6 @@ static int cp_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); dev->netdev_ops = &cp_netdev_ops; - dev->hard_start_xmit = cp_start_xmit; netif_napi_add(dev, &cp->napi, cp_rx_poll, 16); dev->ethtool_ops = &cp_ethtool_ops; dev->watchdog_timeo = TX_TIMEOUT; diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c index f8866552386a..445a479db79d 100644 --- a/drivers/net/8139too.c +++ b/drivers/net/8139too.c @@ -921,6 +921,7 @@ static const struct net_device_ops rtl8139_netdev_ops = { .ndo_stop = rtl8139_close, .ndo_get_stats = rtl8139_get_stats, .ndo_validate_addr = eth_validate_addr, + .ndo_start_xmit = rtl8139_start_xmit, .ndo_set_multicast_list = rtl8139_set_rx_mode, .ndo_do_ioctl = netdev_ioctl, .ndo_tx_timeout = rtl8139_tx_timeout, @@ -992,7 +993,6 @@ static int __devinit rtl8139_init_one (struct pci_dev *pdev, dev->netdev_ops = &rtl8139_netdev_ops; dev->ethtool_ops = &rtl8139_ethtool_ops; dev->watchdog_timeo = TX_TIMEOUT; - dev->hard_start_xmit = rtl8139_start_xmit; netif_napi_add(dev, &tp->napi, rtl8139_poll, 64); /* note: the hardware is not capable of sg/csum/highdma, however diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c index 309a90ea9211..21d24320210a 100644 --- a/drivers/net/acenic.c +++ b/drivers/net/acenic.c @@ -455,10 +455,13 @@ static const struct net_device_ops ace_netdev_ops = { .ndo_stop = ace_close, .ndo_tx_timeout = ace_watchdog, .ndo_get_stats = ace_get_stats, + .ndo_start_xmit = ace_start_xmit, .ndo_set_multicast_list = ace_set_multicast_list, .ndo_set_mac_address = ace_set_mac_addr, .ndo_change_mtu = ace_change_mtu, +#if ACENIC_DO_VLAN .ndo_vlan_rx_register = ace_vlan_rx_register, +#endif }; static int __devinit acenic_probe_one(struct pci_dev *pdev, @@ -489,7 +492,6 @@ static int __devinit acenic_probe_one(struct pci_dev *pdev, dev->watchdog_timeo = 5*HZ; dev->netdev_ops = &ace_netdev_ops; - dev->hard_start_xmit = &ace_start_xmit; SET_ETHTOOL_OPS(dev, &ace_ethtool_ops); /* we only display this string ONCE */ diff --git a/drivers/net/atl1e/atl1e_main.c b/drivers/net/atl1e/atl1e_main.c index a815fffc2a5b..98b2a7a466b8 100644 --- a/drivers/net/atl1e/atl1e_main.c +++ b/drivers/net/atl1e/atl1e_main.c @@ -2256,6 +2256,7 @@ static void atl1e_shutdown(struct pci_dev *pdev) static const struct net_device_ops atl1e_netdev_ops = { .ndo_open = atl1e_open, .ndo_stop = atl1e_close, + .ndo_start_xmit = atl1e_xmit_frame, .ndo_get_stats = atl1e_get_stats, .ndo_set_multicast_list = atl1e_set_multi, .ndo_validate_addr = eth_validate_addr, @@ -2277,7 +2278,7 @@ static int atl1e_init_netdev(struct net_device *netdev, struct pci_dev *pdev) netdev->irq = pdev->irq; netdev->netdev_ops = &atl1e_netdev_ops; - netdev->hard_start_xmit = atl1e_xmit_frame, + netdev->watchdog_timeo = AT_TX_WATCHDOG; atl1e_set_ethtool_ops(netdev); diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c index 7a0fb04e3480..aef7e47fdd24 100644 --- a/drivers/net/atlx/atl1.c +++ b/drivers/net/atlx/atl1.c @@ -2883,12 +2883,13 @@ static void atl1_poll_controller(struct net_device *netdev) static const struct net_device_ops atl1_netdev_ops = { .ndo_open = atl1_open, .ndo_stop = atl1_close, + .ndo_start_xmit = atl1_xmit_frame, .ndo_set_multicast_list = atlx_set_multi, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = atl1_set_mac, .ndo_change_mtu = atl1_change_mtu, .ndo_do_ioctl = atlx_ioctl, - .ndo_tx_timeout = atlx_tx_timeout, + .ndo_tx_timeout = atlx_tx_timeout, .ndo_vlan_rx_register = atlx_vlan_rx_register, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = atl1_poll_controller, @@ -2983,7 +2984,6 @@ static int __devinit atl1_probe(struct pci_dev *pdev, adapter->mii.reg_num_mask = 0x1f; netdev->netdev_ops = &atl1_netdev_ops; - netdev->hard_start_xmit = &atl1_xmit_frame; netdev->watchdog_timeo = 5 * HZ; netdev->ethtool_ops = &atl1_ethtool_ops; diff --git a/drivers/net/atlx/atl2.c b/drivers/net/atlx/atl2.c index b8d585722e1a..0326a84503e3 100644 --- a/drivers/net/atlx/atl2.c +++ b/drivers/net/atlx/atl2.c @@ -1315,6 +1315,7 @@ static void atl2_poll_controller(struct net_device *netdev) static const struct net_device_ops atl2_netdev_ops = { .ndo_open = atl2_open, .ndo_stop = atl2_close, + .ndo_start_xmit = atl2_xmit_frame, .ndo_set_multicast_list = atl2_set_multi, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = atl2_set_mac, @@ -1400,7 +1401,6 @@ static int __devinit atl2_probe(struct pci_dev *pdev, atl2_setup_pcicmd(pdev); - netdev->hard_start_xmit = &atl2_xmit_frame; netdev->netdev_ops = &atl2_netdev_ops; atl2_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 614656c8187b..a339a8052737 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1377,14 +1377,12 @@ done: return 0; } - static void bond_setup_by_slave(struct net_device *bond_dev, struct net_device *slave_dev) { struct bonding *bond = netdev_priv(bond_dev); - bond_dev->neigh_setup = slave_dev->neigh_setup; - bond_dev->header_ops = slave_dev->header_ops; + bond_dev->header_ops = slave_dev->header_ops; bond_dev->type = slave_dev->type; bond_dev->hard_header_len = slave_dev->hard_header_len; @@ -4124,6 +4122,20 @@ static void bond_set_multicast_list(struct net_device *bond_dev) read_unlock(&bond->lock); } +static int bond_neigh_setup(struct net_device *dev, struct neigh_parms *parms) +{ + struct bonding *bond = netdev_priv(dev); + struct slave *slave = bond->first_slave; + + if (slave) { + const struct net_device_ops *slave_ops + = slave->dev->netdev_ops; + if (slave_ops->ndo_neigh_setup) + return slave_ops->ndo_neigh_setup(dev, parms); + } + return 0; +} + /* * Change the MTU of all of a master's slaves to match the master */ @@ -4490,6 +4502,35 @@ static void bond_set_xmit_hash_policy(struct bonding *bond) } } +static int bond_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + const struct bonding *bond = netdev_priv(dev); + + switch (bond->params.mode) { + case BOND_MODE_ROUNDROBIN: + return bond_xmit_roundrobin(skb, dev); + case BOND_MODE_ACTIVEBACKUP: + return bond_xmit_activebackup(skb, dev); + case BOND_MODE_XOR: + return bond_xmit_xor(skb, dev); + case BOND_MODE_BROADCAST: + return bond_xmit_broadcast(skb, dev); + case BOND_MODE_8023AD: + return bond_3ad_xmit_xor(skb, dev); + case BOND_MODE_ALB: + case BOND_MODE_TLB: + return bond_alb_xmit(skb, dev); + default: + /* Should never happen, mode already checked */ + printk(KERN_ERR DRV_NAME ": %s: Error: Unknown bonding mode %d\n", + dev->name, bond->params.mode); + WARN_ON_ONCE(1); + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } +} + + /* * set bond mode specific net device operations */ @@ -4499,28 +4540,22 @@ void bond_set_mode_ops(struct bonding *bond, int mode) switch (mode) { case BOND_MODE_ROUNDROBIN: - bond_dev->hard_start_xmit = bond_xmit_roundrobin; break; case BOND_MODE_ACTIVEBACKUP: - bond_dev->hard_start_xmit = bond_xmit_activebackup; break; case BOND_MODE_XOR: - bond_dev->hard_start_xmit = bond_xmit_xor; bond_set_xmit_hash_policy(bond); break; case BOND_MODE_BROADCAST: - bond_dev->hard_start_xmit = bond_xmit_broadcast; break; case BOND_MODE_8023AD: bond_set_master_3ad_flags(bond); - bond_dev->hard_start_xmit = bond_3ad_xmit_xor; bond_set_xmit_hash_policy(bond); break; case BOND_MODE_ALB: bond_set_master_alb_flags(bond); /* FALLTHRU */ case BOND_MODE_TLB: - bond_dev->hard_start_xmit = bond_alb_xmit; break; default: /* Should never happen, mode already checked */ @@ -4553,12 +4588,13 @@ static const struct ethtool_ops bond_ethtool_ops = { static const struct net_device_ops bond_netdev_ops = { .ndo_open = bond_open, .ndo_stop = bond_close, + .ndo_start_xmit = bond_start_xmit, .ndo_get_stats = bond_get_stats, .ndo_do_ioctl = bond_do_ioctl, .ndo_set_multicast_list = bond_set_multicast_list, .ndo_change_mtu = bond_change_mtu, - .ndo_validate_addr = NULL, .ndo_set_mac_address = bond_set_mac_address, + .ndo_neigh_setup = bond_neigh_setup, .ndo_vlan_rx_register = bond_vlan_rx_register, .ndo_vlan_rx_add_vid = bond_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = bond_vlan_rx_kill_vid, diff --git a/drivers/net/chelsio/cxgb2.c b/drivers/net/chelsio/cxgb2.c index 482741797ebf..9b6011e7678e 100644 --- a/drivers/net/chelsio/cxgb2.c +++ b/drivers/net/chelsio/cxgb2.c @@ -915,7 +915,7 @@ static int t1_set_mac_addr(struct net_device *dev, void *p) } #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) -static void vlan_rx_register(struct net_device *dev, +static void t1_vlan_rx_register(struct net_device *dev, struct vlan_group *grp) { struct adapter *adapter = dev->ml_priv; @@ -1013,6 +1013,7 @@ void t1_fatal_err(struct adapter *adapter) static const struct net_device_ops cxgb_netdev_ops = { .ndo_open = cxgb_open, .ndo_stop = cxgb_close, + .ndo_start_xmit = t1_start_xmit, .ndo_get_stats = t1_get_stats, .ndo_validate_addr = eth_validate_addr, .ndo_set_multicast_list = t1_set_rxmode, @@ -1020,7 +1021,7 @@ static const struct net_device_ops cxgb_netdev_ops = { .ndo_change_mtu = t1_change_mtu, .ndo_set_mac_address = t1_set_mac_addr, #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) - .ndo_vlan_rx_register = vlan_rx_register, + .ndo_vlan_rx_register = t1_vlan_rx_register, #endif #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = t1_netpoll, @@ -1157,7 +1158,6 @@ static int __devinit init_one(struct pci_dev *pdev, } netdev->netdev_ops = &cxgb_netdev_ops; - netdev->hard_start_xmit = t1_start_xmit; netdev->hard_header_len += (adapter->flags & TSO_CAPABLE) ? sizeof(struct cpl_tx_pkt_lso) : sizeof(struct cpl_tx_pkt); diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c index a9479be53ec3..cd9fcaca70f3 100644 --- a/drivers/net/cxgb3/cxgb3_main.c +++ b/drivers/net/cxgb3/cxgb3_main.c @@ -2955,7 +2955,6 @@ static int __devinit init_one(struct pci_dev *pdev, netdev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; netdev->netdev_ops = &cxgb_netdev_ops; - netdev->hard_start_xmit = t3_eth_xmit; SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops); } diff --git a/drivers/net/e100.c b/drivers/net/e100.c index 5894716de19f..2001a63794f5 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -2615,6 +2615,7 @@ static int e100_close(struct net_device *netdev) static const struct net_device_ops e100_netdev_ops = { .ndo_open = e100_open, .ndo_stop = e100_close, + .ndo_start_xmit = e100_xmit_frame, .ndo_validate_addr = eth_validate_addr, .ndo_set_multicast_list = e100_set_multicast_list, .ndo_set_mac_address = e100_set_mac_address, @@ -2640,7 +2641,6 @@ static int __devinit e100_probe(struct pci_dev *pdev, } netdev->netdev_ops = &e100_netdev_ops; - netdev->hard_start_xmit = e100_xmit_frame; SET_ETHTOOL_OPS(netdev, &e100_ethtool_ops); netdev->watchdog_timeo = E100_WATCHDOG_PERIOD; strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index debbba390d40..5c098c9d584e 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -891,6 +891,7 @@ static int e1000_is_need_ioport(struct pci_dev *pdev) static const struct net_device_ops e1000_netdev_ops = { .ndo_open = e1000_open, .ndo_stop = e1000_close, + .ndo_start_xmit = e1000_xmit_frame, .ndo_get_stats = e1000_get_stats, .ndo_set_rx_mode = e1000_set_rx_mode, .ndo_set_mac_address = e1000_set_mac, @@ -1001,7 +1002,6 @@ static int __devinit e1000_probe(struct pci_dev *pdev, } netdev->netdev_ops = &e1000_netdev_ops; - netdev->hard_start_xmit = &e1000_xmit_frame; e1000_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; netif_napi_add(netdev, &adapter->napi, e1000_clean, 64); diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index ced839e4cae8..cc0502bbb9ff 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -4707,6 +4707,7 @@ static void e1000_eeprom_checks(struct e1000_adapter *adapter) static const struct net_device_ops e1000e_netdev_ops = { .ndo_open = e1000_open, .ndo_stop = e1000_close, + .ndo_start_xmit = e1000_xmit_frame, .ndo_get_stats = e1000_get_stats, .ndo_set_multicast_list = e1000_set_multi, .ndo_set_mac_address = e1000_set_mac, @@ -4822,7 +4823,6 @@ static int __devinit e1000_probe(struct pci_dev *pdev, /* construct the net_device struct */ netdev->netdev_ops = &e1000e_netdev_ops; - netdev->hard_start_xmit = &e1000_xmit_frame; e1000e_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; netif_napi_add(netdev, &adapter->napi, e1000_clean, 64); diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c index 40f8c88b166d..1c409df735d4 100644 --- a/drivers/net/enic/enic_main.c +++ b/drivers/net/enic/enic_main.c @@ -1593,6 +1593,7 @@ static void enic_iounmap(struct enic *enic) static const struct net_device_ops enic_netdev_ops = { .ndo_open = enic_open, .ndo_stop = enic_stop, + .ndo_start_xmit = enic_hard_start_xmit, .ndo_get_stats = enic_get_stats, .ndo_validate_addr = eth_validate_addr, .ndo_set_multicast_list = enic_set_multicast_list, @@ -1830,7 +1831,6 @@ static int __devinit enic_probe(struct pci_dev *pdev, } netdev->netdev_ops = &enic_netdev_ops; - netdev->hard_start_xmit = enic_hard_start_xmit; netdev->watchdog_timeo = 2 * HZ; netdev->ethtool_ops = &enic_ethtool_ops; diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index dd2e1f670b0d..0d7e5750245a 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -5412,6 +5412,23 @@ static const struct net_device_ops nv_netdev_ops = { .ndo_open = nv_open, .ndo_stop = nv_close, .ndo_get_stats = nv_get_stats, + .ndo_start_xmit = nv_start_xmit, + .ndo_tx_timeout = nv_tx_timeout, + .ndo_change_mtu = nv_change_mtu, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = nv_set_mac_address, + .ndo_set_multicast_list = nv_set_multicast, + .ndo_vlan_rx_register = nv_vlan_rx_register, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = nv_poll_controller, +#endif +}; + +static const struct net_device_ops nv_netdev_ops_optimized = { + .ndo_open = nv_open, + .ndo_stop = nv_close, + .ndo_get_stats = nv_get_stats, + .ndo_start_xmit = nv_start_xmit_optimized, .ndo_tx_timeout = nv_tx_timeout, .ndo_change_mtu = nv_change_mtu, .ndo_validate_addr = eth_validate_addr, @@ -5592,11 +5609,10 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i goto out_freering; if (!nv_optimized(np)) - dev->hard_start_xmit = nv_start_xmit; + dev->netdev_ops = &nv_netdev_ops; else - dev->hard_start_xmit = nv_start_xmit_optimized; + dev->netdev_ops = &nv_netdev_ops_optimized; - dev->netdev_ops = &nv_netdev_ops; #ifdef CONFIG_FORCEDETH_NAPI netif_napi_add(dev, &np->napi, nv_napi_poll, RX_WORK_PER_LOOP); #endif diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 363a166df8fb..60a263001933 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -138,15 +138,15 @@ resched: } static const struct net_device_ops ifb_netdev_ops = { - .ndo_validate_addr = eth_validate_addr, .ndo_open = ifb_open, .ndo_stop = ifb_close, + .ndo_start_xmit = ifb_xmit, + .ndo_validate_addr = eth_validate_addr, }; static void ifb_setup(struct net_device *dev) { /* Initialize the device structure. */ - dev->hard_start_xmit = ifb_xmit; dev->destructor = free_netdev; dev->netdev_ops = &ifb_netdev_ops; diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index ceb0a0458796..eca5684d5655 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -953,6 +953,7 @@ static int igb_is_need_ioport(struct pci_dev *pdev) static const struct net_device_ops igb_netdev_ops = { .ndo_open = igb_open, .ndo_stop = igb_close, + .ndo_start_xmit = igb_xmit_frame_adv, .ndo_get_stats = igb_get_stats, .ndo_set_multicast_list = igb_set_multi, .ndo_set_mac_address = igb_set_mac, @@ -1080,7 +1081,6 @@ static int __devinit igb_probe(struct pci_dev *pdev, netdev->netdev_ops = &igb_netdev_ops; igb_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; - netdev->hard_start_xmit = &igb_xmit_frame_adv; strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c index 3ca9daa70b38..a04e3892ddf4 100644 --- a/drivers/net/ixgb/ixgb_main.c +++ b/drivers/net/ixgb/ixgb_main.c @@ -324,6 +324,7 @@ ixgb_reset(struct ixgb_adapter *adapter) static const struct net_device_ops ixgb_netdev_ops = { .ndo_open = ixgb_open, .ndo_stop = ixgb_close, + .ndo_start_xmit = ixgb_xmit_frame, .ndo_get_stats = ixgb_get_stats, .ndo_set_multicast_list = ixgb_set_multi, .ndo_validate_addr = eth_validate_addr, @@ -414,7 +415,6 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } netdev->netdev_ops = &ixgb_netdev_ops; - netdev->hard_start_xmit = &ixgb_xmit_frame; ixgb_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; netif_napi_add(netdev, &adapter->napi, ixgb_clean, 64); diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 7ad07a00680a..40108523377f 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -3728,6 +3728,7 @@ static int ixgbe_link_config(struct ixgbe_hw *hw) static const struct net_device_ops ixgbe_netdev_ops = { .ndo_open = ixgbe_open, .ndo_stop = ixgbe_close, + .ndo_start_xmit = ixgbe_xmit_frame, .ndo_get_stats = ixgbe_get_stats, .ndo_set_multicast_list = ixgbe_set_rx_mode, .ndo_validate_addr = eth_validate_addr, @@ -3824,7 +3825,6 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, } netdev->netdev_ops = &ixgbe_netdev_ops; - netdev->hard_start_xmit = &ixgbe_xmit_frame; ixgbe_set_ethtool_ops(netdev); netdev->watchdog_timeo = 5 * HZ; strcpy(netdev->name, pci_name(pdev)); diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 958450124dec..b7d438a367f3 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -145,6 +145,7 @@ static void loopback_dev_free(struct net_device *dev) static const struct net_device_ops loopback_ops = { .ndo_init = loopback_dev_init, + .ndo_start_xmit= loopback_xmit, .ndo_get_stats = loopback_get_stats, }; @@ -155,7 +156,6 @@ static const struct net_device_ops loopback_ops = { static void loopback_setup(struct net_device *dev) { dev->mtu = (16 * 1024) + 20 + 20 + 12; - dev->hard_start_xmit = loopback_xmit; dev->hard_header_len = ETH_HLEN; /* 14 */ dev->addr_len = ETH_ALEN; /* 6 */ dev->tx_queue_len = 0; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index d00ea444e0a3..e8879217a1d2 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -140,7 +140,7 @@ static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb) return NULL; } -static int macvlan_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +static int macvlan_start_xmit(struct sk_buff *skb, struct net_device *dev) { const struct macvlan_dev *vlan = netdev_priv(dev); unsigned int len = skb->len; @@ -365,6 +365,7 @@ static const struct net_device_ops macvlan_netdev_ops = { .ndo_init = macvlan_init, .ndo_open = macvlan_open, .ndo_stop = macvlan_stop, + .ndo_start_xmit = macvlan_start_xmit, .ndo_change_mtu = macvlan_change_mtu, .ndo_change_rx_flags = macvlan_change_rx_flags, .ndo_set_mac_address = macvlan_set_mac_address, @@ -377,7 +378,6 @@ static void macvlan_setup(struct net_device *dev) ether_setup(dev); dev->netdev_ops = &macvlan_netdev_ops; - dev->hard_start_xmit = macvlan_hard_start_xmit; dev->destructor = free_netdev; dev->header_ops = &macvlan_hard_header_ops, dev->ethtool_ops = &macvlan_ethtool_ops; diff --git a/drivers/net/niu.c b/drivers/net/niu.c index 318537efd583..a8d10630f804 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -8892,6 +8892,7 @@ static struct net_device * __devinit niu_alloc_and_init( static const struct net_device_ops niu_netdev_ops = { .ndo_open = niu_open, .ndo_stop = niu_close, + .ndo_start_xmit = niu_start_xmit, .ndo_get_stats = niu_get_stats, .ndo_set_multicast_list = niu_set_rx_mode, .ndo_validate_addr = eth_validate_addr, @@ -8904,7 +8905,6 @@ static const struct net_device_ops niu_netdev_ops = { static void __devinit niu_assign_netdev_ops(struct net_device *dev) { dev->netdev_ops = &niu_netdev_ops; - dev->hard_start_xmit = niu_start_xmit; dev->ethtool_ops = &niu_ethtool_ops; dev->watchdog_timeo = NIU_TX_TIMEOUT; } diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index bad99e8cac33..1b15a088a3ba 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -972,7 +972,8 @@ ppp_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } static const struct net_device_ops ppp_netdev_ops = { - .ndo_do_ioctl = ppp_net_ioctl, + .ndo_start_xmit = ppp_start_xmit, + .ndo_do_ioctl = ppp_net_ioctl, }; static void ppp_setup(struct net_device *dev) @@ -2437,8 +2438,6 @@ ppp_create_interface(int unit, int *retp) skb_queue_head_init(&ppp->mrq); #endif /* CONFIG_PPP_MULTILINK */ - dev->hard_start_xmit = ppp_start_xmit; - ret = -EEXIST; mutex_lock(&all_ppp_mutex); if (unit < 0) diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index bac58ca628dd..dddf6aeff498 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -1927,6 +1927,7 @@ static const struct net_device_ops rtl8169_netdev_ops = { .ndo_open = rtl8169_open, .ndo_stop = rtl8169_close, .ndo_get_stats = rtl8169_get_stats, + .ndo_start_xmit = rtl8169_start_xmit, .ndo_tx_timeout = rtl8169_tx_timeout, .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = rtl8169_change_mtu, @@ -2125,7 +2126,6 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->dev_addr[i] = RTL_R8(MAC0 + i); memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); - dev->hard_start_xmit = rtl8169_start_xmit; SET_ETHTOOL_OPS(dev, &rtl8169_ethtool_ops); dev->watchdog_timeo = RTL8169_TX_TIMEOUT; dev->irq = pdev->irq; diff --git a/drivers/net/skge.c b/drivers/net/skge.c index 93c1b1d92962..f73ee7974003 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -3805,6 +3805,7 @@ static __exit void skge_debug_cleanup(void) static const struct net_device_ops skge_netdev_ops = { .ndo_open = skge_up, .ndo_stop = skge_down, + .ndo_start_xmit = skge_xmit_frame, .ndo_do_ioctl = skge_ioctl, .ndo_get_stats = skge_get_stats, .ndo_tx_timeout = skge_tx_timeout, @@ -3831,7 +3832,6 @@ static struct net_device *skge_devinit(struct skge_hw *hw, int port, } SET_NETDEV_DEV(dev, &hw->pdev->dev); - dev->hard_start_xmit = skge_xmit_frame; dev->netdev_ops = &skge_netdev_ops; dev->ethtool_ops = &skge_ethtool_ops; dev->watchdog_timeo = TX_WATCHDOG; diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index 251505125cb8..3668e81e474d 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -4047,6 +4047,7 @@ static const struct net_device_ops sky2_netdev_ops[2] = { { .ndo_open = sky2_up, .ndo_stop = sky2_down, + .ndo_start_xmit = sky2_xmit_frame, .ndo_do_ioctl = sky2_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = sky2_set_mac_address, @@ -4063,6 +4064,7 @@ static const struct net_device_ops sky2_netdev_ops[2] = { { .ndo_open = sky2_up, .ndo_stop = sky2_down, + .ndo_start_xmit = sky2_xmit_frame, .ndo_do_ioctl = sky2_ioctl, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = sky2_set_mac_address, @@ -4090,7 +4092,6 @@ static __devinit struct net_device *sky2_init_netdev(struct sky2_hw *hw, SET_NETDEV_DEV(dev, &hw->pdev->dev); dev->irq = hw->pdev->irq; - dev->hard_start_xmit = sky2_xmit_frame; SET_ETHTOOL_OPS(dev, &sky2_ethtool_ops); dev->watchdog_timeo = TX_WATCHDOG; dev->netdev_ops = &sky2_netdev_ops[port]; diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 4b97cb601361..9ba18e1bc341 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -12614,19 +12614,6 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) else tp->tg3_flags &= ~TG3_FLAG_POLL_SERDES; - /* All chips before 5787 can get confused if TX buffers - * straddle the 4GB address boundary in some cases. - */ - if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) - tp->dev->hard_start_xmit = tg3_start_xmit; - else - tp->dev->hard_start_xmit = tg3_start_xmit_dma_bug; - tp->rx_offset = 2; if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701 && (tp->tg3_flags & TG3_FLAG_PCIX_MODE) != 0) @@ -13346,6 +13333,26 @@ static void __devinit tg3_init_coal(struct tg3 *tp) static const struct net_device_ops tg3_netdev_ops = { .ndo_open = tg3_open, .ndo_stop = tg3_close, + .ndo_start_xmit = tg3_start_xmit, + .ndo_get_stats = tg3_get_stats, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_multicast_list = tg3_set_rx_mode, + .ndo_set_mac_address = tg3_set_mac_addr, + .ndo_do_ioctl = tg3_ioctl, + .ndo_tx_timeout = tg3_tx_timeout, + .ndo_change_mtu = tg3_change_mtu, +#if TG3_VLAN_TAG_USED + .ndo_vlan_rx_register = tg3_vlan_rx_register, +#endif +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = tg3_poll_controller, +#endif +}; + +static const struct net_device_ops tg3_netdev_ops_dma_bug = { + .ndo_open = tg3_open, + .ndo_stop = tg3_close, + .ndo_start_xmit = tg3_start_xmit_dma_bug, .ndo_get_stats = tg3_get_stats, .ndo_validate_addr = eth_validate_addr, .ndo_set_multicast_list = tg3_set_rx_mode, @@ -13475,7 +13482,6 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, tp->rx_jumbo_pending = TG3_DEF_RX_JUMBO_RING_PENDING; tp->tx_pending = TG3_DEF_TX_RING_PENDING; - dev->netdev_ops = &tg3_netdev_ops; netif_napi_add(dev, &tp->napi, tg3_poll, 64); dev->ethtool_ops = &tg3_ethtool_ops; dev->watchdog_timeo = TG3_TX_TIMEOUT; @@ -13488,6 +13494,17 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, goto err_out_iounmap; } + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) + dev->netdev_ops = &tg3_netdev_ops; + else + dev->netdev_ops = &tg3_netdev_ops_dma_bug; + + /* The EPB bridge inside 5714, 5715, and 5780 and any * device behind the EPB cannot support DMA addresses > 40-bit. * On 64-bit systems with IOMMU, use 40-bit dma_mask. diff --git a/drivers/net/tun.c b/drivers/net/tun.c index b4c941444756..fd0b11ea5562 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -308,13 +308,14 @@ tun_net_change_mtu(struct net_device *dev, int new_mtu) static const struct net_device_ops tun_netdev_ops = { .ndo_open = tun_net_open, .ndo_stop = tun_net_close, + .ndo_start_xmit = tun_net_xmit, .ndo_change_mtu = tun_net_change_mtu, - }; static const struct net_device_ops tap_netdev_ops = { .ndo_open = tun_net_open, .ndo_stop = tun_net_close, + .ndo_start_xmit = tun_net_xmit, .ndo_change_mtu = tun_net_change_mtu, .ndo_set_multicast_list = tun_net_mclist, .ndo_set_mac_address = eth_mac_addr, @@ -691,7 +692,6 @@ static void tun_setup(struct net_device *dev) tun->owner = -1; tun->group = -1; - dev->hard_start_xmit = tun_net_xmit; dev->ethtool_ops = &tun_ethtool_ops; dev->destructor = free_netdev; dev->features |= NETIF_F_NETNS_LOCAL; diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 4f93a55aaaa5..852d0e7c4e62 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -265,6 +265,7 @@ static void veth_dev_free(struct net_device *dev) static const struct net_device_ops veth_netdev_ops = { .ndo_init = veth_dev_init, .ndo_open = veth_open, + .ndo_start_xmit = veth_xmit, .ndo_get_stats = veth_get_stats, }; @@ -273,7 +274,6 @@ static void veth_setup(struct net_device *dev) ether_setup(dev); dev->netdev_ops = &veth_netdev_ops; - dev->hard_start_xmit = veth_xmit; dev->ethtool_ops = &veth_ethtool_ops; dev->features |= NETIF_F_LLTX; dev->destructor = veth_dev_free; diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index 033e63a68436..58e25d090ae0 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c @@ -852,6 +852,7 @@ static int velocity_soft_reset(struct velocity_info *vptr) static const struct net_device_ops velocity_netdev_ops = { .ndo_open = velocity_open, .ndo_stop = velocity_close, + .ndo_start_xmit = velocity_xmit, .ndo_get_stats = velocity_get_stats, .ndo_validate_addr = eth_validate_addr, .ndo_set_multicast_list = velocity_set_multi, @@ -971,7 +972,6 @@ static int __devinit velocity_found1(struct pci_dev *pdev, const struct pci_devi vptr->phy_id = MII_GET_PHY_ID(vptr->mac_regs); dev->irq = pdev->irq; - dev->hard_start_xmit = velocity_xmit; dev->netdev_ops = &velocity_netdev_ops; dev->ethtool_ops = &velocity_ethtool_ops; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 981a089d5149..d8fb23679ee3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -454,8 +454,8 @@ struct netdev_queue { /* * This structure defines the management hooks for network devices. - * The following hooks can bed defined and are optonal (can be null) - * unless otherwise noted. + * The following hooks can be defined; unless noted otherwise, they are + * optional and can be filled with a null pointer. * * int (*ndo_init)(struct net_device *dev); * This function is called once when network device is registered. @@ -475,6 +475,15 @@ struct netdev_queue { * This function is called when network device transistions to the down * state. * + * int (*ndo_hard_start_xmit)(struct sk_buff *skb, struct net_device *dev); + * Called when a packet needs to be transmitted. + * Must return NETDEV_TX_OK , NETDEV_TX_BUSY, or NETDEV_TX_LOCKED, + * Required can not be NULL. + * + * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb); + * Called to decide which queue to when device supports multiple + * transmit queues. + * * void (*ndo_change_rx_flags)(struct net_device *dev, int flags); * This function is called to allow device receiver to make * changes to configuration when multicast or promiscious is enabled. @@ -508,7 +517,7 @@ struct netdev_queue { * of a device. If not defined, any request to change MTU will * will return an error. * - * void (*ndo_tx_timeout) (struct net_device *dev); + * void (*ndo_tx_timeout)(struct net_device *dev); * Callback uses when the transmitter has not made any progress * for dev->watchdog ticks. * @@ -538,6 +547,10 @@ struct net_device_ops { void (*ndo_uninit)(struct net_device *dev); int (*ndo_open)(struct net_device *dev); int (*ndo_stop)(struct net_device *dev); + int (*ndo_start_xmit) (struct sk_buff *skb, + struct net_device *dev); + u16 (*ndo_select_queue)(struct net_device *dev, + struct sk_buff *skb); #define HAVE_CHANGE_RX_FLAGS void (*ndo_change_rx_flags)(struct net_device *dev, int flags); @@ -557,8 +570,10 @@ struct net_device_ops { int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); #define HAVE_CHANGE_MTU - int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); - + int (*ndo_change_mtu)(struct net_device *dev, + int new_mtu); + int (*ndo_neigh_setup)(struct net_device *dev, + struct neigh_parms *); #define HAVE_TX_TIMEOUT void (*ndo_tx_timeout) (struct net_device *dev); @@ -761,18 +776,12 @@ struct net_device /* Number of TX queues currently active in device */ unsigned int real_num_tx_queues; - /* Map buffer to appropriate transmit queue */ - u16 (*select_queue)(struct net_device *dev, - struct sk_buff *skb); - unsigned long tx_queue_len; /* Max frames per queue allowed */ spinlock_t tx_global_lock; /* * One part is mostly used on xmit path (device) */ void *priv; /* pointer to private data */ - int (*hard_start_xmit) (struct sk_buff *skb, - struct net_device *dev); /* These may be needed for future network-power-down code. */ unsigned long trans_start; /* Time (in jiffies) of last Tx */ @@ -800,8 +809,6 @@ struct net_device /* Called from unregister, can be used to call free_netdev */ void (*destructor)(struct net_device *dev); - int (*neigh_setup)(struct net_device *dev, struct neigh_parms *); - #ifdef CONFIG_NETPOLL struct netpoll_info *npinfo; #endif @@ -842,6 +849,10 @@ struct net_device void (*uninit)(struct net_device *dev); int (*open)(struct net_device *dev); int (*stop)(struct net_device *dev); + int (*hard_start_xmit) (struct sk_buff *skb, + struct net_device *dev); + u16 (*select_queue)(struct net_device *dev, + struct sk_buff *skb); void (*change_rx_flags)(struct net_device *dev, int flags); void (*set_rx_mode)(struct net_device *dev); @@ -854,6 +865,8 @@ struct net_device int (*set_config)(struct net_device *dev, struct ifmap *map); int (*change_mtu)(struct net_device *dev, int new_mtu); + int (*neigh_setup)(struct net_device *dev, + struct neigh_parms *); void (*tx_timeout) (struct net_device *dev); struct net_device_stats* (*get_stats)(struct net_device *dev); void (*vlan_rx_register)(struct net_device *dev, diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 920ce3348398..18538d7460d7 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -163,10 +163,11 @@ static const struct ethtool_ops br_ethtool_ops = { static const struct net_device_ops br_netdev_ops = { .ndo_open = br_dev_open, .ndo_stop = br_dev_stop, - .ndo_set_mac_address = br_set_mac_address, - .ndo_set_multicast_list = br_dev_set_multicast_list, - .ndo_change_mtu = br_change_mtu, - .ndo_do_ioctl = br_dev_ioctl, + .ndo_start_xmit = br_dev_xmit, + .ndo_set_mac_address = br_set_mac_address, + .ndo_set_multicast_list = br_dev_set_multicast_list, + .ndo_change_mtu = br_change_mtu, + .ndo_do_ioctl = br_dev_ioctl, }; void br_dev_setup(struct net_device *dev) @@ -175,7 +176,6 @@ void br_dev_setup(struct net_device *dev) ether_setup(dev); dev->netdev_ops = &br_netdev_ops; - dev->hard_start_xmit = br_dev_xmit; dev->destructor = free_netdev; SET_ETHTOOL_OPS(dev, &br_ethtool_ops); dev->tx_queue_len = 0; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index ee3a8dd13f55..727c5c510a60 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -373,7 +373,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (dev->flags & IFF_LOOPBACK || dev->type != ARPHRD_ETHER) return -EINVAL; - if (dev->hard_start_xmit == br_dev_xmit) + if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) return -ELOOP; if (dev->br_port != NULL) diff --git a/net/core/dev.c b/net/core/dev.c index 8843f4e3f5e1..4615e9a443aa 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1660,6 +1660,9 @@ static int dev_gso_segment(struct sk_buff *skb) int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { + const struct net_device_ops *ops = dev->netdev_ops; + + prefetch(&dev->netdev_ops->ndo_start_xmit); if (likely(!skb->next)) { if (!list_empty(&ptype_all)) dev_queue_xmit_nit(skb, dev); @@ -1671,7 +1674,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, goto gso; } - return dev->hard_start_xmit(skb, dev); + return ops->ndo_start_xmit(skb, dev); } gso: @@ -1681,7 +1684,7 @@ gso: skb->next = nskb->next; nskb->next = NULL; - rc = dev->hard_start_xmit(nskb, dev); + rc = ops->ndo_start_xmit(nskb, dev); if (unlikely(rc)) { nskb->next = skb->next; skb->next = nskb; @@ -1755,10 +1758,11 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) { + const struct net_device_ops *ops = dev->netdev_ops; u16 queue_index = 0; - if (dev->select_queue) - queue_index = dev->select_queue(dev, skb); + if (ops->ndo_select_queue) + queue_index = ops->ndo_select_queue(dev, skb); else if (dev->real_num_tx_queues > 1) queue_index = simple_tx_hash(dev, skb); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index cca6a55909eb..9c3717a23cf7 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1327,9 +1327,9 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl) { struct neigh_parms *p, *ref; - struct net *net; + struct net *net = dev_net(dev); + const struct net_device_ops *ops = dev->netdev_ops; - net = dev_net(dev); ref = lookup_neigh_params(tbl, net, 0); if (!ref) return NULL; @@ -1341,7 +1341,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); - if (dev->neigh_setup && dev->neigh_setup(dev, p)) { + if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { kfree(p); return NULL; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 630df6034444..96fb0519eb7a 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -58,6 +58,7 @@ static void queue_process(struct work_struct *work) while ((skb = skb_dequeue(&npinfo->txq))) { struct net_device *dev = skb->dev; + const struct net_device_ops *ops = dev->netdev_ops; struct netdev_queue *txq; if (!netif_device_present(dev) || !netif_running(dev)) { @@ -71,7 +72,7 @@ static void queue_process(struct work_struct *work) __netif_tx_lock(txq, smp_processor_id()); if (netif_tx_queue_stopped(txq) || netif_tx_queue_frozen(txq) || - dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { + ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); __netif_tx_unlock(txq); local_irq_restore(flags); @@ -273,6 +274,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) int status = NETDEV_TX_BUSY; unsigned long tries; struct net_device *dev = np->dev; + const struct net_device_ops *ops = dev->netdev_ops; struct netpoll_info *npinfo = np->dev->npinfo; if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { @@ -293,7 +295,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) tries > 0; --tries) { if (__netif_tx_trylock(txq)) { if (!netif_tx_queue_stopped(txq)) - status = dev->hard_start_xmit(skb, dev); + status = ops->ndo_start_xmit(skb, dev); __netif_tx_unlock(txq); if (status == NETDEV_TX_OK) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 4e77914c4d42..15e0c2c7aacf 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3352,14 +3352,14 @@ static void pktgen_rem_thread(struct pktgen_thread *t) static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) { - struct net_device *odev = NULL; + struct net_device *odev = pkt_dev->odev; + int (*xmit)(struct sk_buff *, struct net_device *) + = odev->netdev_ops->ndo_start_xmit; struct netdev_queue *txq; __u64 idle_start = 0; u16 queue_map; int ret; - odev = pkt_dev->odev; - if (pkt_dev->delay_us || pkt_dev->delay_ns) { u64 now; @@ -3440,7 +3440,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) atomic_inc(&(pkt_dev->skb->users)); retry_now: - ret = odev->hard_start_xmit(pkt_dev->skb, odev); + ret = (*xmit)(pkt_dev->skb, odev); if (likely(ret == NETDEV_TX_OK)) { pkt_dev->last_ok = 1; pkt_dev->sofar++; -- cgit v1.2.3 From 145186a39570244aead77dc2efc559e5cac90548 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 20 Nov 2008 20:29:48 -0800 Subject: fddi: convert to new network device ops Similar to ethernet. Convert infrastructure and the one lone FDDI driver (for the one lone user of that hardware??). Compile tested only. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/skfp/skfddi.c | 19 ++++++++++++------- include/linux/fddidevice.h | 1 + net/802/fddi.c | 8 ++++++-- 3 files changed, 19 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/net/skfp/skfddi.c b/drivers/net/skfp/skfddi.c index 282bb47deccc..7fbd4f84f272 100644 --- a/drivers/net/skfp/skfddi.c +++ b/drivers/net/skfp/skfddi.c @@ -168,6 +168,17 @@ static int num_boards; /* total number of adapters configured */ #define PRINTK(s, args...) #endif // DRIVERDEBUG +static const struct net_device_ops skfp_netdev_ops = { + .ndo_open = skfp_open, + .ndo_stop = skfp_close, + .ndo_start_xmit = skfp_send_pkt, + .ndo_get_stats = skfp_ctl_get_stats, + .ndo_change_mtu = fddi_change_mtu, + .ndo_set_multicast_list = skfp_ctl_set_multicast_list, + .ndo_set_mac_address = skfp_ctl_set_mac_address, + .ndo_do_ioctl = skfp_ioctl, +}; + /* * ================= * = skfp_init_one = @@ -253,13 +264,7 @@ static int skfp_init_one(struct pci_dev *pdev, } dev->irq = pdev->irq; - dev->get_stats = &skfp_ctl_get_stats; - dev->open = &skfp_open; - dev->stop = &skfp_close; - dev->hard_start_xmit = &skfp_send_pkt; - dev->set_multicast_list = &skfp_ctl_set_multicast_list; - dev->set_mac_address = &skfp_ctl_set_mac_address; - dev->do_ioctl = &skfp_ioctl; + dev->netdev_ops = &skfp_netdev_ops; SET_NETDEV_DEV(dev, &pdev->dev); diff --git a/include/linux/fddidevice.h b/include/linux/fddidevice.h index e61e42dfd317..155bafd9e886 100644 --- a/include/linux/fddidevice.h +++ b/include/linux/fddidevice.h @@ -27,6 +27,7 @@ #ifdef __KERNEL__ extern __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev); +extern int fddi_change_mtu(struct net_device *dev, int new_mtu); extern struct net_device *alloc_fddidev(int sizeof_priv); #endif diff --git a/net/802/fddi.c b/net/802/fddi.c index 0549317b9356..f1611a1e06a7 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -167,23 +167,27 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev) EXPORT_SYMBOL(fddi_type_trans); -static int fddi_change_mtu(struct net_device *dev, int new_mtu) +int fddi_change_mtu(struct net_device *dev, int new_mtu) { if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN)) return(-EINVAL); dev->mtu = new_mtu; return(0); } +EXPORT_SYMBOL(fddi_change_mtu); static const struct header_ops fddi_header_ops = { .create = fddi_header, .rebuild = fddi_rebuild_header, }; + static void fddi_setup(struct net_device *dev) { - dev->change_mtu = fddi_change_mtu; dev->header_ops = &fddi_header_ops; +#ifdef CONFIG_COMPAT_NET_DEV_OPS + dev->change_mtu = fddi_change_mtu, +#endif dev->type = ARPHRD_FDDI; dev->hard_header_len = FDDI_K_SNAP_HLEN+3; /* Assume 802.2 SNAP hdr len + 3 pad bytes */ -- cgit v1.2.3 From 748ff68fad9600593c6abe47856037602bd5d133 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 20 Nov 2008 20:32:15 -0800 Subject: hippi: convert driver to net_device_ops Convert the HIPPI infrastructure for use with net_device_ops. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/rrunner.c | 15 +++++++++++---- include/linux/hippidevice.h | 4 +++- net/802/hippi.c | 14 +++++++++----- 3 files changed, 23 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/net/rrunner.c b/drivers/net/rrunner.c index 6e4131f9a933..b4e3ddd0b59c 100644 --- a/drivers/net/rrunner.c +++ b/drivers/net/rrunner.c @@ -63,6 +63,16 @@ MODULE_LICENSE("GPL"); static char version[] __devinitdata = "rrunner.c: v0.50 11/11/2002 Jes Sorensen (jes@wildopensource.com)\n"; + +static const struct net_device_ops rr_netdev_ops = { + .ndo_open = rr_open, + .ndo_stop = rr_close, + .ndo_do_ioctl = rr_ioctl, + .ndo_start_xmit = rr_start_xmit, + .ndo_change_mtu = hippi_change_mtu, + .ndo_set_mac_address = hippi_mac_addr, +}; + /* * Implementation notes: * @@ -115,10 +125,7 @@ static int __devinit rr_init_one(struct pci_dev *pdev, spin_lock_init(&rrpriv->lock); dev->irq = pdev->irq; - dev->open = &rr_open; - dev->hard_start_xmit = &rr_start_xmit; - dev->stop = &rr_close; - dev->do_ioctl = &rr_ioctl; + dev->netdev_ops = &rr_netdev_ops; dev->base_addr = pci_resource_start(pdev, 0); diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h index bab303dafd6e..f148e4908410 100644 --- a/include/linux/hippidevice.h +++ b/include/linux/hippidevice.h @@ -32,7 +32,9 @@ struct hippi_cb { }; extern __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev); - +extern int hippi_change_mtu(struct net_device *dev, int new_mtu); +extern int hippi_mac_addr(struct net_device *dev, void *p); +extern int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p); extern struct net_device *alloc_hippi_dev(int sizeof_priv); #endif diff --git a/net/802/hippi.c b/net/802/hippi.c index e35dc1e0915d..313b9ebf92ee 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -144,7 +144,7 @@ __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev) EXPORT_SYMBOL(hippi_type_trans); -static int hippi_change_mtu(struct net_device *dev, int new_mtu) +int hippi_change_mtu(struct net_device *dev, int new_mtu) { /* * HIPPI's got these nice large MTUs. @@ -154,12 +154,13 @@ static int hippi_change_mtu(struct net_device *dev, int new_mtu) dev->mtu = new_mtu; return(0); } +EXPORT_SYMBOL(hippi_change_mtu); /* * For HIPPI we will actually use the lower 4 bytes of the hardware * address as the I-FIELD rather than the actual hardware address. */ -static int hippi_mac_addr(struct net_device *dev, void *p) +int hippi_mac_addr(struct net_device *dev, void *p) { struct sockaddr *addr = p; if (netif_running(dev)) @@ -167,8 +168,9 @@ static int hippi_mac_addr(struct net_device *dev, void *p) memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); return 0; } +EXPORT_SYMBOL(hippi_mac_addr); -static int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p) +int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p) { /* Never send broadcast/multicast ARP messages */ p->mcast_probes = 0; @@ -181,6 +183,7 @@ static int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p) p->ucast_probes = 0; return 0; } +EXPORT_SYMBOL(hippi_neigh_setup_dev); static const struct header_ops hippi_header_ops = { .create = hippi_header, @@ -190,11 +193,12 @@ static const struct header_ops hippi_header_ops = { static void hippi_setup(struct net_device *dev) { - dev->set_multicast_list = NULL; +#ifdef CONFIG_COMPAT_NET_DEV_OPS dev->change_mtu = hippi_change_mtu; - dev->header_ops = &hippi_header_ops; dev->set_mac_address = hippi_mac_addr; dev->neigh_setup = hippi_neigh_setup_dev; +#endif + dev->header_ops = &hippi_header_ops; /* * We don't support HIPPI `ARP' for the time being, and probably -- cgit v1.2.3 From 9db66bdcc83749affe61c61eb8ff3cf08f42afec Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Nov 2008 20:39:09 -0800 Subject: net: convert TCP/DCCP ehash rwlocks to spinlocks Now TCP & DCCP use RCU lookups, we can convert ehash rwlocks to spinlocks. /proc/net/tcp and other seq_file 'readers' can safely be converted to 'writers'. This should speedup writers, since spin_lock()/spin_unlock() only use one atomic operation instead of two for write_lock()/write_unlock() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 14 +++++++------- net/ipv4/inet_hashtables.c | 21 ++++++++++----------- net/ipv4/inet_timewait_sock.c | 22 +++++++++++----------- net/ipv4/tcp_ipv4.c | 12 ++++++------ net/ipv6/inet6_hashtables.c | 15 +++++++-------- 5 files changed, 41 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 62d2dd0d7860..28b3ee3e8d6d 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -116,7 +116,7 @@ struct inet_hashinfo { * TIME_WAIT sockets use a separate chain (twchain). */ struct inet_ehash_bucket *ehash; - rwlock_t *ehash_locks; + spinlock_t *ehash_locks; unsigned int ehash_size; unsigned int ehash_locks_mask; @@ -152,7 +152,7 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket( return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)]; } -static inline rwlock_t *inet_ehash_lockp( +static inline spinlock_t *inet_ehash_lockp( struct inet_hashinfo *hashinfo, unsigned int hash) { @@ -177,16 +177,16 @@ static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) size = 4096; if (sizeof(rwlock_t) != 0) { #ifdef CONFIG_NUMA - if (size * sizeof(rwlock_t) > PAGE_SIZE) - hashinfo->ehash_locks = vmalloc(size * sizeof(rwlock_t)); + if (size * sizeof(spinlock_t) > PAGE_SIZE) + hashinfo->ehash_locks = vmalloc(size * sizeof(spinlock_t)); else #endif - hashinfo->ehash_locks = kmalloc(size * sizeof(rwlock_t), + hashinfo->ehash_locks = kmalloc(size * sizeof(spinlock_t), GFP_KERNEL); if (!hashinfo->ehash_locks) return ENOMEM; for (i = 0; i < size; i++) - rwlock_init(&hashinfo->ehash_locks[i]); + spin_lock_init(&hashinfo->ehash_locks[i]); } hashinfo->ehash_locks_mask = size - 1; return 0; @@ -197,7 +197,7 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) if (hashinfo->ehash_locks) { #ifdef CONFIG_NUMA unsigned int size = (hashinfo->ehash_locks_mask + 1) * - sizeof(rwlock_t); + sizeof(spinlock_t); if (size > PAGE_SIZE) vfree(hashinfo->ehash_locks); else diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 377d004e5723..4c273a9981a6 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -271,13 +271,12 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, struct net *net = sock_net(sk); unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); - rwlock_t *lock = inet_ehash_lockp(hinfo, hash); + spinlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; - prefetch(head->chain.first); - write_lock(lock); + spin_lock(lock); /* Check TIME-WAIT sockets first. */ sk_nulls_for_each(sk2, node, &head->twchain) { @@ -308,8 +307,8 @@ unique: sk->sk_hash = hash; WARN_ON(!sk_unhashed(sk)); __sk_nulls_add_node_rcu(sk, &head->chain); + spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); if (twp) { *twp = tw; @@ -325,7 +324,7 @@ unique: return 0; not_unique: - write_unlock(lock); + spin_unlock(lock); return -EADDRNOTAVAIL; } @@ -340,7 +339,7 @@ void __inet_hash_nolisten(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct hlist_nulls_head *list; - rwlock_t *lock; + spinlock_t *lock; struct inet_ehash_bucket *head; WARN_ON(!sk_unhashed(sk)); @@ -350,10 +349,10 @@ void __inet_hash_nolisten(struct sock *sk) list = &head->chain; lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - write_lock(lock); + spin_lock(lock); __sk_nulls_add_node_rcu(sk, list); + spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); } EXPORT_SYMBOL_GPL(__inet_hash_nolisten); @@ -402,12 +401,12 @@ void inet_unhash(struct sock *sk) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_unlock_bh(&ilb->lock); } else { - rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); + spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - write_lock_bh(lock); + spin_lock_bh(lock); if (__sk_nulls_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock_bh(lock); + spin_unlock_bh(lock); } } EXPORT_SYMBOL_GPL(inet_unhash); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 60689951ecdb..8554d0ea1719 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -20,16 +20,16 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; /* Unlink from established hashes. */ - rwlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); + spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); - write_lock(lock); + spin_lock(lock); if (hlist_nulls_unhashed(&tw->tw_node)) { - write_unlock(lock); + spin_unlock(lock); return; } hlist_nulls_del_rcu(&tw->tw_node); sk_nulls_node_init(&tw->tw_node); - write_unlock(lock); + spin_unlock(lock); /* Disassociate with bind bucket. */ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, @@ -76,7 +76,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, const struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); - rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); + spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); struct inet_bind_hashbucket *bhead; /* Step 1: Put TW into bind hash. Original socket stays there too. Note, that any socket with inet->num != 0 MUST be bound in @@ -90,7 +90,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); spin_unlock(&bhead->lock); - write_lock(lock); + spin_lock(lock); /* * Step 2: Hash TW into TIMEWAIT chain. @@ -104,7 +104,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, if (__sk_nulls_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock(lock); + spin_unlock(lock); } EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); @@ -427,9 +427,9 @@ void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo, for (h = 0; h < (hashinfo->ehash_size); h++) { struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, h); - rwlock_t *lock = inet_ehash_lockp(hashinfo, h); + spinlock_t *lock = inet_ehash_lockp(hashinfo, h); restart: - write_lock(lock); + spin_lock(lock); sk_nulls_for_each(sk, node, &head->twchain) { tw = inet_twsk(sk); @@ -438,13 +438,13 @@ restart: continue; atomic_inc(&tw->tw_refcnt); - write_unlock(lock); + spin_unlock(lock); inet_twsk_deschedule(tw, twdr); inet_twsk_put(tw); goto restart; } - write_unlock(lock); + spin_unlock(lock); } local_bh_enable(); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 330b08a12274..a81caa1be0cf 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1970,13 +1970,13 @@ static void *established_get_first(struct seq_file *seq) struct sock *sk; struct hlist_nulls_node *node; struct inet_timewait_sock *tw; - rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); + spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); /* Lockless fast path for the common case of empty buckets */ if (empty_bucket(st)) continue; - read_lock_bh(lock); + spin_lock_bh(lock); sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { if (sk->sk_family != st->family || !net_eq(sock_net(sk), net)) { @@ -1995,7 +1995,7 @@ static void *established_get_first(struct seq_file *seq) rc = tw; goto out; } - read_unlock_bh(lock); + spin_unlock_bh(lock); st->state = TCP_SEQ_STATE_ESTABLISHED; } out: @@ -2023,7 +2023,7 @@ get_tw: cur = tw; goto out; } - read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); + spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); st->state = TCP_SEQ_STATE_ESTABLISHED; /* Look for next non empty bucket */ @@ -2033,7 +2033,7 @@ get_tw: if (st->bucket >= tcp_hashinfo.ehash_size) return NULL; - read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); + spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain); } else sk = sk_nulls_next(sk); @@ -2134,7 +2134,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) case TCP_SEQ_STATE_TIME_WAIT: case TCP_SEQ_STATE_ESTABLISHED: if (v) - read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); + spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); break; } } diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 21544b9be259..e0fd68187f83 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -38,14 +38,14 @@ void __inet6_hash(struct sock *sk) } else { unsigned int hash; struct hlist_nulls_head *list; - rwlock_t *lock; + spinlock_t *lock; sk->sk_hash = hash = inet6_sk_ehashfn(sk); list = &inet_ehash_bucket(hashinfo, hash)->chain; lock = inet_ehash_lockp(hashinfo, hash); - write_lock(lock); + spin_lock(lock); __sk_nulls_add_node_rcu(sk, list); - write_unlock(lock); + spin_unlock(lock); } sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -195,13 +195,12 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); - rwlock_t *lock = inet_ehash_lockp(hinfo, hash); + spinlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; - prefetch(head->chain.first); - write_lock(lock); + spin_lock(lock); /* Check TIME-WAIT sockets first. */ sk_nulls_for_each(sk2, node, &head->twchain) { @@ -230,8 +229,8 @@ unique: WARN_ON(!sk_unhashed(sk)); __sk_nulls_add_node_rcu(sk, &head->chain); sk->sk_hash = hash; + spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); if (twp != NULL) { *twp = tw; @@ -246,7 +245,7 @@ unique: return 0; not_unique: - write_unlock(lock); + spin_unlock(lock); return -EADDRNOTAVAIL; } -- cgit v1.2.3 From 2f90b8657ec942d1880f720e0177ee71df7c8e3c Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 20 Nov 2008 20:52:10 -0800 Subject: ixgbe: this patch adds support for DCB to the kernel and ixgbe driver This adds support for Data Center Bridging (DCB) features in the ixgbe driver and adds an rtnetlink interface for configuring DCB to the kernel. The DCB feature support included are Priority Grouping (PG) - which allows bandwidth guarantees to be allocated to groups to traffic based on the 802.1q priority, and Priority Based Flow Control (PFC) - which introduces a new MAC control PAUSE frame which works at granularity of the 802.1p priority instead of the link (IEEE 802.3x). Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: David S. Miller --- drivers/net/Kconfig | 10 + drivers/net/ixgbe/Makefile | 2 + drivers/net/ixgbe/ixgbe.h | 25 +- drivers/net/ixgbe/ixgbe_dcb.c | 332 +++++++++++++++++ drivers/net/ixgbe/ixgbe_dcb.h | 157 ++++++++ drivers/net/ixgbe/ixgbe_dcb_82598.c | 398 ++++++++++++++++++++ drivers/net/ixgbe/ixgbe_dcb_82598.h | 94 +++++ drivers/net/ixgbe/ixgbe_dcb_nl.c | 356 ++++++++++++++++++ drivers/net/ixgbe/ixgbe_ethtool.c | 30 +- drivers/net/ixgbe/ixgbe_main.c | 200 +++++++++- include/linux/dcbnl.h | 230 ++++++++++++ include/linux/netdevice.h | 8 + include/linux/rtnetlink.h | 5 + include/net/dcbnl.h | 44 +++ net/Kconfig | 1 + net/Makefile | 3 + net/dcb/Kconfig | 12 + net/dcb/Makefile | 1 + net/dcb/dcbnl.c | 704 ++++++++++++++++++++++++++++++++++++ 19 files changed, 2593 insertions(+), 19 deletions(-) create mode 100644 drivers/net/ixgbe/ixgbe_dcb.c create mode 100644 drivers/net/ixgbe/ixgbe_dcb.h create mode 100644 drivers/net/ixgbe/ixgbe_dcb_82598.c create mode 100644 drivers/net/ixgbe/ixgbe_dcb_82598.h create mode 100644 drivers/net/ixgbe/ixgbe_dcb_nl.c create mode 100644 include/linux/dcbnl.h create mode 100644 include/net/dcbnl.h create mode 100644 net/dcb/Kconfig create mode 100644 net/dcb/Makefile create mode 100644 net/dcb/dcbnl.c (limited to 'include') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index afa206590ada..efd461d7c2bb 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2451,6 +2451,16 @@ config IXGBE_DCA driver. DCA is a method for warming the CPU cache before data is used, with the intent of lessening the impact of cache misses. +config IXGBE_DCBNL + bool "Data Center Bridging (DCB) Support" + default n + depends on IXGBE && DCBNL + ---help--- + Say Y here if you want to use Data Center Bridging (DCB) in the + driver. + + If unsure, say N. + config IXGB tristate "Intel(R) PRO/10GbE support" depends on PCI diff --git a/drivers/net/ixgbe/Makefile b/drivers/net/ixgbe/Makefile index ccd83d9f579e..3228e508e628 100644 --- a/drivers/net/ixgbe/Makefile +++ b/drivers/net/ixgbe/Makefile @@ -34,3 +34,5 @@ obj-$(CONFIG_IXGBE) += ixgbe.o ixgbe-objs := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o \ ixgbe_82598.o ixgbe_phy.o + +ixgbe-$(CONFIG_IXGBE_DCBNL) += ixgbe_dcb.o ixgbe_dcb_82598.o ixgbe_dcb_nl.o diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h index 132854f646ba..796f189f3879 100644 --- a/drivers/net/ixgbe/ixgbe.h +++ b/drivers/net/ixgbe/ixgbe.h @@ -35,7 +35,7 @@ #include "ixgbe_type.h" #include "ixgbe_common.h" - +#include "ixgbe_dcb.h" #ifdef CONFIG_IXGBE_DCA #include #endif @@ -84,6 +84,7 @@ #define IXGBE_TX_FLAGS_TSO (u32)(1 << 2) #define IXGBE_TX_FLAGS_IPV4 (u32)(1 << 3) #define IXGBE_TX_FLAGS_VLAN_MASK 0xffff0000 +#define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0x0000e000 #define IXGBE_TX_FLAGS_VLAN_SHIFT 16 #define IXGBE_MAX_LRO_DESCRIPTORS 8 @@ -134,7 +135,7 @@ struct ixgbe_ring { u16 reg_idx; /* holds the special value that gets the hardware register * offset associated with this ring, which is different - * for DCE and RSS modes */ + * for DCB and RSS modes */ #ifdef CONFIG_IXGBE_DCA /* cpu for tx queue */ @@ -152,8 +153,10 @@ struct ixgbe_ring { u16 rx_buf_len; }; +#define RING_F_DCB 0 #define RING_F_VMDQ 1 #define RING_F_RSS 2 +#define IXGBE_MAX_DCB_INDICES 8 #define IXGBE_MAX_RSS_INDICES 16 #define IXGBE_MAX_VMDQ_INDICES 16 struct ixgbe_ring_feature { @@ -164,6 +167,10 @@ struct ixgbe_ring_feature { #define MAX_RX_QUEUES 64 #define MAX_TX_QUEUES 32 +#define MAX_RX_PACKET_BUFFERS ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) \ + ? 8 : 1) +#define MAX_TX_PACKET_BUFFERS MAX_RX_PACKET_BUFFERS + /* MAX_MSIX_Q_VECTORS of these are allocated, * but we only use one per queue-specific vector. */ @@ -215,6 +222,9 @@ struct ixgbe_adapter { struct work_struct reset_task; struct ixgbe_q_vector q_vector[MAX_MSIX_Q_VECTORS]; char name[MAX_MSIX_COUNT][IFNAMSIZ + 5]; + struct ixgbe_dcb_config dcb_cfg; + struct ixgbe_dcb_config temp_dcb_cfg; + u8 dcb_set_bitmap; /* Interrupt Throttle Rate */ u32 itr_setting; @@ -270,6 +280,7 @@ struct ixgbe_adapter { #define IXGBE_FLAG_FAN_FAIL_CAPABLE (u32)(1 << 20) #define IXGBE_FLAG_NEED_LINK_UPDATE (u32)(1 << 22) #define IXGBE_FLAG_IN_WATCHDOG_TASK (u32)(1 << 23) +#define IXGBE_FLAG_DCB_ENABLED (u32)(1 << 24) /* default to trying for four seconds */ #define IXGBE_TRY_LINK_TIMEOUT (4 * HZ) @@ -313,6 +324,12 @@ enum ixgbe_boards { }; extern struct ixgbe_info ixgbe_82598_info; +#ifdef CONFIG_IXGBE_DCBNL +extern struct dcbnl_rtnl_ops dcbnl_ops; +extern int ixgbe_copy_dcb_cfg(struct ixgbe_dcb_config *src_dcb_cfg, + struct ixgbe_dcb_config *dst_dcb_cfg, + int tc_max); +#endif extern char ixgbe_driver_name[]; extern const char ixgbe_driver_version[]; @@ -327,5 +344,9 @@ extern int ixgbe_setup_tx_resources(struct ixgbe_adapter *, struct ixgbe_ring *) extern void ixgbe_free_rx_resources(struct ixgbe_adapter *, struct ixgbe_ring *); extern void ixgbe_free_tx_resources(struct ixgbe_adapter *, struct ixgbe_ring *); extern void ixgbe_update_stats(struct ixgbe_adapter *adapter); +extern void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter); +extern int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter); +void ixgbe_napi_add_all(struct ixgbe_adapter *adapter); +void ixgbe_napi_del_all(struct ixgbe_adapter *adapter); #endif /* _IXGBE_H_ */ diff --git a/drivers/net/ixgbe/ixgbe_dcb.c b/drivers/net/ixgbe/ixgbe_dcb.c new file mode 100644 index 000000000000..e2e28ac63dec --- /dev/null +++ b/drivers/net/ixgbe/ixgbe_dcb.c @@ -0,0 +1,332 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2007 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + Linux NICS + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + + +#include "ixgbe.h" +#include "ixgbe_type.h" +#include "ixgbe_dcb.h" +#include "ixgbe_dcb_82598.h" + +/** + * ixgbe_dcb_config - Struct containing DCB settings. + * @dcb_config: Pointer to DCB config structure + * + * This function checks DCB rules for DCB settings. + * The following rules are checked: + * 1. The sum of bandwidth percentages of all Bandwidth Groups must total 100%. + * 2. The sum of bandwidth percentages of all Traffic Classes within a Bandwidth + * Group must total 100. + * 3. A Traffic Class should not be set to both Link Strict Priority + * and Group Strict Priority. + * 4. Link strict Bandwidth Groups can only have link strict traffic classes + * with zero bandwidth. + */ +s32 ixgbe_dcb_check_config(struct ixgbe_dcb_config *dcb_config) +{ + struct tc_bw_alloc *p; + s32 ret_val = 0; + u8 i, j, bw = 0, bw_id; + u8 bw_sum[2][MAX_BW_GROUP]; + bool link_strict[2][MAX_BW_GROUP]; + + memset(bw_sum, 0, sizeof(bw_sum)); + memset(link_strict, 0, sizeof(link_strict)); + + /* First Tx, then Rx */ + for (i = 0; i < 2; i++) { + /* Check each traffic class for rule violation */ + for (j = 0; j < MAX_TRAFFIC_CLASS; j++) { + p = &dcb_config->tc_config[j].path[i]; + + bw = p->bwg_percent; + bw_id = p->bwg_id; + + if (bw_id >= MAX_BW_GROUP) { + ret_val = DCB_ERR_CONFIG; + goto err_config; + } + if (p->prio_type == prio_link) { + link_strict[i][bw_id] = true; + /* Link strict should have zero bandwidth */ + if (bw) { + ret_val = DCB_ERR_LS_BW_NONZERO; + goto err_config; + } + } else if (!bw) { + /* + * Traffic classes without link strict + * should have non-zero bandwidth. + */ + ret_val = DCB_ERR_TC_BW_ZERO; + goto err_config; + } + bw_sum[i][bw_id] += bw; + } + + bw = 0; + + /* Check each bandwidth group for rule violation */ + for (j = 0; j < MAX_BW_GROUP; j++) { + bw += dcb_config->bw_percentage[i][j]; + /* + * Sum of bandwidth percentages of all traffic classes + * within a Bandwidth Group must total 100 except for + * link strict group (zero bandwidth). + */ + if (link_strict[i][j]) { + if (bw_sum[i][j]) { + /* + * Link strict group should have zero + * bandwidth. + */ + ret_val = DCB_ERR_LS_BWG_NONZERO; + goto err_config; + } + } else if (bw_sum[i][j] != BW_PERCENT && + bw_sum[i][j] != 0) { + ret_val = DCB_ERR_TC_BW; + goto err_config; + } + } + + if (bw != BW_PERCENT) { + ret_val = DCB_ERR_BW_GROUP; + goto err_config; + } + } + +err_config: + return ret_val; +} + +/** + * ixgbe_dcb_calculate_tc_credits - Calculates traffic class credits + * @ixgbe_dcb_config: Struct containing DCB settings. + * @direction: Configuring either Tx or Rx. + * + * This function calculates the credits allocated to each traffic class. + * It should be called only after the rules are checked by + * ixgbe_dcb_check_config(). + */ +s32 ixgbe_dcb_calculate_tc_credits(struct ixgbe_dcb_config *dcb_config, + u8 direction) +{ + struct tc_bw_alloc *p; + s32 ret_val = 0; + /* Initialization values default for Tx settings */ + u32 credit_refill = 0; + u32 credit_max = 0; + u16 link_percentage = 0; + u8 bw_percent = 0; + u8 i; + + if (dcb_config == NULL) { + ret_val = DCB_ERR_CONFIG; + goto out; + } + + /* Find out the link percentage for each TC first */ + for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { + p = &dcb_config->tc_config[i].path[direction]; + bw_percent = dcb_config->bw_percentage[direction][p->bwg_id]; + + link_percentage = p->bwg_percent; + /* Must be careful of integer division for very small nums */ + link_percentage = (link_percentage * bw_percent) / 100; + if (p->bwg_percent > 0 && link_percentage == 0) + link_percentage = 1; + + /* Save link_percentage for reference */ + p->link_percent = (u8)link_percentage; + + /* Calculate credit refill and save it */ + credit_refill = link_percentage * MINIMUM_CREDIT_REFILL; + p->data_credits_refill = (u16)credit_refill; + + /* Calculate maximum credit for the TC */ + credit_max = (link_percentage * MAX_CREDIT) / 100; + + /* + * Adjustment based on rule checking, if the percentage + * of a TC is too small, the maximum credit may not be + * enough to send out a jumbo frame in data plane arbitration. + */ + if (credit_max && (credit_max < MINIMUM_CREDIT_FOR_JUMBO)) + credit_max = MINIMUM_CREDIT_FOR_JUMBO; + + if (direction == DCB_TX_CONFIG) { + /* + * Adjustment based on rule checking, if the + * percentage of a TC is too small, the maximum + * credit may not be enough to send out a TSO + * packet in descriptor plane arbitration. + */ + if (credit_max && + (credit_max < MINIMUM_CREDIT_FOR_TSO)) + credit_max = MINIMUM_CREDIT_FOR_TSO; + + dcb_config->tc_config[i].desc_credits_max = + (u16)credit_max; + } + + p->data_credits_max = (u16)credit_max; + } + +out: + return ret_val; +} + +/** + * ixgbe_dcb_get_tc_stats - Returns status of each traffic class + * @hw: pointer to hardware structure + * @stats: pointer to statistics structure + * @tc_count: Number of elements in bwg_array. + * + * This function returns the status data for each of the Traffic Classes in use. + */ +s32 ixgbe_dcb_get_tc_stats(struct ixgbe_hw *hw, struct ixgbe_hw_stats *stats, + u8 tc_count) +{ + s32 ret = 0; + if (hw->mac.type == ixgbe_mac_82598EB) + ret = ixgbe_dcb_get_tc_stats_82598(hw, stats, tc_count); + return ret; +} + +/** + * ixgbe_dcb_get_pfc_stats - Returns CBFC status of each traffic class + * hw - pointer to hardware structure + * stats - pointer to statistics structure + * tc_count - Number of elements in bwg_array. + * + * This function returns the CBFC status data for each of the Traffic Classes. + */ +s32 ixgbe_dcb_get_pfc_stats(struct ixgbe_hw *hw, struct ixgbe_hw_stats *stats, + u8 tc_count) +{ + s32 ret = 0; + if (hw->mac.type == ixgbe_mac_82598EB) + ret = ixgbe_dcb_get_pfc_stats_82598(hw, stats, tc_count); + return ret; +} + +/** + * ixgbe_dcb_config_rx_arbiter - Config Rx arbiter + * @hw: pointer to hardware structure + * @dcb_config: pointer to ixgbe_dcb_config structure + * + * Configure Rx Data Arbiter and credits for each traffic class. + */ +s32 ixgbe_dcb_config_rx_arbiter(struct ixgbe_hw *hw, + struct ixgbe_dcb_config *dcb_config) +{ + s32 ret = 0; + if (hw->mac.type == ixgbe_mac_82598EB) + ret = ixgbe_dcb_config_rx_arbiter_82598(hw, dcb_config); + return ret; +} + +/** + * ixgbe_dcb_config_tx_desc_arbiter - Config Tx Desc arbiter + * @hw: pointer to hardware structure + * @dcb_config: pointer to ixgbe_dcb_config structure + * + * Configure Tx Descriptor Arbiter and credits for each traffic class. + */ +s32 ixgbe_dcb_config_tx_desc_arbiter(struct ixgbe_hw *hw, + struct ixgbe_dcb_config *dcb_config) +{ + s32 ret = 0; + if (hw->mac.type == ixgbe_mac_82598EB) + ret = ixgbe_dcb_config_tx_desc_arbiter_82598(hw, dcb_config); + return ret; +} + +/** + * ixgbe_dcb_config_tx_data_arbiter - Config Tx data arbiter + * @hw: pointer to hardware structure + * @dcb_config: pointer to ixgbe_dcb_config structure + * + * Configure Tx Data Arbiter and credits for each traffic class. + */ +s32 ixgbe_dcb_config_tx_data_arbiter(struct ixgbe_hw *hw, + struct ixgbe_dcb_config *dcb_config) +{ + s32 ret = 0; + if (hw->mac.type == ixgbe_mac_82598EB) + ret = ixgbe_dcb_config_tx_data_arbiter_82598(hw, dcb_config); + return ret; +} + +/** + * ixgbe_dcb_config_pfc - Config priority flow control + * @hw: pointer to hardware structure + * @dcb_config: pointer to ixgbe_dcb_config structure + * + * Configure Priority Flow Control for each traffic class. + */ +s32 ixgbe_dcb_config_pfc(struct ixgbe_hw *hw, + struct ixgbe_dcb_config *dcb_config) +{ + s32 ret = 0; + if (hw->mac.type == ixgbe_mac_82598EB) + ret = ixgbe_dcb_config_pfc_82598(hw, dcb_config); + return ret; +} + +/** + * ixgbe_dcb_config_tc_stats - Config traffic class statistics + * @hw: pointer to hardware structure + * + * Configure queue statistics registers, all queues belonging to same traffic + * class uses a single set of queue statistics counters. + */ +s32 ixgbe_dcb_config_tc_stats(struct ixgbe_hw *hw) +{ + s32 ret = 0; + if (hw->mac.type == ixgbe_mac_82598EB) + ret = ixgbe_dcb_config_tc_stats_82598(hw); + return ret; +} + +/** + * ixgbe_dcb_hw_config - Config and enable DCB + * @hw: pointer to hardware structure + * @dcb_config: pointer to ixgbe_dcb_config structure + * + * Configure dcb settings and enable dcb mode. + */ +s32 ixgbe_dcb_hw_config(struct ixgbe_hw *hw, + struct ixgbe_dcb_config *dcb_config) +{ + s32 ret = 0; + if (hw->mac.type == ixgbe_mac_82598EB) + ret = ixgbe_dcb_hw_config_82598(hw, dcb_config); + return ret; +} + diff --git a/drivers/net/ixgbe/ixgbe_dcb.h b/drivers/net/ixgbe/ixgbe_dcb.h new file mode 100644 index 000000000000..62dfd243bedc --- /dev/null +++ b/drivers/net/ixgbe/ixgbe_dcb.h @@ -0,0 +1,157 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2007 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + Linux NICS + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _DCB_CONFIG_H_ +#define _DCB_CONFIG_H_ + +#include "ixgbe_type.h" + +/* DCB data structures */ + +#define IXGBE_MAX_PACKET_BUFFERS 8 +#define MAX_USER_PRIORITY 8 +#define MAX_TRAFFIC_CLASS 8 +#define MAX_BW_GROUP 8 +#define BW_PERCENT 100 + +#define DCB_TX_CONFIG 0 +#define DCB_RX_CONFIG 1 + +/* DCB error Codes */ +#define DCB_SUCCESS 0 +#define DCB_ERR_CONFIG -1 +#define DCB_ERR_PARAM -2 + +/* Transmit and receive Errors */ +/* Error in bandwidth group allocation */ +#define DCB_ERR_BW_GROUP -3 +/* Error in traffic class bandwidth allocation */ +#define DCB_ERR_TC_BW -4 +/* Traffic class has both link strict and group strict enabled */ +#define DCB_ERR_LS_GS -5 +/* Link strict traffic class has non zero bandwidth */ +#define DCB_ERR_LS_BW_NONZERO -6 +/* Link strict bandwidth group has non zero bandwidth */ +#define DCB_ERR_LS_BWG_NONZERO -7 +/* Traffic class has zero bandwidth */ +#define DCB_ERR_TC_BW_ZERO -8 + +#define DCB_NOT_IMPLEMENTED 0x7FFFFFFF + +struct dcb_pfc_tc_debug { + u8 tc; + u8 pause_status; + u64 pause_quanta; +}; + +enum strict_prio_type { + prio_none = 0, + prio_group, + prio_link +}; + +/* Traffic class bandwidth allocation per direction */ +struct tc_bw_alloc { + u8 bwg_id; /* Bandwidth Group (BWG) ID */ + u8 bwg_percent; /* % of BWG's bandwidth */ + u8 link_percent; /* % of link bandwidth */ + u8 up_to_tc_bitmap; /* User Priority to Traffic Class mapping */ + u16 data_credits_refill; /* Credit refill amount in 64B granularity */ + u16 data_credits_max; /* Max credits for a configured packet buffer + * in 64B granularity.*/ + enum strict_prio_type prio_type; /* Link or Group Strict Priority */ +}; + +enum dcb_pfc_type { + pfc_disabled = 0, + pfc_enabled_full, + pfc_enabled_tx, + pfc_enabled_rx +}; + +/* Traffic class configuration */ +struct tc_configuration { + struct tc_bw_alloc path[2]; /* One each for Tx/Rx */ + enum dcb_pfc_type dcb_pfc; /* Class based flow control setting */ + + u16 desc_credits_max; /* For Tx Descriptor arbitration */ + u8 tc; /* Traffic class (TC) */ +}; + +enum dcb_rx_pba_cfg { + pba_equal, /* PBA[0-7] each use 64KB FIFO */ + pba_80_48 /* PBA[0-3] each use 80KB, PBA[4-7] each use 48KB */ +}; + +struct ixgbe_dcb_config { + struct tc_configuration tc_config[MAX_TRAFFIC_CLASS]; + u8 bw_percentage[2][MAX_BW_GROUP]; /* One each for Tx/Rx */ + + bool round_robin_enable; + + enum dcb_rx_pba_cfg rx_pba_cfg; + + u32 dcb_cfg_version; /* Not used...OS-specific? */ + u32 link_speed; /* For bandwidth allocation validation purpose */ +}; + +/* DCB driver APIs */ + +/* DCB rule checking function.*/ +s32 ixgbe_dcb_check_config(struct ixgbe_dcb_config *config); + +/* DCB credits calculation */ +s32 ixgbe_dcb_calculate_tc_credits(struct ixgbe_dcb_config *, u8); + +/* DCB PFC functions */ +s32 ixgbe_dcb_config_pfc(struct ixgbe_hw *, struct ixgbe_dcb_config *g); +s32 ixgbe_dcb_get_pfc_stats(struct ixgbe_hw *, struct ixgbe_hw_stats *, u8); + +/* DCB traffic class stats */ +s32 ixgbe_dcb_config_tc_stats(struct ixgbe_hw *); +s32 ixgbe_dcb_get_tc_stats(struct ixgbe_hw *, struct ixgbe_hw_stats *, u8); + +/* DCB config arbiters */ +s32 ixgbe_dcb_config_tx_desc_arbiter(struct ixgbe_hw *, + struct ixgbe_dcb_config *); +s32 ixgbe_dcb_config_tx_data_arbiter(struct ixgbe_hw *, + struct ixgbe_dcb_config *); +s32 ixgbe_dcb_config_rx_arbiter(struct ixgbe_hw *, struct ixgbe_dcb_config *); + +/* DCB hw initialization */ +s32 ixgbe_dcb_hw_config(struct ixgbe_hw *, struct ixgbe_dcb_config *); + +/* DCB definitions for credit calculation */ +#define MAX_CREDIT_REFILL 511 /* 0x1FF * 64B = 32704B */ +#define MINIMUM_CREDIT_REFILL 5 /* 5*64B = 320B */ +#define MINIMUM_CREDIT_FOR_JUMBO 145 /* 145= UpperBound((9*1024+54)/64B) for 9KB jumbo frame */ +#define DCB_MAX_TSO_SIZE (32*1024) /* MAX TSO packet size supported in DCB mode */ +#define MINIMUM_CREDIT_FOR_TSO (DCB_MAX_TSO_SIZE/64 + 1) /* 513 for 32KB TSO packet */ +#define MAX_CREDIT 4095 /* Maximum credit supported: 256KB * 1204 / 64B */ + +#endif /* _DCB_CONFIG_H */ diff --git a/drivers/net/ixgbe/ixgbe_dcb_82598.c b/drivers/net/ixgbe/ixgbe_dcb_82598.c new file mode 100644 index 000000000000..fce6867a4517 --- /dev/null +++ b/drivers/net/ixgbe/ixgbe_dcb_82598.c @@ -0,0 +1,398 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2007 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + Linux NICS + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "ixgbe.h" +#include "ixgbe_type.h" +#include "ixgbe_dcb.h" +#include "ixgbe_dcb_82598.h" + +/** + * ixgbe_dcb_get_tc_stats_82598 - Return status data for each traffic class + * @hw: pointer to hardware structure + * @stats: pointer to statistics structure + * @tc_count: Number of elements in bwg_array. + * + * This function returns the status data for each of the Traffic Classes in use. + */ +s32 ixgbe_dcb_get_tc_stats_82598(struct ixgbe_hw *hw, + struct ixgbe_hw_stats *stats, + u8 tc_count) +{ + int tc; + + if (tc_count > MAX_TRAFFIC_CLASS) + return DCB_ERR_PARAM; + + /* Statistics pertaining to each traffic class */ + for (tc = 0; tc < tc_count; tc++) { + /* Transmitted Packets */ + stats->qptc[tc] += IXGBE_READ_REG(hw, IXGBE_QPTC(tc)); + /* Transmitted Bytes */ + stats->qbtc[tc] += IXGBE_READ_REG(hw, IXGBE_QBTC(tc)); + /* Received Packets */ + stats->qprc[tc] += IXGBE_READ_REG(hw, IXGBE_QPRC(tc)); + /* Received Bytes */ + stats->qbrc[tc] += IXGBE_READ_REG(hw, IXGBE_QBRC(tc)); + } + + return 0; +} + +/** + * ixgbe_dcb_get_pfc_stats_82598 - Returns CBFC status data + * @hw: pointer to hardware structure + * @stats: pointer to statistics structure + * @tc_count: Number of elements in bwg_array. + * + * This function returns the CBFC status data for each of the Traffic Classes. + */ +s32 ixgbe_dcb_get_pfc_stats_82598(struct ixgbe_hw *hw, + struct ixgbe_hw_stats *stats, + u8 tc_count) +{ + int tc; + + if (tc_count > MAX_TRAFFIC_CLASS) + return DCB_ERR_PARAM; + + for (tc = 0; tc < tc_count; tc++) { + /* Priority XOFF Transmitted */ + stats->pxofftxc[tc] += IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(tc)); + /* Priority XOFF Received */ + stats->pxoffrxc[tc] += IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(tc)); + } + + return 0; +} + +/** + * ixgbe_dcb_config_packet_buffers_82598 - Configure packet buffers + * @hw: pointer to hardware structure + * @dcb_config: pointer to ixgbe_dcb_config structure + * + * Configure packet buffers for DCB mode. + */ +s32 ixgbe_dcb_config_packet_buffers_82598(struct ixgbe_hw *hw, + struct ixgbe_dcb_config *dcb_config) +{ + s32 ret_val = 0; + u32 value = IXGBE_RXPBSIZE_64KB; + u8 i = 0; + + /* Setup Rx packet buffer sizes */ + switch (dcb_config->rx_pba_cfg) { + case pba_80_48: + /* Setup the first four at 80KB */ + value = IXGBE_RXPBSIZE_80KB; + for (; i < 4; i++) + IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), value); + /* Setup the last four at 48KB...don't re-init i */ + value = IXGBE_RXPBSIZE_48KB; + /* Fall Through */ + case pba_equal: + default: + for (; i < IXGBE_MAX_PACKET_BUFFERS; i++) + IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), value); + + /* Setup Tx packet buffer sizes */ + for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++) { + IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), + IXGBE_TXPBSIZE_40KB); + } + break; + } + + return ret_val; +} + +/** + * ixgbe_dcb_config_rx_arbiter_82598 - Config Rx data arbiter + * @hw: pointer to hardware structure + * @dcb_config: pointer to ixgbe_dcb_config structure + * + * Configure Rx Data Arbiter and credits for each traffic class. + */ +s32 ixgbe_dcb_config_rx_arbiter_82598(struct ixgbe_hw *hw, + struct ixgbe_dcb_config *dcb_config) +{ + struct tc_bw_alloc *p; + u32 reg = 0; + u32 credit_refill = 0; + u32 credit_max = 0; + u8 i = 0; + + reg = IXGBE_READ_REG(hw, IXGBE_RUPPBMR) | IXGBE_RUPPBMR_MQA; + IXGBE_WRITE_REG(hw, IXGBE_RUPPBMR, reg); + + reg = IXGBE_READ_REG(hw, IXGBE_RMCS); + /* Enable Arbiter */ + reg &= ~IXGBE_RMCS_ARBDIS; + /* Enable Receive Recycle within the BWG */ + reg |= IXGBE_RMCS_RRM; + /* Enable Deficit Fixed Priority arbitration*/ + reg |= IXGBE_RMCS_DFP; + + IXGBE_WRITE_REG(hw, IXGBE_RMCS, reg); + + /* Configure traffic class credits and priority */ + for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { + p = &dcb_config->tc_config[i].path[DCB_RX_CONFIG]; + credit_refill = p->data_credits_refill; + credit_max = p->data_credits_max; + + reg = credit_refill | (credit_max << IXGBE_RT2CR_MCL_SHIFT); + + if (p->prio_type == prio_link) + reg |= IXGBE_RT2CR_LSP; + + IXGBE_WRITE_REG(hw, IXGBE_RT2CR(i), reg); + } + + reg = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); + reg |= IXGBE_RDRXCTL_RDMTS_1_2; + reg |= IXGBE_RDRXCTL_MPBEN; + reg |= IXGBE_RDRXCTL_MCEN; + IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, reg); + + reg = IXGBE_READ_REG(hw, IXGBE_RXCTRL); + /* Make sure there is enough descriptors before arbitration */ + reg &= ~IXGBE_RXCTRL_DMBYPS; + IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, reg); + + return 0; +} + +/** + * ixgbe_dcb_config_tx_desc_arbiter_82598 - Config Tx Desc. arbiter + * @hw: pointer to hardware structure + * @dcb_config: pointer to ixgbe_dcb_config structure + * + * Configure Tx Descriptor Arbiter and credits for each traffic class. + */ +s32 ixgbe_dcb_config_tx_desc_arbiter_82598(struct ixgbe_hw *hw, + struct ixgbe_dcb_config *dcb_config) +{ + struct tc_bw_alloc *p; + u32 reg, max_credits; + u8 i; + + reg = IXGBE_READ_REG(hw, IXGBE_DPMCS); + + /* Enable arbiter */ + reg &= ~IXGBE_DPMCS_ARBDIS; + if (!(dcb_config->round_robin_enable)) { + /* Enable DFP and Recycle mode */ + reg |= (IXGBE_DPMCS_TDPAC | IXGBE_DPMCS_TRM); + } + reg |= IXGBE_DPMCS_TSOEF; + /* Configure Max TSO packet size 34KB including payload and headers */ + reg |= (0x4 << IXGBE_DPMCS_MTSOS_SHIFT); + + IXGBE_WRITE_REG(hw, IXGBE_DPMCS, reg); + + /* Configure traffic class credits and priority */ + for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { + p = &dcb_config->tc_config[i].path[DCB_TX_CONFIG]; + max_credits = dcb_config->tc_config[i].desc_credits_max; + reg = max_credits << IXGBE_TDTQ2TCCR_MCL_SHIFT; + reg |= p->data_credits_refill; + reg |= (u32)(p->bwg_id) << IXGBE_TDTQ2TCCR_BWG_SHIFT; + + if (p->prio_type == prio_group) + reg |= IXGBE_TDTQ2TCCR_GSP; + + if (p->prio_type == prio_link) + reg |= IXGBE_TDTQ2TCCR_LSP; + + IXGBE_WRITE_REG(hw, IXGBE_TDTQ2TCCR(i), reg); + } + + return 0; +} + +/** + * ixgbe_dcb_config_tx_data_arbiter_82598 - Config Tx data arbiter + * @hw: pointer to hardware structure + * @dcb_config: pointer to ixgbe_dcb_config structure + * + * Configure Tx Data Arbiter and credits for each traffic class. + */ +s32 ixgbe_dcb_config_tx_data_arbiter_82598(struct ixgbe_hw *hw, + struct ixgbe_dcb_config *dcb_config) +{ + struct tc_bw_alloc *p; + u32 reg; + u8 i; + + reg = IXGBE_READ_REG(hw, IXGBE_PDPMCS); + /* Enable Data Plane Arbiter */ + reg &= ~IXGBE_PDPMCS_ARBDIS; + /* Enable DFP and Transmit Recycle Mode */ + reg |= (IXGBE_PDPMCS_TPPAC | IXGBE_PDPMCS_TRM); + + IXGBE_WRITE_REG(hw, IXGBE_PDPMCS, reg); + + /* Configure traffic class credits and priority */ + for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { + p = &dcb_config->tc_config[i].path[DCB_TX_CONFIG]; + reg = p->data_credits_refill; + reg |= (u32)(p->data_credits_max) << IXGBE_TDPT2TCCR_MCL_SHIFT; + reg |= (u32)(p->bwg_id) << IXGBE_TDPT2TCCR_BWG_SHIFT; + + if (p->prio_type == prio_group) + reg |= IXGBE_TDPT2TCCR_GSP; + + if (p->prio_type == prio_link) + reg |= IXGBE_TDPT2TCCR_LSP; + + IXGBE_WRITE_REG(hw, IXGBE_TDPT2TCCR(i), reg); + } + + /* Enable Tx packet buffer division */ + reg = IXGBE_READ_REG(hw, IXGBE_DTXCTL); + reg |= IXGBE_DTXCTL_ENDBUBD; + IXGBE_WRITE_REG(hw, IXGBE_DTXCTL, reg); + + return 0; +} + +/** + * ixgbe_dcb_config_pfc_82598 - Config priority flow control + * @hw: pointer to hardware structure + * @dcb_config: pointer to ixgbe_dcb_config structure + * + * Configure Priority Flow Control for each traffic class. + */ +s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *hw, + struct ixgbe_dcb_config *dcb_config) +{ + u32 reg, rx_pba_size; + u8 i; + + /* Enable Transmit Priority Flow Control */ + reg = IXGBE_READ_REG(hw, IXGBE_RMCS); + reg &= ~IXGBE_RMCS_TFCE_802_3X; + /* correct the reporting of our flow control status */ + hw->fc.type = ixgbe_fc_none; + reg |= IXGBE_RMCS_TFCE_PRIORITY; + IXGBE_WRITE_REG(hw, IXGBE_RMCS, reg); + + /* Enable Receive Priority Flow Control */ + reg = IXGBE_READ_REG(hw, IXGBE_FCTRL); + reg &= ~IXGBE_FCTRL_RFCE; + reg |= IXGBE_FCTRL_RPFCE; + IXGBE_WRITE_REG(hw, IXGBE_FCTRL, reg); + + /* + * Configure flow control thresholds and enable priority flow control + * for each traffic class. + */ + for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { + if (dcb_config->rx_pba_cfg == pba_equal) { + rx_pba_size = IXGBE_RXPBSIZE_64KB; + } else { + rx_pba_size = (i < 4) ? IXGBE_RXPBSIZE_80KB + : IXGBE_RXPBSIZE_48KB; + } + + reg = ((rx_pba_size >> 5) & 0xFFF0); + if (dcb_config->tc_config[i].dcb_pfc == pfc_enabled_tx || + dcb_config->tc_config[i].dcb_pfc == pfc_enabled_full) + reg |= IXGBE_FCRTL_XONE; + + IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), reg); + + reg = ((rx_pba_size >> 2) & 0xFFF0); + if (dcb_config->tc_config[i].dcb_pfc == pfc_enabled_tx || + dcb_config->tc_config[i].dcb_pfc == pfc_enabled_full) + reg |= IXGBE_FCRTH_FCEN; + + IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), reg); + } + + /* Configure pause time */ + for (i = 0; i < (MAX_TRAFFIC_CLASS >> 1); i++) + IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), 0x68006800); + + /* Configure flow control refresh threshold value */ + IXGBE_WRITE_REG(hw, IXGBE_FCRTV, 0x3400); + + return 0; +} + +/** + * ixgbe_dcb_config_tc_stats_82598 - Configure traffic class statistics + * @hw: pointer to hardware structure + * + * Configure queue statistics registers, all queues belonging to same traffic + * class uses a single set of queue statistics counters. + */ +s32 ixgbe_dcb_config_tc_stats_82598(struct ixgbe_hw *hw) +{ + u32 reg = 0; + u8 i = 0; + u8 j = 0; + + /* Receive Queues stats setting - 8 queues per statistics reg */ + for (i = 0, j = 0; i < 15 && j < 8; i = i + 2, j++) { + reg = IXGBE_READ_REG(hw, IXGBE_RQSMR(i)); + reg |= ((0x1010101) * j); + IXGBE_WRITE_REG(hw, IXGBE_RQSMR(i), reg); + reg = IXGBE_READ_REG(hw, IXGBE_RQSMR(i + 1)); + reg |= ((0x1010101) * j); + IXGBE_WRITE_REG(hw, IXGBE_RQSMR(i + 1), reg); + } + /* Transmit Queues stats setting - 4 queues per statistics reg */ + for (i = 0; i < 8; i++) { + reg = IXGBE_READ_REG(hw, IXGBE_TQSMR(i)); + reg |= ((0x1010101) * i); + IXGBE_WRITE_REG(hw, IXGBE_TQSMR(i), reg); + } + + return 0; +} + +/** + * ixgbe_dcb_hw_config_82598 - Config and enable DCB + * @hw: pointer to hardware structure + * @dcb_config: pointer to ixgbe_dcb_config structure + * + * Configure dcb settings and enable dcb mode. + */ +s32 ixgbe_dcb_hw_config_82598(struct ixgbe_hw *hw, + struct ixgbe_dcb_config *dcb_config) +{ + ixgbe_dcb_config_packet_buffers_82598(hw, dcb_config); + ixgbe_dcb_config_rx_arbiter_82598(hw, dcb_config); + ixgbe_dcb_config_tx_desc_arbiter_82598(hw, dcb_config); + ixgbe_dcb_config_tx_data_arbiter_82598(hw, dcb_config); + ixgbe_dcb_config_pfc_82598(hw, dcb_config); + ixgbe_dcb_config_tc_stats_82598(hw); + + return 0; +} diff --git a/drivers/net/ixgbe/ixgbe_dcb_82598.h b/drivers/net/ixgbe/ixgbe_dcb_82598.h new file mode 100644 index 000000000000..1e6a313719d7 --- /dev/null +++ b/drivers/net/ixgbe/ixgbe_dcb_82598.h @@ -0,0 +1,94 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2007 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + Linux NICS + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _DCB_82598_CONFIG_H_ +#define _DCB_82598_CONFIG_H_ + +/* DCB register definitions */ + +#define IXGBE_DPMCS_MTSOS_SHIFT 16 +#define IXGBE_DPMCS_TDPAC 0x00000001 /* 0 Round Robin, 1 DFP - Deficit Fixed Priority */ +#define IXGBE_DPMCS_TRM 0x00000010 /* Transmit Recycle Mode */ +#define IXGBE_DPMCS_ARBDIS 0x00000040 /* DCB arbiter disable */ +#define IXGBE_DPMCS_TSOEF 0x00080000 /* TSO Expand Factor: 0=x4, 1=x2 */ + +#define IXGBE_RUPPBMR_MQA 0x80000000 /* Enable UP to queue mapping */ + +#define IXGBE_RT2CR_MCL_SHIFT 12 /* Offset to Max Credit Limit setting */ +#define IXGBE_RT2CR_LSP 0x80000000 /* LSP enable bit */ + +#define IXGBE_RDRXCTL_MPBEN 0x00000010 /* DMA config for multiple packet buffers enable */ +#define IXGBE_RDRXCTL_MCEN 0x00000040 /* DMA config for multiple cores (RSS) enable */ + +#define IXGBE_TDTQ2TCCR_MCL_SHIFT 12 +#define IXGBE_TDTQ2TCCR_BWG_SHIFT 9 +#define IXGBE_TDTQ2TCCR_GSP 0x40000000 +#define IXGBE_TDTQ2TCCR_LSP 0x80000000 + +#define IXGBE_TDPT2TCCR_MCL_SHIFT 12 +#define IXGBE_TDPT2TCCR_BWG_SHIFT 9 +#define IXGBE_TDPT2TCCR_GSP 0x40000000 +#define IXGBE_TDPT2TCCR_LSP 0x80000000 + +#define IXGBE_PDPMCS_TPPAC 0x00000020 /* 0 Round Robin, 1 for DFP - Deficit Fixed Priority */ +#define IXGBE_PDPMCS_ARBDIS 0x00000040 /* Arbiter disable */ +#define IXGBE_PDPMCS_TRM 0x00000100 /* Transmit Recycle Mode enable */ + +#define IXGBE_DTXCTL_ENDBUBD 0x00000004 /* Enable DBU buffer division */ + +#define IXGBE_TXPBSIZE_40KB 0x0000A000 /* 40KB Packet Buffer */ +#define IXGBE_RXPBSIZE_48KB 0x0000C000 /* 48KB Packet Buffer */ +#define IXGBE_RXPBSIZE_64KB 0x00010000 /* 64KB Packet Buffer */ +#define IXGBE_RXPBSIZE_80KB 0x00014000 /* 80KB Packet Buffer */ + +#define IXGBE_RDRXCTL_RDMTS_1_2 0x00000000 + +/* DCB hardware-specific driver APIs */ + +/* DCB PFC functions */ +s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *, struct ixgbe_dcb_config *); +s32 ixgbe_dcb_get_pfc_stats_82598(struct ixgbe_hw *, struct ixgbe_hw_stats *, + u8); + +/* DCB traffic class stats */ +s32 ixgbe_dcb_config_tc_stats_82598(struct ixgbe_hw *); +s32 ixgbe_dcb_get_tc_stats_82598(struct ixgbe_hw *, struct ixgbe_hw_stats *, + u8); + +/* DCB config arbiters */ +s32 ixgbe_dcb_config_tx_desc_arbiter_82598(struct ixgbe_hw *, + struct ixgbe_dcb_config *); +s32 ixgbe_dcb_config_tx_data_arbiter_82598(struct ixgbe_hw *, + struct ixgbe_dcb_config *); +s32 ixgbe_dcb_config_rx_arbiter_82598(struct ixgbe_hw *, + struct ixgbe_dcb_config *); + +/* DCB hw initialization */ +s32 ixgbe_dcb_hw_config_82598(struct ixgbe_hw *, struct ixgbe_dcb_config *); + +#endif /* _DCB_82598_CONFIG_H */ diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c new file mode 100644 index 000000000000..50bff2af6b04 --- /dev/null +++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c @@ -0,0 +1,356 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2008 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + Linux NICS + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "ixgbe.h" +#include + +/* Callbacks for DCB netlink in the kernel */ +#define BIT_DCB_MODE 0x01 +#define BIT_PFC 0x02 +#define BIT_PG_RX 0x04 +#define BIT_PG_TX 0x08 + +int ixgbe_copy_dcb_cfg(struct ixgbe_dcb_config *src_dcb_cfg, + struct ixgbe_dcb_config *dst_dcb_cfg, int tc_max) +{ + struct tc_configuration *src_tc_cfg = NULL; + struct tc_configuration *dst_tc_cfg = NULL; + int i; + + if (!src_dcb_cfg || !dst_dcb_cfg) + return -EINVAL; + + for (i = DCB_PG_ATTR_TC_0; i < tc_max + DCB_PG_ATTR_TC_0; i++) { + src_tc_cfg = &src_dcb_cfg->tc_config[i - DCB_PG_ATTR_TC_0]; + dst_tc_cfg = &dst_dcb_cfg->tc_config[i - DCB_PG_ATTR_TC_0]; + + dst_tc_cfg->path[DCB_TX_CONFIG].prio_type = + src_tc_cfg->path[DCB_TX_CONFIG].prio_type; + + dst_tc_cfg->path[DCB_TX_CONFIG].bwg_id = + src_tc_cfg->path[DCB_TX_CONFIG].bwg_id; + + dst_tc_cfg->path[DCB_TX_CONFIG].bwg_percent = + src_tc_cfg->path[DCB_TX_CONFIG].bwg_percent; + + dst_tc_cfg->path[DCB_TX_CONFIG].up_to_tc_bitmap = + src_tc_cfg->path[DCB_TX_CONFIG].up_to_tc_bitmap; + + dst_tc_cfg->path[DCB_RX_CONFIG].prio_type = + src_tc_cfg->path[DCB_RX_CONFIG].prio_type; + + dst_tc_cfg->path[DCB_RX_CONFIG].bwg_id = + src_tc_cfg->path[DCB_RX_CONFIG].bwg_id; + + dst_tc_cfg->path[DCB_RX_CONFIG].bwg_percent = + src_tc_cfg->path[DCB_RX_CONFIG].bwg_percent; + + dst_tc_cfg->path[DCB_RX_CONFIG].up_to_tc_bitmap = + src_tc_cfg->path[DCB_RX_CONFIG].up_to_tc_bitmap; + } + + for (i = DCB_PG_ATTR_BW_ID_0; i < DCB_PG_ATTR_BW_ID_MAX; i++) { + dst_dcb_cfg->bw_percentage[DCB_TX_CONFIG] + [i-DCB_PG_ATTR_BW_ID_0] = src_dcb_cfg->bw_percentage + [DCB_TX_CONFIG][i-DCB_PG_ATTR_BW_ID_0]; + dst_dcb_cfg->bw_percentage[DCB_RX_CONFIG] + [i-DCB_PG_ATTR_BW_ID_0] = src_dcb_cfg->bw_percentage + [DCB_RX_CONFIG][i-DCB_PG_ATTR_BW_ID_0]; + } + + for (i = DCB_PFC_UP_ATTR_0; i < DCB_PFC_UP_ATTR_MAX; i++) { + dst_dcb_cfg->tc_config[i - DCB_PFC_UP_ATTR_0].dcb_pfc = + src_dcb_cfg->tc_config[i - DCB_PFC_UP_ATTR_0].dcb_pfc; + } + + return 0; +} + +static u8 ixgbe_dcbnl_get_state(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + DPRINTK(DRV, INFO, "Get DCB Admin Mode.\n"); + + return !!(adapter->flags & IXGBE_FLAG_DCB_ENABLED); +} + +static u16 ixgbe_dcb_select_queue(struct net_device *dev, struct sk_buff *skb) +{ + /* All traffic should default to class 0 */ + return 0; +} + +static void ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + DPRINTK(DRV, INFO, "Set DCB Admin Mode.\n"); + + if (state > 0) { + /* Turn on DCB */ + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { + return; + } else { + if (netif_running(netdev)) + netdev->stop(netdev); + ixgbe_reset_interrupt_capability(adapter); + ixgbe_napi_del_all(adapter); + kfree(adapter->tx_ring); + kfree(adapter->rx_ring); + adapter->tx_ring = NULL; + adapter->rx_ring = NULL; + netdev->select_queue = &ixgbe_dcb_select_queue; + + adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; + adapter->flags |= IXGBE_FLAG_DCB_ENABLED; + ixgbe_init_interrupt_scheme(adapter); + ixgbe_napi_add_all(adapter); + if (netif_running(netdev)) + netdev->open(netdev); + } + } else { + /* Turn off DCB */ + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { + if (netif_running(netdev)) + netdev->stop(netdev); + ixgbe_reset_interrupt_capability(adapter); + ixgbe_napi_del_all(adapter); + kfree(adapter->tx_ring); + kfree(adapter->rx_ring); + adapter->tx_ring = NULL; + adapter->rx_ring = NULL; + netdev->select_queue = NULL; + + adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED; + adapter->flags |= IXGBE_FLAG_RSS_ENABLED; + ixgbe_init_interrupt_scheme(adapter); + ixgbe_napi_add_all(adapter); + if (netif_running(netdev)) + netdev->open(netdev); + } else { + return; + } + } +} + +static void ixgbe_dcbnl_get_perm_hw_addr(struct net_device *netdev, + u8 *perm_addr) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + int i; + + for (i = 0; i < netdev->addr_len; i++) + perm_addr[i] = adapter->hw.mac.perm_addr[i]; +} + +static void ixgbe_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc, + u8 prio, u8 bwg_id, u8 bw_pct, + u8 up_map) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + if (prio != DCB_ATTR_VALUE_UNDEFINED) + adapter->temp_dcb_cfg.tc_config[tc].path[0].prio_type = prio; + if (bwg_id != DCB_ATTR_VALUE_UNDEFINED) + adapter->temp_dcb_cfg.tc_config[tc].path[0].bwg_id = bwg_id; + if (bw_pct != DCB_ATTR_VALUE_UNDEFINED) + adapter->temp_dcb_cfg.tc_config[tc].path[0].bwg_percent = + bw_pct; + if (up_map != DCB_ATTR_VALUE_UNDEFINED) + adapter->temp_dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap = + up_map; + + if ((adapter->temp_dcb_cfg.tc_config[tc].path[0].prio_type != + adapter->dcb_cfg.tc_config[tc].path[0].prio_type) || + (adapter->temp_dcb_cfg.tc_config[tc].path[0].bwg_id != + adapter->dcb_cfg.tc_config[tc].path[0].bwg_id) || + (adapter->temp_dcb_cfg.tc_config[tc].path[0].bwg_percent != + adapter->dcb_cfg.tc_config[tc].path[0].bwg_percent) || + (adapter->temp_dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap != + adapter->dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap)) + adapter->dcb_set_bitmap |= BIT_PG_TX; +} + +static void ixgbe_dcbnl_set_pg_bwg_cfg_tx(struct net_device *netdev, int bwg_id, + u8 bw_pct) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + adapter->temp_dcb_cfg.bw_percentage[0][bwg_id] = bw_pct; + + if (adapter->temp_dcb_cfg.bw_percentage[0][bwg_id] != + adapter->dcb_cfg.bw_percentage[0][bwg_id]) + adapter->dcb_set_bitmap |= BIT_PG_RX; +} + +static void ixgbe_dcbnl_set_pg_tc_cfg_rx(struct net_device *netdev, int tc, + u8 prio, u8 bwg_id, u8 bw_pct, + u8 up_map) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + if (prio != DCB_ATTR_VALUE_UNDEFINED) + adapter->temp_dcb_cfg.tc_config[tc].path[1].prio_type = prio; + if (bwg_id != DCB_ATTR_VALUE_UNDEFINED) + adapter->temp_dcb_cfg.tc_config[tc].path[1].bwg_id = bwg_id; + if (bw_pct != DCB_ATTR_VALUE_UNDEFINED) + adapter->temp_dcb_cfg.tc_config[tc].path[1].bwg_percent = + bw_pct; + if (up_map != DCB_ATTR_VALUE_UNDEFINED) + adapter->temp_dcb_cfg.tc_config[tc].path[1].up_to_tc_bitmap = + up_map; + + if ((adapter->temp_dcb_cfg.tc_config[tc].path[1].prio_type != + adapter->dcb_cfg.tc_config[tc].path[1].prio_type) || + (adapter->temp_dcb_cfg.tc_config[tc].path[1].bwg_id != + adapter->dcb_cfg.tc_config[tc].path[1].bwg_id) || + (adapter->temp_dcb_cfg.tc_config[tc].path[1].bwg_percent != + adapter->dcb_cfg.tc_config[tc].path[1].bwg_percent) || + (adapter->temp_dcb_cfg.tc_config[tc].path[1].up_to_tc_bitmap != + adapter->dcb_cfg.tc_config[tc].path[1].up_to_tc_bitmap)) + adapter->dcb_set_bitmap |= BIT_PG_RX; +} + +static void ixgbe_dcbnl_set_pg_bwg_cfg_rx(struct net_device *netdev, int bwg_id, + u8 bw_pct) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + adapter->temp_dcb_cfg.bw_percentage[1][bwg_id] = bw_pct; + + if (adapter->temp_dcb_cfg.bw_percentage[1][bwg_id] != + adapter->dcb_cfg.bw_percentage[1][bwg_id]) + adapter->dcb_set_bitmap |= BIT_PG_RX; +} + +static void ixgbe_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int tc, + u8 *prio, u8 *bwg_id, u8 *bw_pct, + u8 *up_map) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + *prio = adapter->dcb_cfg.tc_config[tc].path[0].prio_type; + *bwg_id = adapter->dcb_cfg.tc_config[tc].path[0].bwg_id; + *bw_pct = adapter->dcb_cfg.tc_config[tc].path[0].bwg_percent; + *up_map = adapter->dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap; +} + +static void ixgbe_dcbnl_get_pg_bwg_cfg_tx(struct net_device *netdev, int bwg_id, + u8 *bw_pct) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + *bw_pct = adapter->dcb_cfg.bw_percentage[0][bwg_id]; +} + +static void ixgbe_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int tc, + u8 *prio, u8 *bwg_id, u8 *bw_pct, + u8 *up_map) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + *prio = adapter->dcb_cfg.tc_config[tc].path[1].prio_type; + *bwg_id = adapter->dcb_cfg.tc_config[tc].path[1].bwg_id; + *bw_pct = adapter->dcb_cfg.tc_config[tc].path[1].bwg_percent; + *up_map = adapter->dcb_cfg.tc_config[tc].path[1].up_to_tc_bitmap; +} + +static void ixgbe_dcbnl_get_pg_bwg_cfg_rx(struct net_device *netdev, int bwg_id, + u8 *bw_pct) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + *bw_pct = adapter->dcb_cfg.bw_percentage[1][bwg_id]; +} + +static void ixgbe_dcbnl_set_pfc_cfg(struct net_device *netdev, int priority, + u8 setting) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + adapter->temp_dcb_cfg.tc_config[priority].dcb_pfc = setting; + if (adapter->temp_dcb_cfg.tc_config[priority].dcb_pfc != + adapter->dcb_cfg.tc_config[priority].dcb_pfc) + adapter->dcb_set_bitmap |= BIT_PFC; +} + +static void ixgbe_dcbnl_get_pfc_cfg(struct net_device *netdev, int priority, + u8 *setting) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + *setting = adapter->dcb_cfg.tc_config[priority].dcb_pfc; +} + +static u8 ixgbe_dcbnl_set_all(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + int ret; + + if (!adapter->dcb_set_bitmap) + return 1; + + while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state)) + msleep(1); + + if (netif_running(netdev)) + ixgbe_down(adapter); + + ret = ixgbe_copy_dcb_cfg(&adapter->temp_dcb_cfg, &adapter->dcb_cfg, + adapter->ring_feature[RING_F_DCB].indices); + if (ret) { + clear_bit(__IXGBE_RESETTING, &adapter->state); + return ret; + } + + if (netif_running(netdev)) + ixgbe_up(adapter); + + adapter->dcb_set_bitmap = 0x00; + clear_bit(__IXGBE_RESETTING, &adapter->state); + return ret; +} + +struct dcbnl_rtnl_ops dcbnl_ops = { + .getstate = ixgbe_dcbnl_get_state, + .setstate = ixgbe_dcbnl_set_state, + .getpermhwaddr = ixgbe_dcbnl_get_perm_hw_addr, + .setpgtccfgtx = ixgbe_dcbnl_set_pg_tc_cfg_tx, + .setpgbwgcfgtx = ixgbe_dcbnl_set_pg_bwg_cfg_tx, + .setpgtccfgrx = ixgbe_dcbnl_set_pg_tc_cfg_rx, + .setpgbwgcfgrx = ixgbe_dcbnl_set_pg_bwg_cfg_rx, + .getpgtccfgtx = ixgbe_dcbnl_get_pg_tc_cfg_tx, + .getpgbwgcfgtx = ixgbe_dcbnl_get_pg_bwg_cfg_tx, + .getpgtccfgrx = ixgbe_dcbnl_get_pg_tc_cfg_rx, + .getpgbwgcfgrx = ixgbe_dcbnl_get_pg_bwg_cfg_rx, + .setpfccfg = ixgbe_dcbnl_set_pfc_cfg, + .getpfccfg = ixgbe_dcbnl_get_pfc_cfg, + .setall = ixgbe_dcbnl_set_all +}; + diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c index a610016a0172..aaa4404e7c5f 100644 --- a/drivers/net/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ixgbe/ixgbe_ethtool.c @@ -97,9 +97,18 @@ static struct ixgbe_stats ixgbe_gstrings_stats[] = { ((((struct ixgbe_adapter *)netdev_priv(netdev))->num_tx_queues + \ ((struct ixgbe_adapter *)netdev_priv(netdev))->num_rx_queues) * \ (sizeof(struct ixgbe_queue_stats) / sizeof(u64))) -#define IXGBE_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + IXGBE_QUEUE_STATS_LEN) #define IXGBE_GLOBAL_STATS_LEN ARRAY_SIZE(ixgbe_gstrings_stats) -#define IXGBE_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + IXGBE_QUEUE_STATS_LEN) +#define IXGBE_PB_STATS_LEN ( \ + (((struct ixgbe_adapter *)netdev->priv)->flags & \ + IXGBE_FLAG_DCB_ENABLED) ? \ + (sizeof(((struct ixgbe_adapter *)0)->stats.pxonrxc) + \ + sizeof(((struct ixgbe_adapter *)0)->stats.pxontxc) + \ + sizeof(((struct ixgbe_adapter *)0)->stats.pxoffrxc) + \ + sizeof(((struct ixgbe_adapter *)0)->stats.pxofftxc)) \ + / sizeof(u64) : 0) +#define IXGBE_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + \ + IXGBE_PB_STATS_LEN + \ + IXGBE_QUEUE_STATS_LEN) static int ixgbe_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) @@ -831,6 +840,16 @@ static void ixgbe_get_ethtool_stats(struct net_device *netdev, data[i + k] = queue_stat[k]; i += k; } + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { + for (j = 0; j < MAX_TX_PACKET_BUFFERS; j++) { + data[i++] = adapter->stats.pxontxc[j]; + data[i++] = adapter->stats.pxofftxc[j]; + } + for (j = 0; j < MAX_RX_PACKET_BUFFERS; j++) { + data[i++] = adapter->stats.pxonrxc[j]; + data[i++] = adapter->stats.pxoffrxc[j]; + } + } } static void ixgbe_get_strings(struct net_device *netdev, u32 stringset, @@ -859,6 +878,13 @@ static void ixgbe_get_strings(struct net_device *netdev, u32 stringset, sprintf(p, "rx_queue_%u_bytes", i); p += ETH_GSTRING_LEN; } + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { + for (i = 0; i < MAX_TX_PACKET_BUFFERS; i++) { + sprintf(p, "tx_pb_%u_pxon", i); + } + for (i = 0; i < MAX_RX_PACKET_BUFFERS; i++) { + } + } /* BUG_ON(p - data != IXGBE_STATS_LEN * ETH_GSTRING_LEN); */ break; } diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 40108523377f..91dde9cdab66 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -404,7 +404,7 @@ static void ixgbe_receive_skb(struct ixgbe_adapter *adapter, if (adapter->netdev->features & NETIF_F_LRO && skb->ip_summed == CHECKSUM_UNNECESSARY) { - if (adapter->vlgrp && is_vlan) + if (adapter->vlgrp && is_vlan && (tag != 0)) lro_vlan_hwaccel_receive_skb(&ring->lro_mgr, skb, adapter->vlgrp, tag, rx_desc); @@ -413,12 +413,12 @@ static void ixgbe_receive_skb(struct ixgbe_adapter *adapter, ring->lro_used = true; } else { if (!(adapter->flags & IXGBE_FLAG_IN_NETPOLL)) { - if (adapter->vlgrp && is_vlan) + if (adapter->vlgrp && is_vlan && (tag != 0)) vlan_hwaccel_receive_skb(skb, adapter->vlgrp, tag); else netif_receive_skb(skb); } else { - if (adapter->vlgrp && is_vlan) + if (adapter->vlgrp && is_vlan && (tag != 0)) vlan_hwaccel_rx(skb, adapter->vlgrp, tag); else netif_rx(skb); @@ -1670,10 +1670,12 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) * effects of setting this bit are only that SRRCTL must be * fully programmed [0..15] */ - rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); - rdrxctl |= IXGBE_RDRXCTL_MVMEN; - IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl); - + if (adapter->flags & + (IXGBE_FLAG_RSS_ENABLED | IXGBE_FLAG_VMDQ_ENABLED)) { + rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); + rdrxctl |= IXGBE_RDRXCTL_MVMEN; + IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl); + } if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) { /* Fill out redirection table */ @@ -1732,6 +1734,16 @@ static void ixgbe_vlan_rx_register(struct net_device *netdev, ixgbe_irq_disable(adapter); adapter->vlgrp = grp; + /* + * For a DCB driver, always enable VLAN tag stripping so we can + * still receive traffic from a DCB-enabled host even if we're + * not in DCB mode. + */ + ctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_VLNCTRL); + ctrl |= IXGBE_VLNCTRL_VME; + ctrl &= ~IXGBE_VLNCTRL_CFIEN; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_VLNCTRL, ctrl); + if (grp) { /* enable VLAN tag insert/strip */ ctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_VLNCTRL); @@ -1896,6 +1908,44 @@ static void ixgbe_napi_disable_all(struct ixgbe_adapter *adapter) } } +#ifdef CONFIG_IXGBE_DCBNL +/* + * ixgbe_configure_dcb - Configure DCB hardware + * @adapter: ixgbe adapter struct + * + * This is called by the driver on open to configure the DCB hardware. + * This is also called by the gennetlink interface when reconfiguring + * the DCB state. + */ +static void ixgbe_configure_dcb(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 txdctl, vlnctrl; + int i, j; + + ixgbe_dcb_check_config(&adapter->dcb_cfg); + ixgbe_dcb_calculate_tc_credits(&adapter->dcb_cfg, DCB_TX_CONFIG); + ixgbe_dcb_calculate_tc_credits(&adapter->dcb_cfg, DCB_RX_CONFIG); + + /* reconfigure the hardware */ + ixgbe_dcb_hw_config(&adapter->hw, &adapter->dcb_cfg); + + for (i = 0; i < adapter->num_tx_queues; i++) { + j = adapter->tx_ring[i].reg_idx; + txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(j)); + /* PThresh workaround for Tx hang with DFP enabled. */ + txdctl |= 32; + IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(j), txdctl); + } + /* Enable VLAN tag insert/strip */ + vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + vlnctrl |= IXGBE_VLNCTRL_VME | IXGBE_VLNCTRL_VFE; + vlnctrl &= ~IXGBE_VLNCTRL_CFIEN; + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); + hw->mac.ops.set_vfta(&adapter->hw, 0, 0, true); +} + +#endif static void ixgbe_configure(struct ixgbe_adapter *adapter) { struct net_device *netdev = adapter->netdev; @@ -1904,6 +1954,16 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter) ixgbe_set_rx_mode(netdev); ixgbe_restore_vlan(adapter); +#ifdef CONFIG_IXGBE_DCBNL + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { + netif_set_gso_max_size(netdev, 32768); + ixgbe_configure_dcb(adapter); + } else { + netif_set_gso_max_size(netdev, 65536); + } +#else + netif_set_gso_max_size(netdev, 65536); +#endif ixgbe_configure_tx(adapter); ixgbe_configure_rx(adapter); @@ -1995,9 +2055,6 @@ static int ixgbe_up_complete(struct ixgbe_adapter *adapter) ixgbe_irq_enable(adapter); - /* enable transmits */ - netif_tx_start_all_queues(netdev); - /* bring the link up in the watchdog, this could race with our first * link up interrupt but shouldn't be a problem */ adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE; @@ -2260,6 +2317,11 @@ static void ixgbe_reset_task(struct work_struct *work) struct ixgbe_adapter *adapter; adapter = container_of(work, struct ixgbe_adapter, reset_task); + /* If we're already down or resetting, just bail */ + if (test_bit(__IXGBE_DOWN, &adapter->state) || + test_bit(__IXGBE_RESETTING, &adapter->state)) + return; + adapter->tx_timeout_count++; ixgbe_reinit_locked(adapter); @@ -2269,15 +2331,31 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter) { int nrq = 1, ntq = 1; int feature_mask = 0, rss_i, rss_m; + int dcb_i, dcb_m; /* Number of supported queues */ switch (adapter->hw.mac.type) { case ixgbe_mac_82598EB: + dcb_i = adapter->ring_feature[RING_F_DCB].indices; + dcb_m = 0; rss_i = adapter->ring_feature[RING_F_RSS].indices; rss_m = 0; feature_mask |= IXGBE_FLAG_RSS_ENABLED; + feature_mask |= IXGBE_FLAG_DCB_ENABLED; switch (adapter->flags & feature_mask) { + case (IXGBE_FLAG_RSS_ENABLED | IXGBE_FLAG_DCB_ENABLED): + dcb_m = 0x7 << 3; + rss_i = min(8, rss_i); + rss_m = 0x7; + nrq = dcb_i * rss_i; + ntq = min(MAX_TX_QUEUES, dcb_i * rss_i); + break; + case (IXGBE_FLAG_DCB_ENABLED): + dcb_m = 0x7 << 3; + nrq = dcb_i; + ntq = dcb_i; + break; case (IXGBE_FLAG_RSS_ENABLED): rss_m = 0xF; nrq = rss_i; @@ -2285,6 +2363,8 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter) break; case 0: default: + dcb_i = 0; + dcb_m = 0; rss_i = 0; rss_m = 0; nrq = 1; @@ -2292,6 +2372,12 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter) break; } + /* Sanity check, we should never have zero queues */ + nrq = (nrq ?:1); + ntq = (ntq ?:1); + + adapter->ring_feature[RING_F_DCB].indices = dcb_i; + adapter->ring_feature[RING_F_DCB].mask = dcb_m; adapter->ring_feature[RING_F_RSS].indices = rss_i; adapter->ring_feature[RING_F_RSS].mask = rss_m; break; @@ -2343,6 +2429,7 @@ static void ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter, adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED; kfree(adapter->msix_entries); adapter->msix_entries = NULL; + adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED; adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; ixgbe_set_num_queues(adapter); } else { @@ -2362,15 +2449,42 @@ static void __devinit ixgbe_cache_ring_register(struct ixgbe_adapter *adapter) { int feature_mask = 0, rss_i; int i, txr_idx, rxr_idx; + int dcb_i; /* Number of supported queues */ switch (adapter->hw.mac.type) { case ixgbe_mac_82598EB: + dcb_i = adapter->ring_feature[RING_F_DCB].indices; rss_i = adapter->ring_feature[RING_F_RSS].indices; txr_idx = 0; rxr_idx = 0; + feature_mask |= IXGBE_FLAG_DCB_ENABLED; feature_mask |= IXGBE_FLAG_RSS_ENABLED; switch (adapter->flags & feature_mask) { + case (IXGBE_FLAG_RSS_ENABLED | IXGBE_FLAG_DCB_ENABLED): + for (i = 0; i < dcb_i; i++) { + int j; + /* Rx first */ + for (j = 0; j < adapter->num_rx_queues; j++) { + adapter->rx_ring[rxr_idx].reg_idx = + i << 3 | j; + rxr_idx++; + } + /* Tx now */ + for (j = 0; j < adapter->num_tx_queues; j++) { + adapter->tx_ring[txr_idx].reg_idx = + i << 2 | (j >> 1); + if (j & 1) + txr_idx++; + } + } + case (IXGBE_FLAG_DCB_ENABLED): + /* the number of queues is assumed to be symmetric */ + for (i = 0; i < dcb_i; i++) { + adapter->rx_ring[i].reg_idx = i << 3; + adapter->tx_ring[i].reg_idx = i << 2; + } + break; case (IXGBE_FLAG_RSS_ENABLED): for (i = 0; i < adapter->num_rx_queues; i++) adapter->rx_ring[i].reg_idx = i; @@ -2395,7 +2509,7 @@ static void __devinit ixgbe_cache_ring_register(struct ixgbe_adapter *adapter) * number of queues at compile-time. The polling_netdev array is * intended for Multiqueue, but should work fine with a single queue. **/ -static int __devinit ixgbe_alloc_queues(struct ixgbe_adapter *adapter) +static int ixgbe_alloc_queues(struct ixgbe_adapter *adapter) { int i; @@ -2465,6 +2579,7 @@ static int __devinit ixgbe_set_interrupt_capability(struct ixgbe_adapter adapter->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry), GFP_KERNEL); if (!adapter->msix_entries) { + adapter->flags &= ~IXGBE_FLAG_DCB_ENABLED; adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; ixgbe_set_num_queues(adapter); kfree(adapter->tx_ring); @@ -2505,7 +2620,7 @@ out: return err; } -static void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter) +void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter) { if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) { adapter->flags &= ~IXGBE_FLAG_MSIX_ENABLED; @@ -2529,7 +2644,7 @@ static void ixgbe_reset_interrupt_capability(struct ixgbe_adapter *adapter) * - Hardware queue count (num_*_queues) * - defined by miscellaneous hardware support/features (RSS, etc.) **/ -static int __devinit ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter) +int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter) { int err; @@ -2577,6 +2692,10 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; unsigned int rss; +#ifdef CONFIG_IXGBE_DCBNL + int j; + struct tc_configuration *tc; +#endif /* PCI config space info */ @@ -2590,6 +2709,27 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) rss = min(IXGBE_MAX_RSS_INDICES, (int)num_online_cpus()); adapter->ring_feature[RING_F_RSS].indices = rss; adapter->flags |= IXGBE_FLAG_RSS_ENABLED; + adapter->ring_feature[RING_F_DCB].indices = IXGBE_MAX_DCB_INDICES; + +#ifdef CONFIG_IXGBE_DCBNL + /* Configure DCB traffic classes */ + for (j = 0; j < MAX_TRAFFIC_CLASS; j++) { + tc = &adapter->dcb_cfg.tc_config[j]; + tc->path[DCB_TX_CONFIG].bwg_id = 0; + tc->path[DCB_TX_CONFIG].bwg_percent = 12 + (j & 1); + tc->path[DCB_RX_CONFIG].bwg_id = 0; + tc->path[DCB_RX_CONFIG].bwg_percent = 12 + (j & 1); + tc->dcb_pfc = pfc_disabled; + } + adapter->dcb_cfg.bw_percentage[DCB_TX_CONFIG][0] = 100; + adapter->dcb_cfg.bw_percentage[DCB_RX_CONFIG][0] = 100; + adapter->dcb_cfg.rx_pba_cfg = pba_equal; + adapter->dcb_cfg.round_robin_enable = false; + adapter->dcb_set_bitmap = 0x00; + ixgbe_copy_dcb_cfg(&adapter->dcb_cfg, &adapter->temp_dcb_cfg, + adapter->ring_feature[RING_F_DCB].indices); + +#endif if (hw->mac.ops.get_media_type && (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_copper)) adapter->flags |= IXGBE_FLAG_FAN_FAIL_CAPABLE; @@ -2967,7 +3107,7 @@ static int ixgbe_close(struct net_device *netdev) * @adapter: private struct * helper function to napi_add each possible q_vector->napi */ -static void ixgbe_napi_add_all(struct ixgbe_adapter *adapter) +void ixgbe_napi_add_all(struct ixgbe_adapter *adapter) { int q_idx, q_vectors; int (*poll)(struct napi_struct *, int); @@ -2988,7 +3128,7 @@ static void ixgbe_napi_add_all(struct ixgbe_adapter *adapter) } } -static void ixgbe_napi_del_all(struct ixgbe_adapter *adapter) +void ixgbe_napi_del_all(struct ixgbe_adapter *adapter) { int q_idx; int q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; @@ -3109,6 +3249,18 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter) adapter->stats.mpc[i] += mpc; total_mpc += adapter->stats.mpc[i]; adapter->stats.rnbc[i] += IXGBE_READ_REG(hw, IXGBE_RNBC(i)); + adapter->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i)); + adapter->stats.qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i)); + adapter->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i)); + adapter->stats.qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i)); + adapter->stats.pxonrxc[i] += IXGBE_READ_REG(hw, + IXGBE_PXONRXC(i)); + adapter->stats.pxontxc[i] += IXGBE_READ_REG(hw, + IXGBE_PXONTXC(i)); + adapter->stats.pxoffrxc[i] += IXGBE_READ_REG(hw, + IXGBE_PXOFFRXC(i)); + adapter->stats.pxofftxc[i] += IXGBE_READ_REG(hw, + IXGBE_PXOFFTXC(i)); } adapter->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC); /* work around hardware counting issue */ @@ -3248,6 +3400,7 @@ static void ixgbe_watchdog_task(struct work_struct *work) (FLOW_TX ? "TX" : "None")))); netif_carrier_on(netdev); + netif_tx_wake_all_queues(netdev); } else { /* Force detection of hung controller */ adapter->detect_tx_hung = true; @@ -3258,6 +3411,7 @@ static void ixgbe_watchdog_task(struct work_struct *work) if (netif_carrier_ok(netdev)) { DPRINTK(LINK, INFO, "NIC Link is Down\n"); netif_carrier_off(netdev); + netif_tx_stop_all_queues(netdev); } } @@ -3604,6 +3758,14 @@ static int ixgbe_xmit_frame(struct sk_buff *skb, struct net_device *netdev) if (adapter->vlgrp && vlan_tx_tag_present(skb)) { tx_flags |= vlan_tx_tag_get(skb); + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { + tx_flags &= ~IXGBE_TX_FLAGS_VLAN_PRIO_MASK; + tx_flags |= (skb->queue_mapping << 13); + } + tx_flags <<= IXGBE_TX_FLAGS_VLAN_SHIFT; + tx_flags |= IXGBE_TX_FLAGS_VLAN; + } else if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { + tx_flags |= (skb->queue_mapping << 13); tx_flags <<= IXGBE_TX_FLAGS_VLAN_SHIFT; tx_flags |= IXGBE_TX_FLAGS_VLAN; } @@ -3878,6 +4040,13 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, netdev->vlan_features |= NETIF_F_IP_CSUM; netdev->vlan_features |= NETIF_F_SG; + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) + adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; + +#ifdef CONFIG_IXGBE_DCBNL + netdev->dcbnl_ops = &dcbnl_ops; +#endif + if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; @@ -3946,6 +4115,7 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, } netif_carrier_off(netdev); + netif_tx_stop_all_queues(netdev); ixgbe_napi_add_all(adapter); diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h new file mode 100644 index 000000000000..32d32c1ee410 --- /dev/null +++ b/include/linux/dcbnl.h @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Lucy Liu + */ + +#ifndef __LINUX_DCBNL_H__ +#define __LINUX_DCBNL_H__ + +#define DCB_PROTO_VERSION 1 + +struct dcbmsg { + unsigned char dcb_family; + __u8 cmd; + __u16 dcb_pad; +}; + +/** + * enum dcbnl_commands - supported DCB commands + * + * @DCB_CMD_UNDEFINED: unspecified command to catch errors + * @DCB_CMD_GSTATE: request the state of DCB in the device + * @DCB_CMD_SSTATE: set the state of DCB in the device + * @DCB_CMD_PGTX_GCFG: request the priority group configuration for Tx + * @DCB_CMD_PGTX_SCFG: set the priority group configuration for Tx + * @DCB_CMD_PGRX_GCFG: request the priority group configuration for Rx + * @DCB_CMD_PGRX_SCFG: set the priority group configuration for Rx + * @DCB_CMD_PFC_GCFG: request the priority flow control configuration + * @DCB_CMD_PFC_SCFG: set the priority flow control configuration + * @DCB_CMD_SET_ALL: apply all changes to the underlying device + * @DCB_CMD_GPERM_HWADDR: get the permanent MAC address of the underlying + * device. Only useful when using bonding. + */ +enum dcbnl_commands { + DCB_CMD_UNDEFINED, + + DCB_CMD_GSTATE, + DCB_CMD_SSTATE, + + DCB_CMD_PGTX_GCFG, + DCB_CMD_PGTX_SCFG, + DCB_CMD_PGRX_GCFG, + DCB_CMD_PGRX_SCFG, + + DCB_CMD_PFC_GCFG, + DCB_CMD_PFC_SCFG, + + DCB_CMD_SET_ALL, + DCB_CMD_GPERM_HWADDR, + + __DCB_CMD_ENUM_MAX, + DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1, +}; + + +/** + * enum dcbnl_attrs - DCB top-level netlink attributes + * + * @DCB_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_ATTR_IFNAME: interface name of the underlying device (NLA_STRING) + * @DCB_ATTR_STATE: enable state of DCB in the device (NLA_U8) + * @DCB_ATTR_PFC_STATE: enable state of PFC in the device (NLA_U8) + * @DCB_ATTR_PFC_CFG: priority flow control configuration (NLA_NESTED) + * @DCB_ATTR_NUM_TC: number of traffic classes supported in the device (NLA_U8) + * @DCB_ATTR_PG_CFG: priority group configuration (NLA_NESTED) + * @DCB_ATTR_SET_ALL: bool to commit changes to hardware or not (NLA_U8) + * @DCB_ATTR_PERM_HWADDR: MAC address of the physical device (NLA_NESTED) + */ +enum dcbnl_attrs { + DCB_ATTR_UNDEFINED, + + DCB_ATTR_IFNAME, + DCB_ATTR_STATE, + DCB_ATTR_PFC_STATE, + DCB_ATTR_PFC_CFG, + DCB_ATTR_NUM_TC, + DCB_ATTR_PG_CFG, + DCB_ATTR_SET_ALL, + DCB_ATTR_PERM_HWADDR, + + __DCB_ATTR_ENUM_MAX, + DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1, +}; + +/** + * enum dcbnl_pfc_attrs - DCB Priority Flow Control user priority nested attrs + * + * @DCB_PFC_UP_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_PFC_UP_ATTR_0: Priority Flow Control value for User Priority 0 (NLA_U8) + * @DCB_PFC_UP_ATTR_1: Priority Flow Control value for User Priority 1 (NLA_U8) + * @DCB_PFC_UP_ATTR_2: Priority Flow Control value for User Priority 2 (NLA_U8) + * @DCB_PFC_UP_ATTR_3: Priority Flow Control value for User Priority 3 (NLA_U8) + * @DCB_PFC_UP_ATTR_4: Priority Flow Control value for User Priority 4 (NLA_U8) + * @DCB_PFC_UP_ATTR_5: Priority Flow Control value for User Priority 5 (NLA_U8) + * @DCB_PFC_UP_ATTR_6: Priority Flow Control value for User Priority 6 (NLA_U8) + * @DCB_PFC_UP_ATTR_7: Priority Flow Control value for User Priority 7 (NLA_U8) + * @DCB_PFC_UP_ATTR_MAX: highest attribute number currently defined + * @DCB_PFC_UP_ATTR_ALL: apply to all priority flow control attrs (NLA_FLAG) + * + */ +enum dcbnl_pfc_up_attrs { + DCB_PFC_UP_ATTR_UNDEFINED, + + DCB_PFC_UP_ATTR_0, + DCB_PFC_UP_ATTR_1, + DCB_PFC_UP_ATTR_2, + DCB_PFC_UP_ATTR_3, + DCB_PFC_UP_ATTR_4, + DCB_PFC_UP_ATTR_5, + DCB_PFC_UP_ATTR_6, + DCB_PFC_UP_ATTR_7, + DCB_PFC_UP_ATTR_ALL, + + __DCB_PFC_UP_ATTR_ENUM_MAX, + DCB_PFC_UP_ATTR_MAX = __DCB_PFC_UP_ATTR_ENUM_MAX - 1, +}; + +/** + * enum dcbnl_pg_attrs - DCB Priority Group attributes + * + * @DCB_PG_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_PG_ATTR_TC_0: Priority Group Traffic Class 0 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_1: Priority Group Traffic Class 1 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_2: Priority Group Traffic Class 2 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_3: Priority Group Traffic Class 3 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_4: Priority Group Traffic Class 4 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_5: Priority Group Traffic Class 5 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_6: Priority Group Traffic Class 6 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_7: Priority Group Traffic Class 7 configuration (NLA_NESTED) + * @DCB_PG_ATTR_TC_MAX: highest attribute number currently defined + * @DCB_PG_ATTR_TC_ALL: apply to all traffic classes (NLA_NESTED) + * @DCB_PG_ATTR_BW_ID_0: Percent of link bandwidth for Priority Group 0 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_1: Percent of link bandwidth for Priority Group 1 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_2: Percent of link bandwidth for Priority Group 2 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_3: Percent of link bandwidth for Priority Group 3 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_4: Percent of link bandwidth for Priority Group 4 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_5: Percent of link bandwidth for Priority Group 5 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_6: Percent of link bandwidth for Priority Group 6 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_7: Percent of link bandwidth for Priority Group 7 (NLA_U8) + * @DCB_PG_ATTR_BW_ID_MAX: highest attribute number currently defined + * @DCB_PG_ATTR_BW_ID_ALL: apply to all priority groups (NLA_FLAG) + * + */ +enum dcbnl_pg_attrs { + DCB_PG_ATTR_UNDEFINED, + + DCB_PG_ATTR_TC_0, + DCB_PG_ATTR_TC_1, + DCB_PG_ATTR_TC_2, + DCB_PG_ATTR_TC_3, + DCB_PG_ATTR_TC_4, + DCB_PG_ATTR_TC_5, + DCB_PG_ATTR_TC_6, + DCB_PG_ATTR_TC_7, + DCB_PG_ATTR_TC_MAX, + DCB_PG_ATTR_TC_ALL, + + DCB_PG_ATTR_BW_ID_0, + DCB_PG_ATTR_BW_ID_1, + DCB_PG_ATTR_BW_ID_2, + DCB_PG_ATTR_BW_ID_3, + DCB_PG_ATTR_BW_ID_4, + DCB_PG_ATTR_BW_ID_5, + DCB_PG_ATTR_BW_ID_6, + DCB_PG_ATTR_BW_ID_7, + DCB_PG_ATTR_BW_ID_MAX, + DCB_PG_ATTR_BW_ID_ALL, + + __DCB_PG_ATTR_ENUM_MAX, + DCB_PG_ATTR_MAX = __DCB_PG_ATTR_ENUM_MAX - 1, +}; + +/** + * enum dcbnl_tc_attrs - DCB Traffic Class attributes + * + * @DCB_TC_ATTR_PARAM_UNDEFINED: unspecified attribute to catch errors + * @DCB_TC_ATTR_PARAM_PGID: (NLA_U8) Priority group the traffic class belongs to + * Valid values are: 0-7 + * @DCB_TC_ATTR_PARAM_UP_MAPPING: (NLA_U8) Traffic class to user priority map + * Some devices may not support changing the + * user priority map of a TC. + * @DCB_TC_ATTR_PARAM_STRICT_PRIO: (NLA_U8) Strict priority setting + * 0 - none + * 1 - group strict + * 2 - link strict + * @DCB_TC_ATTR_PARAM_BW_PCT: optional - (NLA_U8) If supported by the device and + * not configured to use link strict priority, + * this is the percentage of bandwidth of the + * priority group this traffic class belongs to + * @DCB_TC_ATTR_PARAM_ALL: (NLA_FLAG) all traffic class parameters + * + */ +enum dcbnl_tc_attrs { + DCB_TC_ATTR_PARAM_UNDEFINED, + + DCB_TC_ATTR_PARAM_PGID, + DCB_TC_ATTR_PARAM_UP_MAPPING, + DCB_TC_ATTR_PARAM_STRICT_PRIO, + DCB_TC_ATTR_PARAM_BW_PCT, + DCB_TC_ATTR_PARAM_ALL, + + __DCB_TC_ATTR_PARAM_ENUM_MAX, + DCB_TC_ATTR_PARAM_MAX = __DCB_TC_ATTR_PARAM_ENUM_MAX - 1, +}; + +/** + * enum dcb_general_attr_values - general DCB attribute values + * + * @DCB_ATTR_UNDEFINED: value used to indicate an attribute is not supported + * + */ +enum dcb_general_attr_values { + DCB_ATTR_VALUE_UNDEFINED = 0xff +}; + + +#endif /* __LINUX_DCBNL_H__ */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d8fb23679ee3..6095af572dfd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -43,6 +43,9 @@ #include #include +#ifdef CONFIG_DCBNL +#include +#endif struct vlan_group; struct ethtool_ops; @@ -843,6 +846,11 @@ struct net_device #define GSO_MAX_SIZE 65536 unsigned int gso_max_size; +#ifdef CONFIG_DCBNL + /* Data Center Bridging netlink ops */ + struct dcbnl_rtnl_ops *dcbnl_ops; +#endif + #ifdef CONFIG_COMPAT_NET_DEV_OPS struct { int (*init)(struct net_device *dev); diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 2b3d51c6ec9c..e88f7058b3a1 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -107,6 +107,11 @@ enum { RTM_GETADDRLABEL, #define RTM_GETADDRLABEL RTM_GETADDRLABEL + RTM_GETDCB = 78, +#define RTM_GETDCB RTM_GETDCB + RTM_SETDCB, +#define RTM_SETDCB RTM_SETDCB + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h new file mode 100644 index 000000000000..0ef0c5a46d8b --- /dev/null +++ b/include/net/dcbnl.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Lucy Liu + */ + +#ifndef __NET_DCBNL_H__ +#define __NET_DCBNL_H__ + +/* + * Ops struct for the netlink callbacks. Used by DCB-enabled drivers through + * the netdevice struct. + */ +struct dcbnl_rtnl_ops { + u8 (*getstate)(struct net_device *); + void (*setstate)(struct net_device *, u8); + void (*getpermhwaddr)(struct net_device *, u8 *); + void (*setpgtccfgtx)(struct net_device *, int, u8, u8, u8, u8); + void (*setpgbwgcfgtx)(struct net_device *, int, u8); + void (*setpgtccfgrx)(struct net_device *, int, u8, u8, u8, u8); + void (*setpgbwgcfgrx)(struct net_device *, int, u8); + void (*getpgtccfgtx)(struct net_device *, int, u8 *, u8 *, u8 *, u8 *); + void (*getpgbwgcfgtx)(struct net_device *, int, u8 *); + void (*getpgtccfgrx)(struct net_device *, int, u8 *, u8 *, u8 *, u8 *); + void (*getpgbwgcfgrx)(struct net_device *, int, u8 *); + void (*setpfccfg)(struct net_device *, int, u8); + void (*getpfccfg)(struct net_device *, int, u8 *); + u8 (*setall)(struct net_device *); +}; + +#endif /* __NET_DCBNL_H__ */ diff --git a/net/Kconfig b/net/Kconfig index 4e2e40ba8ba6..c7d01c3a23c5 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -194,6 +194,7 @@ source "net/lapb/Kconfig" source "net/econet/Kconfig" source "net/wanrouter/Kconfig" source "net/sched/Kconfig" +source "net/dcb/Kconfig" menu "Network testing" diff --git a/net/Makefile b/net/Makefile index 27d1f10dc0e0..83b064651f1d 100644 --- a/net/Makefile +++ b/net/Makefile @@ -57,6 +57,9 @@ obj-$(CONFIG_NETLABEL) += netlabel/ obj-$(CONFIG_IUCV) += iucv/ obj-$(CONFIG_RFKILL) += rfkill/ obj-$(CONFIG_NET_9P) += 9p/ +ifeq ($(CONFIG_DCBNL),y) +obj-$(CONFIG_DCB) += dcb/ +endif ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o diff --git a/net/dcb/Kconfig b/net/dcb/Kconfig new file mode 100644 index 000000000000..bdf38802d339 --- /dev/null +++ b/net/dcb/Kconfig @@ -0,0 +1,12 @@ +config DCB + tristate "Data Center Bridging support" + +config DCBNL + bool "Data Center Bridging netlink interface support" + depends on DCB + default n + ---help--- + This option turns on the netlink interface + (dcbnl) for Data Center Bridging capable devices. + + If unsure, say N. diff --git a/net/dcb/Makefile b/net/dcb/Makefile new file mode 100644 index 000000000000..9930f4cde818 --- /dev/null +++ b/net/dcb/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_DCB) += dcbnl.o diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c new file mode 100644 index 000000000000..516e8be83d72 --- /dev/null +++ b/net/dcb/dcbnl.c @@ -0,0 +1,704 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Lucy Liu + */ + +#include +#include +#include +#include +#include +#include +#include + +/** + * Data Center Bridging (DCB) is a collection of Ethernet enhancements + * intended to allow network traffic with differing requirements + * (highly reliable, no drops vs. best effort vs. low latency) to operate + * and co-exist on Ethernet. Current DCB features are: + * + * Enhanced Transmission Selection (aka Priority Grouping [PG]) - provides a + * framework for assigning bandwidth guarantees to traffic classes. + * + * Priority-based Flow Control (PFC) - provides a flow control mechanism which + * can work independently for each 802.1p priority. + * + * Congestion Notification - provides a mechanism for end-to-end congestion + * control for protocols which do not have built-in congestion management. + * + * More information about the emerging standards for these Ethernet features + * can be found at: http://www.ieee802.org/1/pages/dcbridges.html + * + * This file implements an rtnetlink interface to allow configuration of DCB + * features for capable devices. + */ + +MODULE_AUTHOR("Lucy Liu, "); +MODULE_DESCRIPTION("Data Center Bridging generic netlink interface"); +MODULE_LICENSE("GPL"); + +/**************** DCB attribute policies *************************************/ + +/* DCB netlink attributes policy */ +static struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = { + [DCB_ATTR_IFNAME] = {.type = NLA_STRING, .len = IFNAMSIZ - 1}, + [DCB_ATTR_STATE] = {.type = NLA_U8}, + [DCB_ATTR_PFC_CFG] = {.type = NLA_NESTED}, + [DCB_ATTR_PG_CFG] = {.type = NLA_NESTED}, + [DCB_ATTR_SET_ALL] = {.type = NLA_U8}, + [DCB_ATTR_PERM_HWADDR] = {.type = NLA_FLAG}, +}; + +/* DCB priority flow control to User Priority nested attributes */ +static struct nla_policy dcbnl_pfc_up_nest[DCB_PFC_UP_ATTR_MAX + 1] = { + [DCB_PFC_UP_ATTR_0] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_1] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_2] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_3] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_4] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_5] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_6] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_7] = {.type = NLA_U8}, + [DCB_PFC_UP_ATTR_ALL] = {.type = NLA_FLAG}, +}; + +/* DCB priority grouping nested attributes */ +static struct nla_policy dcbnl_pg_nest[DCB_PG_ATTR_MAX + 1] = { + [DCB_PG_ATTR_TC_0] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_1] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_2] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_3] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_4] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_5] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_6] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_7] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_TC_ALL] = {.type = NLA_NESTED}, + [DCB_PG_ATTR_BW_ID_0] = {.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_1] = {.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_2] = {.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_3] = {.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_4] = {.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_5] = {.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_6] = {.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_7] = {.type = NLA_U8}, + [DCB_PG_ATTR_BW_ID_ALL] = {.type = NLA_FLAG}, +}; + +/* DCB traffic class nested attributes. */ +static struct nla_policy dcbnl_tc_param_nest[DCB_TC_ATTR_PARAM_MAX + 1] = { + [DCB_TC_ATTR_PARAM_PGID] = {.type = NLA_U8}, + [DCB_TC_ATTR_PARAM_UP_MAPPING] = {.type = NLA_U8}, + [DCB_TC_ATTR_PARAM_STRICT_PRIO] = {.type = NLA_U8}, + [DCB_TC_ATTR_PARAM_BW_PCT] = {.type = NLA_U8}, + [DCB_TC_ATTR_PARAM_ALL] = {.type = NLA_FLAG}, +}; + + +/* standard netlink reply call */ +static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid, + u32 seq, u16 flags) +{ + struct sk_buff *dcbnl_skb; + struct dcbmsg *dcb; + struct nlmsghdr *nlh; + int ret = -EINVAL; + + dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!dcbnl_skb) + return ret; + + nlh = NLMSG_NEW(dcbnl_skb, pid, seq, event, sizeof(*dcb), flags); + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = cmd; + dcb->dcb_pad = 0; + + ret = nla_put_u8(dcbnl_skb, attr, value); + if (ret) + goto err; + + /* end the message, assign the nlmsg_len. */ + nlmsg_end(dcbnl_skb, nlh); + ret = rtnl_unicast(dcbnl_skb, &init_net, pid); + if (ret) + goto err; + + return 0; +nlmsg_failure: +err: + kfree(dcbnl_skb); + return ret; +} + +static int dcbnl_getstate(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + int ret = -EINVAL; + + /* if (!tb[DCB_ATTR_STATE] || !netdev->dcbnl_ops->getstate) */ + if (!netdev->dcbnl_ops->getstate) + return ret; + + ret = dcbnl_reply(netdev->dcbnl_ops->getstate(netdev), RTM_GETDCB, + DCB_CMD_GSTATE, DCB_ATTR_STATE, pid, seq, flags); + + return ret; +} + +static int dcbnl_getpfccfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct sk_buff *dcbnl_skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + struct nlattr *data[DCB_PFC_UP_ATTR_MAX + 1], *nest; + u8 value; + int ret = -EINVAL; + int i; + int getall = 0; + + if (!tb[DCB_ATTR_PFC_CFG] || !netdev->dcbnl_ops->getpfccfg) + return ret; + + ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX, + tb[DCB_ATTR_PFC_CFG], + dcbnl_pfc_up_nest); + if (ret) + goto err_out; + + dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!dcbnl_skb) + goto err_out; + + nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = DCB_CMD_PFC_GCFG; + + nest = nla_nest_start(dcbnl_skb, DCB_ATTR_PFC_CFG); + if (!nest) + goto err; + + if (data[DCB_PFC_UP_ATTR_ALL]) + getall = 1; + + for (i = DCB_PFC_UP_ATTR_0; i <= DCB_PFC_UP_ATTR_7; i++) { + if (!getall && !data[i]) + continue; + + netdev->dcbnl_ops->getpfccfg(netdev, i - DCB_PFC_UP_ATTR_0, + &value); + ret = nla_put_u8(dcbnl_skb, i, value); + + if (ret) { + nla_nest_cancel(dcbnl_skb, nest); + goto err; + } + } + nla_nest_end(dcbnl_skb, nest); + + nlmsg_end(dcbnl_skb, nlh); + + ret = rtnl_unicast(dcbnl_skb, &init_net, pid); + if (ret) + goto err; + + return 0; +nlmsg_failure: +err: + kfree(dcbnl_skb); +err_out: + return -EINVAL; +} + +static int dcbnl_getperm_hwaddr(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct sk_buff *dcbnl_skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + u8 perm_addr[MAX_ADDR_LEN]; + int ret = -EINVAL; + + if (!netdev->dcbnl_ops->getpermhwaddr) + return ret; + + dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!dcbnl_skb) + goto err_out; + + nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = DCB_CMD_GPERM_HWADDR; + + netdev->dcbnl_ops->getpermhwaddr(netdev, perm_addr); + + ret = nla_put(dcbnl_skb, DCB_ATTR_PERM_HWADDR, sizeof(perm_addr), + perm_addr); + + nlmsg_end(dcbnl_skb, nlh); + + ret = rtnl_unicast(dcbnl_skb, &init_net, pid); + if (ret) + goto err; + + return 0; + +nlmsg_failure: +err: + kfree(dcbnl_skb); +err_out: + return -EINVAL; +} + +static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags, int dir) +{ + struct sk_buff *dcbnl_skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + struct nlattr *pg_nest, *param_nest, *data; + struct nlattr *pg_tb[DCB_PG_ATTR_MAX + 1]; + struct nlattr *param_tb[DCB_TC_ATTR_PARAM_MAX + 1]; + u8 prio, pgid, tc_pct, up_map; + int ret = -EINVAL; + int getall = 0; + int i; + + if (!tb[DCB_ATTR_PG_CFG] || + !netdev->dcbnl_ops->getpgtccfgtx || + !netdev->dcbnl_ops->getpgtccfgrx || + !netdev->dcbnl_ops->getpgbwgcfgtx || + !netdev->dcbnl_ops->getpgbwgcfgrx) + return ret; + + ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, + tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest); + + if (ret) + goto err_out; + + dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!dcbnl_skb) + goto err_out; + + nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = (dir) ? DCB_CMD_PGRX_GCFG : DCB_CMD_PGTX_GCFG; + + pg_nest = nla_nest_start(dcbnl_skb, DCB_ATTR_PG_CFG); + if (!pg_nest) + goto err; + + if (pg_tb[DCB_PG_ATTR_TC_ALL]) + getall = 1; + + for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) { + if (!getall && !pg_tb[i]) + continue; + + if (pg_tb[DCB_PG_ATTR_TC_ALL]) + data = pg_tb[DCB_PG_ATTR_TC_ALL]; + else + data = pg_tb[i]; + ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX, + data, dcbnl_tc_param_nest); + if (ret) + goto err_pg; + + param_nest = nla_nest_start(dcbnl_skb, i); + if (!param_nest) + goto err_pg; + + pgid = DCB_ATTR_VALUE_UNDEFINED; + prio = DCB_ATTR_VALUE_UNDEFINED; + tc_pct = DCB_ATTR_VALUE_UNDEFINED; + up_map = DCB_ATTR_VALUE_UNDEFINED; + + if (dir) { + /* Rx */ + netdev->dcbnl_ops->getpgtccfgrx(netdev, + i - DCB_PG_ATTR_TC_0, &prio, + &pgid, &tc_pct, &up_map); + } else { + /* Tx */ + netdev->dcbnl_ops->getpgtccfgtx(netdev, + i - DCB_PG_ATTR_TC_0, &prio, + &pgid, &tc_pct, &up_map); + } + + if (param_tb[DCB_TC_ATTR_PARAM_PGID] || + param_tb[DCB_TC_ATTR_PARAM_ALL]) { + ret = nla_put_u8(dcbnl_skb, + DCB_TC_ATTR_PARAM_PGID, pgid); + if (ret) + goto err_param; + } + if (param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING] || + param_tb[DCB_TC_ATTR_PARAM_ALL]) { + ret = nla_put_u8(dcbnl_skb, + DCB_TC_ATTR_PARAM_UP_MAPPING, up_map); + if (ret) + goto err_param; + } + if (param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO] || + param_tb[DCB_TC_ATTR_PARAM_ALL]) { + ret = nla_put_u8(dcbnl_skb, + DCB_TC_ATTR_PARAM_STRICT_PRIO, prio); + if (ret) + goto err_param; + } + if (param_tb[DCB_TC_ATTR_PARAM_BW_PCT] || + param_tb[DCB_TC_ATTR_PARAM_ALL]) { + ret = nla_put_u8(dcbnl_skb, DCB_TC_ATTR_PARAM_BW_PCT, + tc_pct); + if (ret) + goto err_param; + } + nla_nest_end(dcbnl_skb, param_nest); + } + + if (pg_tb[DCB_PG_ATTR_BW_ID_ALL]) + getall = 1; + else + getall = 0; + + for (i = DCB_PG_ATTR_BW_ID_0; i <= DCB_PG_ATTR_BW_ID_7; i++) { + if (!getall && !pg_tb[i]) + continue; + + tc_pct = DCB_ATTR_VALUE_UNDEFINED; + + if (dir) { + /* Rx */ + netdev->dcbnl_ops->getpgbwgcfgrx(netdev, + i - DCB_PG_ATTR_BW_ID_0, &tc_pct); + } else { + /* Tx */ + netdev->dcbnl_ops->getpgbwgcfgtx(netdev, + i - DCB_PG_ATTR_BW_ID_0, &tc_pct); + } + ret = nla_put_u8(dcbnl_skb, i, tc_pct); + + if (ret) + goto err_pg; + } + + nla_nest_end(dcbnl_skb, pg_nest); + + nlmsg_end(dcbnl_skb, nlh); + + ret = rtnl_unicast(dcbnl_skb, &init_net, pid); + if (ret) + goto err; + + return 0; + +err_param: + nla_nest_cancel(dcbnl_skb, param_nest); +err_pg: + nla_nest_cancel(dcbnl_skb, pg_nest); +nlmsg_failure: +err: + kfree(dcbnl_skb); +err_out: + ret = -EINVAL; + return ret; +} + +static int dcbnl_pgtx_getcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + return __dcbnl_pg_getcfg(netdev, tb, pid, seq, flags, 0); +} + +static int dcbnl_pgrx_getcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + return __dcbnl_pg_getcfg(netdev, tb, pid, seq, flags, 1); +} + +static int dcbnl_setstate(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + int ret = -EINVAL; + u8 value; + + if (!tb[DCB_ATTR_STATE] || !netdev->dcbnl_ops->setstate) + return ret; + + value = nla_get_u8(tb[DCB_ATTR_STATE]); + + netdev->dcbnl_ops->setstate(netdev, value); + + ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_SSTATE, DCB_ATTR_STATE, + pid, seq, flags); + + return ret; +} + +static int dcbnl_setpfccfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct nlattr *data[DCB_PFC_UP_ATTR_MAX + 1]; + int i; + int ret = -EINVAL; + u8 value; + + if (!tb[DCB_ATTR_PFC_CFG] || !netdev->dcbnl_ops->setpfccfg) + return ret; + + ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX, + tb[DCB_ATTR_PFC_CFG], + dcbnl_pfc_up_nest); + if (ret) + goto err; + + for (i = DCB_PFC_UP_ATTR_0; i <= DCB_PFC_UP_ATTR_7; i++) { + if (data[i] == NULL) + continue; + value = nla_get_u8(data[i]); + netdev->dcbnl_ops->setpfccfg(netdev, + data[i]->nla_type - DCB_PFC_UP_ATTR_0, value); + } + + ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_PFC_SCFG, DCB_ATTR_PFC_CFG, + pid, seq, flags); +err: + return ret; +} + +static int dcbnl_setall(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + int ret = -EINVAL; + + if (!tb[DCB_ATTR_SET_ALL] || !netdev->dcbnl_ops->setall) + return ret; + + ret = dcbnl_reply(netdev->dcbnl_ops->setall(netdev), RTM_SETDCB, + DCB_CMD_SET_ALL, DCB_ATTR_SET_ALL, pid, seq, flags); + + return ret; +} + +static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags, int dir) +{ + struct nlattr *pg_tb[DCB_PG_ATTR_MAX + 1]; + struct nlattr *param_tb[DCB_TC_ATTR_PARAM_MAX + 1]; + int ret = -EINVAL; + int i; + u8 pgid; + u8 up_map; + u8 prio; + u8 tc_pct; + + if (!tb[DCB_ATTR_PG_CFG] || + !netdev->dcbnl_ops->setpgtccfgtx || + !netdev->dcbnl_ops->setpgtccfgrx || + !netdev->dcbnl_ops->setpgbwgcfgtx || + !netdev->dcbnl_ops->setpgbwgcfgrx) + return ret; + + ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, + tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest); + if (ret) + goto err; + + for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) { + if (!pg_tb[i]) + continue; + + ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX, + pg_tb[i], dcbnl_tc_param_nest); + if (ret) + goto err; + + pgid = DCB_ATTR_VALUE_UNDEFINED; + prio = DCB_ATTR_VALUE_UNDEFINED; + tc_pct = DCB_ATTR_VALUE_UNDEFINED; + up_map = DCB_ATTR_VALUE_UNDEFINED; + + if (param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO]) + prio = + nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO]); + + if (param_tb[DCB_TC_ATTR_PARAM_PGID]) + pgid = nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_PGID]); + + if (param_tb[DCB_TC_ATTR_PARAM_BW_PCT]) + tc_pct = nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_BW_PCT]); + + if (param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING]) + up_map = + nla_get_u8(param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING]); + + /* dir: Tx = 0, Rx = 1 */ + if (dir) { + /* Rx */ + netdev->dcbnl_ops->setpgtccfgrx(netdev, + i - DCB_PG_ATTR_TC_0, + prio, pgid, tc_pct, up_map); + } else { + /* Tx */ + netdev->dcbnl_ops->setpgtccfgtx(netdev, + i - DCB_PG_ATTR_TC_0, + prio, pgid, tc_pct, up_map); + } + } + + for (i = DCB_PG_ATTR_BW_ID_0; i <= DCB_PG_ATTR_BW_ID_7; i++) { + if (!pg_tb[i]) + continue; + + tc_pct = nla_get_u8(pg_tb[i]); + + /* dir: Tx = 0, Rx = 1 */ + if (dir) { + /* Rx */ + netdev->dcbnl_ops->setpgbwgcfgrx(netdev, + i - DCB_PG_ATTR_BW_ID_0, tc_pct); + } else { + /* Tx */ + netdev->dcbnl_ops->setpgbwgcfgtx(netdev, + i - DCB_PG_ATTR_BW_ID_0, tc_pct); + } + } + + ret = dcbnl_reply(0, RTM_SETDCB, + (dir ? DCB_CMD_PGRX_SCFG : DCB_CMD_PGTX_SCFG), + DCB_ATTR_PG_CFG, pid, seq, flags); + +err: + return ret; +} + +static int dcbnl_pgtx_setcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + return __dcbnl_pg_setcfg(netdev, tb, pid, seq, flags, 0); +} + +static int dcbnl_pgrx_setcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + return __dcbnl_pg_setcfg(netdev, tb, pid, seq, flags, 1); +} + +static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct net *net = sock_net(skb->sk); + struct net_device *netdev; + struct dcbmsg *dcb = (struct dcbmsg *)NLMSG_DATA(nlh); + struct nlattr *tb[DCB_ATTR_MAX + 1]; + u32 pid = skb ? NETLINK_CB(skb).pid : 0; + int ret = -EINVAL; + + if (net != &init_net) + return -EINVAL; + + ret = nlmsg_parse(nlh, sizeof(*dcb), tb, DCB_ATTR_MAX, + dcbnl_rtnl_policy); + if (ret < 0) + return ret; + + if (!tb[DCB_ATTR_IFNAME]) + return -EINVAL; + + netdev = dev_get_by_name(&init_net, nla_data(tb[DCB_ATTR_IFNAME])); + if (!netdev) + return -EINVAL; + + if (!netdev->dcbnl_ops) + goto errout; + + switch (dcb->cmd) { + case DCB_CMD_GSTATE: + ret = dcbnl_getstate(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_PFC_GCFG: + ret = dcbnl_getpfccfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_GPERM_HWADDR: + ret = dcbnl_getperm_hwaddr(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_PGTX_GCFG: + ret = dcbnl_pgtx_getcfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_PGRX_GCFG: + ret = dcbnl_pgrx_getcfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_SSTATE: + ret = dcbnl_setstate(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_PFC_SCFG: + ret = dcbnl_setpfccfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + + case DCB_CMD_SET_ALL: + ret = dcbnl_setall(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_PGTX_SCFG: + ret = dcbnl_pgtx_setcfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_PGRX_SCFG: + ret = dcbnl_pgrx_setcfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + default: + goto errout; + } +errout: + ret = -EINVAL; +out: + dev_put(netdev); + return ret; +} + +static int __init dcbnl_init(void) +{ + rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL); + rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL); + + return 0; +} +module_init(dcbnl_init); + +static void __exit dcbnl_exit(void) +{ + rtnl_unregister(PF_UNSPEC, RTM_GETDCB); + rtnl_unregister(PF_UNSPEC, RTM_SETDCB); +} +module_exit(dcbnl_exit); + + -- cgit v1.2.3 From 46132188bf72e22ef097f16ed5c969ee8cea1e8b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 20 Nov 2008 21:05:08 -0800 Subject: DCB: Add interface to query for the DCB capabilities of an device. Adds to the netlink interface for Data Center Bridging (DCB), allowing the DCB capabilities supported by a device to be queried. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_dcb_nl.c | 42 +++++++++++++++++++- include/linux/dcbnl.h | 37 ++++++++++++++++++ include/net/dcbnl.h | 1 + net/dcb/dcbnl.c | 82 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 161 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c index 50bff2af6b04..eb3a6cea1aaa 100644 --- a/drivers/net/ixgbe/ixgbe_dcb_nl.c +++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c @@ -337,6 +337,45 @@ static u8 ixgbe_dcbnl_set_all(struct net_device *netdev) return ret; } +static u8 ixgbe_dcbnl_getcap(struct net_device *netdev, int capid, u8 *cap) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + u8 rval = 0; + + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { + switch (capid) { + case DCB_CAP_ATTR_PG: + *cap = true; + break; + case DCB_CAP_ATTR_PFC: + *cap = true; + break; + case DCB_CAP_ATTR_UP2TC: + *cap = false; + break; + case DCB_CAP_ATTR_PG_TCS: + *cap = 0x80; + break; + case DCB_CAP_ATTR_PFC_TCS: + *cap = 0x80; + break; + case DCB_CAP_ATTR_GSP: + *cap = true; + break; + case DCB_CAP_ATTR_BCN: + *cap = false; + break; + default: + rval = -EINVAL; + break; + } + } else { + rval = -EINVAL; + } + + return rval; +} + struct dcbnl_rtnl_ops dcbnl_ops = { .getstate = ixgbe_dcbnl_get_state, .setstate = ixgbe_dcbnl_set_state, @@ -351,6 +390,7 @@ struct dcbnl_rtnl_ops dcbnl_ops = { .getpgbwgcfgrx = ixgbe_dcbnl_get_pg_bwg_cfg_rx, .setpfccfg = ixgbe_dcbnl_set_pfc_cfg, .getpfccfg = ixgbe_dcbnl_get_pfc_cfg, - .setall = ixgbe_dcbnl_set_all + .setall = ixgbe_dcbnl_set_all, + .getcap = ixgbe_dcbnl_getcap }; diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index 32d32c1ee410..13f0c638a695 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -43,6 +43,7 @@ struct dcbmsg { * @DCB_CMD_SET_ALL: apply all changes to the underlying device * @DCB_CMD_GPERM_HWADDR: get the permanent MAC address of the underlying * device. Only useful when using bonding. + * @DCB_CMD_GCAP: request the DCB capabilities of the device */ enum dcbnl_commands { DCB_CMD_UNDEFINED, @@ -60,6 +61,7 @@ enum dcbnl_commands { DCB_CMD_SET_ALL, DCB_CMD_GPERM_HWADDR, + DCB_CMD_GCAP, __DCB_CMD_ENUM_MAX, DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1, @@ -78,6 +80,7 @@ enum dcbnl_commands { * @DCB_ATTR_PG_CFG: priority group configuration (NLA_NESTED) * @DCB_ATTR_SET_ALL: bool to commit changes to hardware or not (NLA_U8) * @DCB_ATTR_PERM_HWADDR: MAC address of the physical device (NLA_NESTED) + * @DCB_ATTR_CAP: DCB capabilities of the device (NLA_NESTED) */ enum dcbnl_attrs { DCB_ATTR_UNDEFINED, @@ -90,6 +93,7 @@ enum dcbnl_attrs { DCB_ATTR_PG_CFG, DCB_ATTR_SET_ALL, DCB_ATTR_PERM_HWADDR, + DCB_ATTR_CAP, __DCB_ATTR_ENUM_MAX, DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1, @@ -216,6 +220,39 @@ enum dcbnl_tc_attrs { DCB_TC_ATTR_PARAM_MAX = __DCB_TC_ATTR_PARAM_ENUM_MAX - 1, }; +/** + * enum dcbnl_cap_attrs - DCB Capability attributes + * + * @DCB_CAP_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_CAP_ATTR_ALL: (NLA_FLAG) all capability parameters + * @DCB_CAP_ATTR_PG: (NLA_U8) device supports Priority Groups + * @DCB_CAP_ATTR_PFC: (NLA_U8) device supports Priority Flow Control + * @DCB_CAP_ATTR_UP2TC: (NLA_U8) device supports user priority to + * traffic class mapping + * @DCB_CAP_ATTR_PG_TCS: (NLA_U8) bitmap where each bit represents a + * number of traffic classes the device + * can be configured to use for Priority Groups + * @DCB_CAP_ATTR_PFC_TCS: (NLA_U8) bitmap where each bit represents a + * number of traffic classes the device can be + * configured to use for Priority Flow Control + * @DCB_CAP_ATTR_GSP: (NLA_U8) device supports group strict priority + * @DCB_CAP_ATTR_BCN: (NLA_U8) device supports Backwards Congestion + * Notification + */ +enum dcbnl_cap_attrs { + DCB_CAP_ATTR_UNDEFINED, + DCB_CAP_ATTR_ALL, + DCB_CAP_ATTR_PG, + DCB_CAP_ATTR_PFC, + DCB_CAP_ATTR_UP2TC, + DCB_CAP_ATTR_PG_TCS, + DCB_CAP_ATTR_PFC_TCS, + DCB_CAP_ATTR_GSP, + DCB_CAP_ATTR_BCN, + + __DCB_CAP_ATTR_ENUM_MAX, + DCB_CAP_ATTR_MAX = __DCB_CAP_ATTR_ENUM_MAX - 1, +}; /** * enum dcb_general_attr_values - general DCB attribute values * diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index 0ef0c5a46d8b..183ed040cf4c 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -39,6 +39,7 @@ struct dcbnl_rtnl_ops { void (*setpfccfg)(struct net_device *, int, u8); void (*getpfccfg)(struct net_device *, int, u8 *); u8 (*setall)(struct net_device *); + u8 (*getcap)(struct net_device *, int, u8 *); }; #endif /* __NET_DCBNL_H__ */ diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 516e8be83d72..de61d64d102d 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -61,6 +61,7 @@ static struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = { [DCB_ATTR_PG_CFG] = {.type = NLA_NESTED}, [DCB_ATTR_SET_ALL] = {.type = NLA_U8}, [DCB_ATTR_PERM_HWADDR] = {.type = NLA_FLAG}, + [DCB_ATTR_CAP] = {.type = NLA_NESTED}, }; /* DCB priority flow control to User Priority nested attributes */ @@ -107,6 +108,17 @@ static struct nla_policy dcbnl_tc_param_nest[DCB_TC_ATTR_PARAM_MAX + 1] = { [DCB_TC_ATTR_PARAM_ALL] = {.type = NLA_FLAG}, }; +/* DCB capabilities nested attributes. */ +static struct nla_policy dcbnl_cap_nest[DCB_CAP_ATTR_MAX + 1] = { + [DCB_CAP_ATTR_ALL] = {.type = NLA_FLAG}, + [DCB_CAP_ATTR_PG] = {.type = NLA_U8}, + [DCB_CAP_ATTR_PFC] = {.type = NLA_U8}, + [DCB_CAP_ATTR_UP2TC] = {.type = NLA_U8}, + [DCB_CAP_ATTR_PG_TCS] = {.type = NLA_U8}, + [DCB_CAP_ATTR_PFC_TCS] = {.type = NLA_U8}, + [DCB_CAP_ATTR_GSP] = {.type = NLA_U8}, + [DCB_CAP_ATTR_BCN] = {.type = NLA_U8}, +}; /* standard netlink reply call */ static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid, @@ -269,6 +281,72 @@ err_out: return -EINVAL; } +static int dcbnl_getcap(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct sk_buff *dcbnl_skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + struct nlattr *data[DCB_CAP_ATTR_MAX + 1], *nest; + u8 value; + int ret = -EINVAL; + int i; + int getall = 0; + + if (!tb[DCB_ATTR_CAP] || !netdev->dcbnl_ops->getcap) + return ret; + + ret = nla_parse_nested(data, DCB_CAP_ATTR_MAX, tb[DCB_ATTR_CAP], + dcbnl_cap_nest); + if (ret) + goto err_out; + + dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!dcbnl_skb) + goto err_out; + + nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = DCB_CMD_GCAP; + + nest = nla_nest_start(dcbnl_skb, DCB_ATTR_CAP); + if (!nest) + goto err; + + if (data[DCB_CAP_ATTR_ALL]) + getall = 1; + + for (i = DCB_CAP_ATTR_ALL+1; i <= DCB_CAP_ATTR_MAX; i++) { + if (!getall && !data[i]) + continue; + + if (!netdev->dcbnl_ops->getcap(netdev, i, &value)) { + ret = nla_put_u8(dcbnl_skb, i, value); + + if (ret) { + nla_nest_cancel(dcbnl_skb, nest); + goto err; + } + } + } + nla_nest_end(dcbnl_skb, nest); + + nlmsg_end(dcbnl_skb, nlh); + + ret = rtnl_unicast(dcbnl_skb, &init_net, pid); + if (ret) + goto err; + + return 0; +nlmsg_failure: +err: + kfree(dcbnl_skb); +err_out: + return -EINVAL; +} + static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlattr **tb, u32 pid, u32 seq, u16 flags, int dir) { @@ -675,6 +753,10 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ret = dcbnl_pgrx_setcfg(netdev, tb, pid, nlh->nlmsg_seq, nlh->nlmsg_flags); goto out; + case DCB_CMD_GCAP: + ret = dcbnl_getcap(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; default: goto errout; } -- cgit v1.2.3 From 33dbabc4a7f7bd72313c73a3c199f31f3900336f Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 20 Nov 2008 21:08:19 -0800 Subject: DCB: Add interface to query # of TCs supported by device Adds interface for Data Center Bridging (DCB) to query (and set if supported) the number of traffic classes currently supported by the device for the two (DCB) features: priority groups (PG) and priority flow control (PFC). Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_dcb_nl.c | 33 +++++++++- include/linux/dcbnl.h | 27 ++++++++ include/net/dcbnl.h | 2 + net/dcb/dcbnl.c | 132 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 193 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c index eb3a6cea1aaa..5921795f8403 100644 --- a/drivers/net/ixgbe/ixgbe_dcb_nl.c +++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c @@ -376,6 +376,35 @@ static u8 ixgbe_dcbnl_getcap(struct net_device *netdev, int capid, u8 *cap) return rval; } +static u8 ixgbe_dcbnl_getnumtcs(struct net_device *netdev, int tcid, u8 *num) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + u8 rval = 0; + + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { + switch (tcid) { + case DCB_NUMTCS_ATTR_PG: + *num = MAX_TRAFFIC_CLASS; + break; + case DCB_NUMTCS_ATTR_PFC: + *num = MAX_TRAFFIC_CLASS; + break; + default: + rval = -EINVAL; + break; + } + } else { + rval = -EINVAL; + } + + return rval; +} + +static u8 ixgbe_dcbnl_setnumtcs(struct net_device *netdev, int tcid, u8 num) +{ + return -EINVAL; +} + struct dcbnl_rtnl_ops dcbnl_ops = { .getstate = ixgbe_dcbnl_get_state, .setstate = ixgbe_dcbnl_set_state, @@ -391,6 +420,8 @@ struct dcbnl_rtnl_ops dcbnl_ops = { .setpfccfg = ixgbe_dcbnl_set_pfc_cfg, .getpfccfg = ixgbe_dcbnl_get_pfc_cfg, .setall = ixgbe_dcbnl_set_all, - .getcap = ixgbe_dcbnl_getcap + .getcap = ixgbe_dcbnl_getcap, + .getnumtcs = ixgbe_dcbnl_getnumtcs, + .setnumtcs = ixgbe_dcbnl_setnumtcs }; diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index 13f0c638a695..1077fba1dadc 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -44,6 +44,8 @@ struct dcbmsg { * @DCB_CMD_GPERM_HWADDR: get the permanent MAC address of the underlying * device. Only useful when using bonding. * @DCB_CMD_GCAP: request the DCB capabilities of the device + * @DCB_CMD_GNUMTCS: get the number of traffic classes currently supported + * @DCB_CMD_SNUMTCS: set the number of traffic classes */ enum dcbnl_commands { DCB_CMD_UNDEFINED, @@ -62,6 +64,8 @@ enum dcbnl_commands { DCB_CMD_SET_ALL, DCB_CMD_GPERM_HWADDR, DCB_CMD_GCAP, + DCB_CMD_GNUMTCS, + DCB_CMD_SNUMTCS, __DCB_CMD_ENUM_MAX, DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1, @@ -81,6 +85,7 @@ enum dcbnl_commands { * @DCB_ATTR_SET_ALL: bool to commit changes to hardware or not (NLA_U8) * @DCB_ATTR_PERM_HWADDR: MAC address of the physical device (NLA_NESTED) * @DCB_ATTR_CAP: DCB capabilities of the device (NLA_NESTED) + * @DCB_ATTR_NUMTCS: number of traffic classes supported (NLA_NESTED) */ enum dcbnl_attrs { DCB_ATTR_UNDEFINED, @@ -94,6 +99,7 @@ enum dcbnl_attrs { DCB_ATTR_SET_ALL, DCB_ATTR_PERM_HWADDR, DCB_ATTR_CAP, + DCB_ATTR_NUMTCS, __DCB_ATTR_ENUM_MAX, DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1, @@ -253,6 +259,27 @@ enum dcbnl_cap_attrs { __DCB_CAP_ATTR_ENUM_MAX, DCB_CAP_ATTR_MAX = __DCB_CAP_ATTR_ENUM_MAX - 1, }; + +/** + * enum dcbnl_numtcs_attrs - number of traffic classes + * + * @DCB_NUMTCS_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_NUMTCS_ATTR_ALL: (NLA_FLAG) all traffic class attributes + * @DCB_NUMTCS_ATTR_PG: (NLA_U8) number of traffic classes used for + * priority groups + * @DCB_NUMTCS_ATTR_PFC: (NLA_U8) number of traffic classes which can + * support priority flow control + */ +enum dcbnl_numtcs_attrs { + DCB_NUMTCS_ATTR_UNDEFINED, + DCB_NUMTCS_ATTR_ALL, + DCB_NUMTCS_ATTR_PG, + DCB_NUMTCS_ATTR_PFC, + + __DCB_NUMTCS_ATTR_ENUM_MAX, + DCB_NUMTCS_ATTR_MAX = __DCB_NUMTCS_ATTR_ENUM_MAX - 1, +}; + /** * enum dcb_general_attr_values - general DCB attribute values * diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index 183ed040cf4c..f0a65281ea73 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -40,6 +40,8 @@ struct dcbnl_rtnl_ops { void (*getpfccfg)(struct net_device *, int, u8 *); u8 (*setall)(struct net_device *); u8 (*getcap)(struct net_device *, int, u8 *); + u8 (*getnumtcs)(struct net_device *, int, u8 *); + u8 (*setnumtcs)(struct net_device *, int, u8); }; #endif /* __NET_DCBNL_H__ */ diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index de61d64d102d..5ff7e3c0c172 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -120,6 +120,13 @@ static struct nla_policy dcbnl_cap_nest[DCB_CAP_ATTR_MAX + 1] = { [DCB_CAP_ATTR_BCN] = {.type = NLA_U8}, }; +/* DCB capabilities nested attributes. */ +static struct nla_policy dcbnl_numtcs_nest[DCB_NUMTCS_ATTR_MAX + 1] = { + [DCB_NUMTCS_ATTR_ALL] = {.type = NLA_FLAG}, + [DCB_NUMTCS_ATTR_PG] = {.type = NLA_U8}, + [DCB_NUMTCS_ATTR_PFC] = {.type = NLA_U8}, +}; + /* standard netlink reply call */ static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid, u32 seq, u16 flags) @@ -347,6 +354,123 @@ err_out: return -EINVAL; } +static int dcbnl_getnumtcs(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct sk_buff *dcbnl_skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + struct nlattr *data[DCB_NUMTCS_ATTR_MAX + 1], *nest; + u8 value; + int ret = -EINVAL; + int i; + int getall = 0; + + if (!tb[DCB_ATTR_NUMTCS] || !netdev->dcbnl_ops->getnumtcs) + return ret; + + ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS], + dcbnl_numtcs_nest); + if (ret) { + ret = -EINVAL; + goto err_out; + } + + dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!dcbnl_skb) { + ret = -EINVAL; + goto err_out; + } + + nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = DCB_CMD_GNUMTCS; + + nest = nla_nest_start(dcbnl_skb, DCB_ATTR_NUMTCS); + if (!nest) { + ret = -EINVAL; + goto err; + } + + if (data[DCB_NUMTCS_ATTR_ALL]) + getall = 1; + + for (i = DCB_NUMTCS_ATTR_ALL+1; i <= DCB_NUMTCS_ATTR_MAX; i++) { + if (!getall && !data[i]) + continue; + + ret = netdev->dcbnl_ops->getnumtcs(netdev, i, &value); + if (!ret) { + ret = nla_put_u8(dcbnl_skb, i, value); + + if (ret) { + nla_nest_cancel(dcbnl_skb, nest); + ret = -EINVAL; + goto err; + } + } else { + goto err; + } + } + nla_nest_end(dcbnl_skb, nest); + + nlmsg_end(dcbnl_skb, nlh); + + ret = rtnl_unicast(dcbnl_skb, &init_net, pid); + if (ret) { + ret = -EINVAL; + goto err; + } + + return 0; +nlmsg_failure: +err: + kfree(dcbnl_skb); +err_out: + return ret; +} + +static int dcbnl_setnumtcs(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct nlattr *data[DCB_NUMTCS_ATTR_MAX + 1]; + int ret = -EINVAL; + u8 value; + int i; + + if (!tb[DCB_ATTR_NUMTCS] || !netdev->dcbnl_ops->setstate) + return ret; + + ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS], + dcbnl_numtcs_nest); + + if (ret) { + ret = -EINVAL; + goto err; + } + + for (i = DCB_NUMTCS_ATTR_ALL+1; i <= DCB_NUMTCS_ATTR_MAX; i++) { + if (data[i] == NULL) + continue; + + value = nla_get_u8(data[i]); + + ret = netdev->dcbnl_ops->setnumtcs(netdev, i, value); + + if (ret) + goto operr; + } + +operr: + ret = dcbnl_reply(!!ret, RTM_SETDCB, DCB_CMD_SNUMTCS, + DCB_ATTR_NUMTCS, pid, seq, flags); + +err: + return ret; +} + static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlattr **tb, u32 pid, u32 seq, u16 flags, int dir) { @@ -757,6 +881,14 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ret = dcbnl_getcap(netdev, tb, pid, nlh->nlmsg_seq, nlh->nlmsg_flags); goto out; + case DCB_CMD_GNUMTCS: + ret = dcbnl_getnumtcs(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_SNUMTCS: + ret = dcbnl_setnumtcs(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; default: goto errout; } -- cgit v1.2.3 From 0eb3aa9bab20217fb42244ccdcb5bf8a002f504c Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 20 Nov 2008 21:09:23 -0800 Subject: DCB: Add interface to query the state of PFC feature. Adds a netlink interface for Data Center Bridging (DCB) to get and set the enable state of the Priority Flow Control (PFC) feature. Primarily, this is a way to turn off PFC in the driver while DCB remains enabled. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_dcb_nl.c | 16 ++++++++++++++- include/linux/dcbnl.h | 2 ++ include/net/dcbnl.h | 2 ++ net/dcb/dcbnl.c | 43 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 62 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c index 5921795f8403..dd940a8f9357 100644 --- a/drivers/net/ixgbe/ixgbe_dcb_nl.c +++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c @@ -405,6 +405,18 @@ static u8 ixgbe_dcbnl_setnumtcs(struct net_device *netdev, int tcid, u8 num) return -EINVAL; } +static u8 ixgbe_dcbnl_getpfcstate(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + return !!(adapter->flags & IXGBE_FLAG_DCB_ENABLED); +} + +static void ixgbe_dcbnl_setpfcstate(struct net_device *netdev, u8 state) +{ + return; +} + struct dcbnl_rtnl_ops dcbnl_ops = { .getstate = ixgbe_dcbnl_get_state, .setstate = ixgbe_dcbnl_set_state, @@ -422,6 +434,8 @@ struct dcbnl_rtnl_ops dcbnl_ops = { .setall = ixgbe_dcbnl_set_all, .getcap = ixgbe_dcbnl_getcap, .getnumtcs = ixgbe_dcbnl_getnumtcs, - .setnumtcs = ixgbe_dcbnl_setnumtcs + .setnumtcs = ixgbe_dcbnl_setnumtcs, + .getpfcstate = ixgbe_dcbnl_getpfcstate, + .setpfcstate = ixgbe_dcbnl_setpfcstate }; diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index 1077fba1dadc..6cc4560bc376 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -66,6 +66,8 @@ enum dcbnl_commands { DCB_CMD_GCAP, DCB_CMD_GNUMTCS, DCB_CMD_SNUMTCS, + DCB_CMD_PFC_GSTATE, + DCB_CMD_PFC_SSTATE, __DCB_CMD_ENUM_MAX, DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1, diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index f0a65281ea73..c7d87caf3f99 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -42,6 +42,8 @@ struct dcbnl_rtnl_ops { u8 (*getcap)(struct net_device *, int, u8 *); u8 (*getnumtcs)(struct net_device *, int, u8 *); u8 (*setnumtcs)(struct net_device *, int, u8); + u8 (*getpfcstate)(struct net_device *); + void (*setpfcstate)(struct net_device *, u8); }; #endif /* __NET_DCBNL_H__ */ diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 5ff7e3c0c172..758419c6f59b 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -62,6 +62,7 @@ static struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = { [DCB_ATTR_SET_ALL] = {.type = NLA_U8}, [DCB_ATTR_PERM_HWADDR] = {.type = NLA_FLAG}, [DCB_ATTR_CAP] = {.type = NLA_NESTED}, + [DCB_ATTR_PFC_STATE] = {.type = NLA_U8}, }; /* DCB priority flow control to User Priority nested attributes */ @@ -471,6 +472,40 @@ err: return ret; } +static int dcbnl_getpfcstate(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + int ret = -EINVAL; + + if (!netdev->dcbnl_ops->getpfcstate) + return ret; + + ret = dcbnl_reply(netdev->dcbnl_ops->getpfcstate(netdev), RTM_GETDCB, + DCB_CMD_PFC_GSTATE, DCB_ATTR_PFC_STATE, + pid, seq, flags); + + return ret; +} + +static int dcbnl_setpfcstate(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + int ret = -EINVAL; + u8 value; + + if (!tb[DCB_ATTR_PFC_STATE] || !netdev->dcbnl_ops->setpfcstate) + return ret; + + value = nla_get_u8(tb[DCB_ATTR_PFC_STATE]); + + netdev->dcbnl_ops->setpfcstate(netdev, value); + + ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_PFC_SSTATE, DCB_ATTR_PFC_STATE, + pid, seq, flags); + + return ret; +} + static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlattr **tb, u32 pid, u32 seq, u16 flags, int dir) { @@ -889,6 +924,14 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ret = dcbnl_setnumtcs(netdev, tb, pid, nlh->nlmsg_seq, nlh->nlmsg_flags); goto out; + case DCB_CMD_PFC_GSTATE: + ret = dcbnl_getpfcstate(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_PFC_SSTATE: + ret = dcbnl_setpfcstate(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; default: goto errout; } -- cgit v1.2.3 From 859ee3c43812051e21816c6d6d4cc04fb7ce9b2e Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 20 Nov 2008 21:10:23 -0800 Subject: DCB: Add support for DCB BCN Adds an interface to configure the Backward Congestion Notification (BCN) feature. In a BCN capabale network, congestion notifications from congested points out in the network can cause the end station limit the rate of a given traffic flow. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: Peter P Waskiewicz Jr Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_dcb.h | 27 ++++++ drivers/net/ixgbe/ixgbe_dcb_nl.c | 176 ++++++++++++++++++++++++++++++++++++++- include/linux/dcbnl.h | 44 +++++++++- include/net/dcbnl.h | 4 + net/dcb/dcbnl.c | 174 ++++++++++++++++++++++++++++++++++++-- 5 files changed, 416 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/net/ixgbe/ixgbe_dcb.h b/drivers/net/ixgbe/ixgbe_dcb.h index 62dfd243bedc..75f6efe1e369 100644 --- a/drivers/net/ixgbe/ixgbe_dcb.h +++ b/drivers/net/ixgbe/ixgbe_dcb.h @@ -108,7 +108,34 @@ enum dcb_rx_pba_cfg { pba_80_48 /* PBA[0-3] each use 80KB, PBA[4-7] each use 48KB */ }; +/* + * This structure contains many values encoded as fixed-point + * numbers, meaning that some of bits are dedicated to the + * magnitude and others to the fraction part. In the comments + * this is shown as f=n, where n is the number of fraction bits. + * These fraction bits are always the low-order bits. The size + * of the magnitude is not specified. + */ +struct bcn_config { + u32 rp_admin_mode[MAX_TRAFFIC_CLASS]; /* BCN enabled, per TC */ + u32 bcna_option[2]; /* BCNA Port + MAC Addr */ + u32 rp_w; /* Derivative Weight, f=3 */ + u32 rp_gi; /* Increase Gain, f=12 */ + u32 rp_gd; /* Decrease Gain, f=12 */ + u32 rp_ru; /* Rate Unit */ + u32 rp_alpha; /* Max Decrease Factor, f=12 */ + u32 rp_beta; /* Max Increase Factor, f=12 */ + u32 rp_ri; /* Initial Rate */ + u32 rp_td; /* Drift Interval Timer */ + u32 rp_rd; /* Drift Increase */ + u32 rp_tmax; /* Severe Congestion Backoff Timer Range */ + u32 rp_rmin; /* Severe Congestion Restart Rate */ + u32 rp_wrtt; /* RTT Moving Average Weight */ +}; + struct ixgbe_dcb_config { + struct bcn_config bcn; + struct tc_configuration tc_config[MAX_TRAFFIC_CLASS]; u8 bw_percentage[2][MAX_BW_GROUP]; /* One each for Tx/Rx */ diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c index dd940a8f9357..615c2803202a 100644 --- a/drivers/net/ixgbe/ixgbe_dcb_nl.c +++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c @@ -34,6 +34,7 @@ #define BIT_PFC 0x02 #define BIT_PG_RX 0x04 #define BIT_PG_TX 0x08 +#define BIT_BCN 0x10 int ixgbe_copy_dcb_cfg(struct ixgbe_dcb_config *src_dcb_cfg, struct ixgbe_dcb_config *dst_dcb_cfg, int tc_max) @@ -88,6 +89,23 @@ int ixgbe_copy_dcb_cfg(struct ixgbe_dcb_config *src_dcb_cfg, src_dcb_cfg->tc_config[i - DCB_PFC_UP_ATTR_0].dcb_pfc; } + for (i = DCB_BCN_ATTR_RP_0; i < DCB_BCN_ATTR_RP_ALL; i++) { + dst_dcb_cfg->bcn.rp_admin_mode[i - DCB_BCN_ATTR_RP_0] = + src_dcb_cfg->bcn.rp_admin_mode[i - DCB_BCN_ATTR_RP_0]; + } + dst_dcb_cfg->bcn.rp_alpha = src_dcb_cfg->bcn.rp_alpha; + dst_dcb_cfg->bcn.rp_beta = src_dcb_cfg->bcn.rp_beta; + dst_dcb_cfg->bcn.rp_gd = src_dcb_cfg->bcn.rp_gd; + dst_dcb_cfg->bcn.rp_gi = src_dcb_cfg->bcn.rp_gi; + dst_dcb_cfg->bcn.rp_tmax = src_dcb_cfg->bcn.rp_tmax; + dst_dcb_cfg->bcn.rp_td = src_dcb_cfg->bcn.rp_td; + dst_dcb_cfg->bcn.rp_rmin = src_dcb_cfg->bcn.rp_rmin; + dst_dcb_cfg->bcn.rp_w = src_dcb_cfg->bcn.rp_w; + dst_dcb_cfg->bcn.rp_rd = src_dcb_cfg->bcn.rp_rd; + dst_dcb_cfg->bcn.rp_ru = src_dcb_cfg->bcn.rp_ru; + dst_dcb_cfg->bcn.rp_wrtt = src_dcb_cfg->bcn.rp_wrtt; + dst_dcb_cfg->bcn.rp_ri = src_dcb_cfg->bcn.rp_ri; + return 0; } @@ -313,6 +331,7 @@ static u8 ixgbe_dcbnl_set_all(struct net_device *netdev) struct ixgbe_adapter *adapter = netdev_priv(netdev); int ret; + adapter->dcb_set_bitmap &= ~BIT_BCN; /* no set for BCN */ if (!adapter->dcb_set_bitmap) return 1; @@ -417,6 +436,157 @@ static void ixgbe_dcbnl_setpfcstate(struct net_device *netdev, u8 state) return; } +static void ixgbe_dcbnl_getbcnrp(struct net_device *netdev, int priority, + u8 *setting) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + *setting = adapter->dcb_cfg.bcn.rp_admin_mode[priority]; +} + + +static void ixgbe_dcbnl_getbcncfg(struct net_device *netdev, int enum_index, + u32 *setting) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + switch (enum_index) { + case DCB_BCN_ATTR_ALPHA: + *setting = adapter->dcb_cfg.bcn.rp_alpha; + break; + case DCB_BCN_ATTR_BETA: + *setting = adapter->dcb_cfg.bcn.rp_beta; + break; + case DCB_BCN_ATTR_GD: + *setting = adapter->dcb_cfg.bcn.rp_gd; + break; + case DCB_BCN_ATTR_GI: + *setting = adapter->dcb_cfg.bcn.rp_gi; + break; + case DCB_BCN_ATTR_TMAX: + *setting = adapter->dcb_cfg.bcn.rp_tmax; + break; + case DCB_BCN_ATTR_TD: + *setting = adapter->dcb_cfg.bcn.rp_td; + break; + case DCB_BCN_ATTR_RMIN: + *setting = adapter->dcb_cfg.bcn.rp_rmin; + break; + case DCB_BCN_ATTR_W: + *setting = adapter->dcb_cfg.bcn.rp_w; + break; + case DCB_BCN_ATTR_RD: + *setting = adapter->dcb_cfg.bcn.rp_rd; + break; + case DCB_BCN_ATTR_RU: + *setting = adapter->dcb_cfg.bcn.rp_ru; + break; + case DCB_BCN_ATTR_WRTT: + *setting = adapter->dcb_cfg.bcn.rp_wrtt; + break; + case DCB_BCN_ATTR_RI: + *setting = adapter->dcb_cfg.bcn.rp_ri; + break; + default: + *setting = -1; + } +} + +static void ixgbe_dcbnl_setbcnrp(struct net_device *netdev, int priority, + u8 setting) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + adapter->temp_dcb_cfg.bcn.rp_admin_mode[priority] = setting; + + if (adapter->temp_dcb_cfg.bcn.rp_admin_mode[priority] != + adapter->dcb_cfg.bcn.rp_admin_mode[priority]) + adapter->dcb_set_bitmap |= BIT_BCN; +} + +static void ixgbe_dcbnl_setbcncfg(struct net_device *netdev, int enum_index, + u32 setting) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + switch (enum_index) { + case DCB_BCN_ATTR_ALPHA: + adapter->temp_dcb_cfg.bcn.rp_alpha = setting; + if (adapter->temp_dcb_cfg.bcn.rp_alpha != + adapter->dcb_cfg.bcn.rp_alpha) + adapter->dcb_set_bitmap |= BIT_BCN; + break; + case DCB_BCN_ATTR_BETA: + adapter->temp_dcb_cfg.bcn.rp_beta = setting; + if (adapter->temp_dcb_cfg.bcn.rp_beta != + adapter->dcb_cfg.bcn.rp_beta) + adapter->dcb_set_bitmap |= BIT_BCN; + break; + case DCB_BCN_ATTR_GD: + adapter->temp_dcb_cfg.bcn.rp_gd = setting; + if (adapter->temp_dcb_cfg.bcn.rp_gd != + adapter->dcb_cfg.bcn.rp_gd) + adapter->dcb_set_bitmap |= BIT_BCN; + break; + case DCB_BCN_ATTR_GI: + adapter->temp_dcb_cfg.bcn.rp_gi = setting; + if (adapter->temp_dcb_cfg.bcn.rp_gi != + adapter->dcb_cfg.bcn.rp_gi) + adapter->dcb_set_bitmap |= BIT_BCN; + break; + case DCB_BCN_ATTR_TMAX: + adapter->temp_dcb_cfg.bcn.rp_tmax = setting; + if (adapter->temp_dcb_cfg.bcn.rp_tmax != + adapter->dcb_cfg.bcn.rp_tmax) + adapter->dcb_set_bitmap |= BIT_BCN; + break; + case DCB_BCN_ATTR_TD: + adapter->temp_dcb_cfg.bcn.rp_td = setting; + if (adapter->temp_dcb_cfg.bcn.rp_td != + adapter->dcb_cfg.bcn.rp_td) + adapter->dcb_set_bitmap |= BIT_BCN; + break; + case DCB_BCN_ATTR_RMIN: + adapter->temp_dcb_cfg.bcn.rp_rmin = setting; + if (adapter->temp_dcb_cfg.bcn.rp_rmin != + adapter->dcb_cfg.bcn.rp_rmin) + adapter->dcb_set_bitmap |= BIT_BCN; + break; + case DCB_BCN_ATTR_W: + adapter->temp_dcb_cfg.bcn.rp_w = setting; + if (adapter->temp_dcb_cfg.bcn.rp_w != + adapter->dcb_cfg.bcn.rp_w) + adapter->dcb_set_bitmap |= BIT_BCN; + break; + case DCB_BCN_ATTR_RD: + adapter->temp_dcb_cfg.bcn.rp_rd = setting; + if (adapter->temp_dcb_cfg.bcn.rp_rd != + adapter->dcb_cfg.bcn.rp_rd) + adapter->dcb_set_bitmap |= BIT_BCN; + break; + case DCB_BCN_ATTR_RU: + adapter->temp_dcb_cfg.bcn.rp_ru = setting; + if (adapter->temp_dcb_cfg.bcn.rp_ru != + adapter->dcb_cfg.bcn.rp_ru) + adapter->dcb_set_bitmap |= BIT_BCN; + break; + case DCB_BCN_ATTR_WRTT: + adapter->temp_dcb_cfg.bcn.rp_wrtt = setting; + if (adapter->temp_dcb_cfg.bcn.rp_wrtt != + adapter->dcb_cfg.bcn.rp_wrtt) + adapter->dcb_set_bitmap |= BIT_BCN; + break; + case DCB_BCN_ATTR_RI: + adapter->temp_dcb_cfg.bcn.rp_ri = setting; + if (adapter->temp_dcb_cfg.bcn.rp_ri != + adapter->dcb_cfg.bcn.rp_ri) + adapter->dcb_set_bitmap |= BIT_BCN; + break; + default: + break; + } +} + struct dcbnl_rtnl_ops dcbnl_ops = { .getstate = ixgbe_dcbnl_get_state, .setstate = ixgbe_dcbnl_set_state, @@ -436,6 +606,10 @@ struct dcbnl_rtnl_ops dcbnl_ops = { .getnumtcs = ixgbe_dcbnl_getnumtcs, .setnumtcs = ixgbe_dcbnl_setnumtcs, .getpfcstate = ixgbe_dcbnl_getpfcstate, - .setpfcstate = ixgbe_dcbnl_setpfcstate + .setpfcstate = ixgbe_dcbnl_setpfcstate, + .getbcncfg = ixgbe_dcbnl_getbcncfg, + .getbcnrp = ixgbe_dcbnl_getbcnrp, + .setbcncfg = ixgbe_dcbnl_setbcncfg, + .setbcnrp = ixgbe_dcbnl_setbcnrp }; diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index 6cc4560bc376..e73a61449ad6 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -46,6 +46,8 @@ struct dcbmsg { * @DCB_CMD_GCAP: request the DCB capabilities of the device * @DCB_CMD_GNUMTCS: get the number of traffic classes currently supported * @DCB_CMD_SNUMTCS: set the number of traffic classes + * @DCB_CMD_GBCN: set backward congestion notification configuration + * @DCB_CMD_SBCN: get backward congestion notification configration. */ enum dcbnl_commands { DCB_CMD_UNDEFINED, @@ -62,18 +64,24 @@ enum dcbnl_commands { DCB_CMD_PFC_SCFG, DCB_CMD_SET_ALL, + DCB_CMD_GPERM_HWADDR, + DCB_CMD_GCAP, + DCB_CMD_GNUMTCS, DCB_CMD_SNUMTCS, + DCB_CMD_PFC_GSTATE, DCB_CMD_PFC_SSTATE, + DCB_CMD_BCN_GCFG, + DCB_CMD_BCN_SCFG, + __DCB_CMD_ENUM_MAX, DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1, }; - /** * enum dcbnl_attrs - DCB top-level netlink attributes * @@ -88,6 +96,7 @@ enum dcbnl_commands { * @DCB_ATTR_PERM_HWADDR: MAC address of the physical device (NLA_NESTED) * @DCB_ATTR_CAP: DCB capabilities of the device (NLA_NESTED) * @DCB_ATTR_NUMTCS: number of traffic classes supported (NLA_NESTED) + * @DCB_ATTR_BCN: backward congestion notification configuration (NLA_NESTED) */ enum dcbnl_attrs { DCB_ATTR_UNDEFINED, @@ -102,6 +111,7 @@ enum dcbnl_attrs { DCB_ATTR_PERM_HWADDR, DCB_ATTR_CAP, DCB_ATTR_NUMTCS, + DCB_ATTR_BCN, __DCB_ATTR_ENUM_MAX, DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1, @@ -282,6 +292,38 @@ enum dcbnl_numtcs_attrs { DCB_NUMTCS_ATTR_MAX = __DCB_NUMTCS_ATTR_ENUM_MAX - 1, }; +enum dcbnl_bcn_attrs{ + DCB_BCN_ATTR_UNDEFINED = 0, + + DCB_BCN_ATTR_RP_0, + DCB_BCN_ATTR_RP_1, + DCB_BCN_ATTR_RP_2, + DCB_BCN_ATTR_RP_3, + DCB_BCN_ATTR_RP_4, + DCB_BCN_ATTR_RP_5, + DCB_BCN_ATTR_RP_6, + DCB_BCN_ATTR_RP_7, + DCB_BCN_ATTR_RP_ALL, + + DCB_BCN_ATTR_ALPHA, + DCB_BCN_ATTR_BETA, + DCB_BCN_ATTR_GD, + DCB_BCN_ATTR_GI, + DCB_BCN_ATTR_TMAX, + DCB_BCN_ATTR_TD, + DCB_BCN_ATTR_RMIN, + DCB_BCN_ATTR_W, + DCB_BCN_ATTR_RD, + DCB_BCN_ATTR_RU, + DCB_BCN_ATTR_WRTT, + DCB_BCN_ATTR_RI, + DCB_BCN_ATTR_C, + DCB_BCN_ATTR_ALL, + + __DCB_BCN_ATTR_ENUM_MAX, + DCB_BCN_ATTR_MAX = __DCB_BCN_ATTR_ENUM_MAX - 1, +}; + /** * enum dcb_general_attr_values - general DCB attribute values * diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index c7d87caf3f99..91e0a3d7faf2 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -44,6 +44,10 @@ struct dcbnl_rtnl_ops { u8 (*setnumtcs)(struct net_device *, int, u8); u8 (*getpfcstate)(struct net_device *); void (*setpfcstate)(struct net_device *, u8); + void (*getbcncfg)(struct net_device *, int, u32 *); + void (*setbcncfg)(struct net_device *, int, u32); + void (*getbcnrp)(struct net_device *, int, u8 *); + void (*setbcnrp)(struct net_device *, int, u8); }; #endif /* __NET_DCBNL_H__ */ diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 758419c6f59b..b2bda3f610df 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -55,14 +55,15 @@ MODULE_LICENSE("GPL"); /* DCB netlink attributes policy */ static struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = { - [DCB_ATTR_IFNAME] = {.type = NLA_STRING, .len = IFNAMSIZ - 1}, - [DCB_ATTR_STATE] = {.type = NLA_U8}, - [DCB_ATTR_PFC_CFG] = {.type = NLA_NESTED}, - [DCB_ATTR_PG_CFG] = {.type = NLA_NESTED}, - [DCB_ATTR_SET_ALL] = {.type = NLA_U8}, + [DCB_ATTR_IFNAME] = {.type = NLA_NUL_STRING, .len = IFNAMSIZ - 1}, + [DCB_ATTR_STATE] = {.type = NLA_U8}, + [DCB_ATTR_PFC_CFG] = {.type = NLA_NESTED}, + [DCB_ATTR_PG_CFG] = {.type = NLA_NESTED}, + [DCB_ATTR_SET_ALL] = {.type = NLA_U8}, [DCB_ATTR_PERM_HWADDR] = {.type = NLA_FLAG}, - [DCB_ATTR_CAP] = {.type = NLA_NESTED}, - [DCB_ATTR_PFC_STATE] = {.type = NLA_U8}, + [DCB_ATTR_CAP] = {.type = NLA_NESTED}, + [DCB_ATTR_PFC_STATE] = {.type = NLA_U8}, + [DCB_ATTR_BCN] = {.type = NLA_NESTED}, }; /* DCB priority flow control to User Priority nested attributes */ @@ -128,6 +129,33 @@ static struct nla_policy dcbnl_numtcs_nest[DCB_NUMTCS_ATTR_MAX + 1] = { [DCB_NUMTCS_ATTR_PFC] = {.type = NLA_U8}, }; +/* DCB BCN nested attributes. */ +static struct nla_policy dcbnl_bcn_nest[DCB_BCN_ATTR_MAX + 1] = { + [DCB_BCN_ATTR_RP_0] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_1] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_2] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_3] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_4] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_5] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_6] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_7] = {.type = NLA_U8}, + [DCB_BCN_ATTR_RP_ALL] = {.type = NLA_FLAG}, + [DCB_BCN_ATTR_ALPHA] = {.type = NLA_U32}, + [DCB_BCN_ATTR_BETA] = {.type = NLA_U32}, + [DCB_BCN_ATTR_GD] = {.type = NLA_U32}, + [DCB_BCN_ATTR_GI] = {.type = NLA_U32}, + [DCB_BCN_ATTR_TMAX] = {.type = NLA_U32}, + [DCB_BCN_ATTR_TD] = {.type = NLA_U32}, + [DCB_BCN_ATTR_RMIN] = {.type = NLA_U32}, + [DCB_BCN_ATTR_W] = {.type = NLA_U32}, + [DCB_BCN_ATTR_RD] = {.type = NLA_U32}, + [DCB_BCN_ATTR_RU] = {.type = NLA_U32}, + [DCB_BCN_ATTR_WRTT] = {.type = NLA_U32}, + [DCB_BCN_ATTR_RI] = {.type = NLA_U32}, + [DCB_BCN_ATTR_C] = {.type = NLA_U32}, + [DCB_BCN_ATTR_ALL] = {.type = NLA_FLAG}, +}; + /* standard netlink reply call */ static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid, u32 seq, u16 flags) @@ -843,6 +871,130 @@ static int dcbnl_pgrx_setcfg(struct net_device *netdev, struct nlattr **tb, return __dcbnl_pg_setcfg(netdev, tb, pid, seq, flags, 1); } +static int dcbnl_bcn_getcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct sk_buff *dcbnl_skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + struct nlattr *bcn_nest; + struct nlattr *bcn_tb[DCB_BCN_ATTR_MAX + 1]; + u8 value_byte; + u32 value_integer; + int ret = -EINVAL; + bool getall = false; + int i; + + if (!tb[DCB_ATTR_BCN] || !netdev->dcbnl_ops->getbcnrp || + !netdev->dcbnl_ops->getbcncfg) + return ret; + + ret = nla_parse_nested(bcn_tb, DCB_BCN_ATTR_MAX, + tb[DCB_ATTR_BCN], dcbnl_bcn_nest); + + if (ret) + goto err_out; + + dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!dcbnl_skb) + goto err_out; + + nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = DCB_CMD_BCN_GCFG; + + bcn_nest = nla_nest_start(dcbnl_skb, DCB_ATTR_BCN); + if (!bcn_nest) + goto err; + + if (bcn_tb[DCB_BCN_ATTR_ALL]) + getall = true; + + for (i = DCB_BCN_ATTR_RP_0; i <= DCB_BCN_ATTR_RP_7; i++) { + if (!getall && !bcn_tb[i]) + continue; + + netdev->dcbnl_ops->getbcnrp(netdev, i - DCB_BCN_ATTR_RP_0, + &value_byte); + ret = nla_put_u8(dcbnl_skb, i, value_byte); + if (ret) + goto err_bcn; + } + + for (i = DCB_BCN_ATTR_ALPHA; i <= DCB_BCN_ATTR_RI; i++) { + if (!getall && !bcn_tb[i]) + continue; + + netdev->dcbnl_ops->getbcncfg(netdev, i, + &value_integer); + ret = nla_put_u32(dcbnl_skb, i, value_integer); + if (ret) + goto err_bcn; + } + + nla_nest_end(dcbnl_skb, bcn_nest); + + nlmsg_end(dcbnl_skb, nlh); + + ret = rtnl_unicast(dcbnl_skb, &init_net, pid); + if (ret) + goto err; + + return 0; + +err_bcn: + nla_nest_cancel(dcbnl_skb, bcn_nest); +nlmsg_failure: +err: + kfree(dcbnl_skb); +err_out: + ret = -EINVAL; + return ret; +} + +static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct nlattr *data[DCB_BCN_ATTR_MAX + 1]; + int i; + int ret = -EINVAL; + u8 value_byte; + u32 value_int; + + if (!tb[DCB_ATTR_BCN] || !netdev->dcbnl_ops->setbcncfg + || !netdev->dcbnl_ops->setbcnrp) + return ret; + + ret = nla_parse_nested(data, DCB_BCN_ATTR_MAX, + tb[DCB_ATTR_BCN], + dcbnl_pfc_up_nest); + if (ret) + goto err; + + for (i = DCB_BCN_ATTR_RP_0; i <= DCB_BCN_ATTR_RP_7; i++) { + if (data[i] == NULL) + continue; + value_byte = nla_get_u8(data[i]); + netdev->dcbnl_ops->setbcnrp(netdev, + data[i]->nla_type - DCB_BCN_ATTR_RP_0, value_byte); + } + + for (i = DCB_BCN_ATTR_ALPHA; i <= DCB_BCN_ATTR_RI; i++) { + if (data[i] == NULL) + continue; + value_int = nla_get_u32(data[i]); + netdev->dcbnl_ops->setbcncfg(netdev, + i, value_int); + } + + ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_BCN_SCFG, DCB_ATTR_BCN, + pid, seq, flags); +err: + return ret; +} + static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); @@ -891,6 +1043,10 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ret = dcbnl_pgrx_getcfg(netdev, tb, pid, nlh->nlmsg_seq, nlh->nlmsg_flags); goto out; + case DCB_CMD_BCN_GCFG: + ret = dcbnl_bcn_getcfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; case DCB_CMD_SSTATE: ret = dcbnl_setstate(netdev, tb, pid, nlh->nlmsg_seq, nlh->nlmsg_flags); @@ -932,6 +1088,10 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ret = dcbnl_setpfcstate(netdev, tb, pid, nlh->nlmsg_seq, nlh->nlmsg_flags); goto out; + case DCB_CMD_BCN_SCFG: + ret = dcbnl_bcn_setcfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; default: goto errout; } -- cgit v1.2.3 From 875065491fba8eb13219f16c36e79a6fb4e15c68 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 18 Nov 2008 20:50:34 +0000 Subject: ASoC: Rename snd_soc_card to snd_soc_machine One of the issues with the ASoC v1 API which has been addressed in the ASoC v2 work that Liam Girdwood has done is that the ALSA card provided by ASoC is distributed around the ASoC structures. For example, machine wide data such as the struct snd_card are maintained as part of the CODEC data structure, preventing the use of multiple codecs. This has been addressed by refactoring the data structures so that all the data for the ALSA card is contained in a single structure snd_soc_card which replaces the existing snd_soc_machine and snd_soc_device. Begin the process of backporting this by renaming struct snd_soc_machine to struct snd_soc_card, better reflecting its function and bringing it closer to standard ALSA terminology. Signed-off-by: Mark Brown --- Documentation/sound/alsa/soc/machine.txt | 8 +-- include/sound/soc.h | 8 +-- sound/soc/atmel/playpaq_wm8510.c | 4 +- sound/soc/atmel/sam9g20_wm8731.c | 4 +- sound/soc/au1x/sample-ac97.c | 4 +- sound/soc/blackfin/bf5xx-ad1980.c | 6 +-- sound/soc/blackfin/bf5xx-ad73311.c | 6 +-- sound/soc/blackfin/bf5xx-ssm2602.c | 6 +-- sound/soc/davinci/davinci-evm.c | 4 +- sound/soc/davinci/davinci-i2s.c | 8 +-- sound/soc/davinci/davinci-sffsdr.c | 4 +- sound/soc/fsl/fsl_dma.c | 2 +- sound/soc/fsl/mpc8610_hpcd.c | 6 +-- sound/soc/fsl/soc-of-simple.c | 10 ++-- sound/soc/omap/n810.c | 4 +- sound/soc/omap/omap2evm.c | 4 +- sound/soc/omap/omap3beagle.c | 4 +- sound/soc/omap/osk5912.c | 4 +- sound/soc/omap/overo.c | 4 +- sound/soc/pxa/corgi.c | 4 +- sound/soc/pxa/e800_wm9712.c | 6 +-- sound/soc/pxa/em-x270.c | 4 +- sound/soc/pxa/palm27x.c | 4 +- sound/soc/pxa/poodle.c | 4 +- sound/soc/pxa/spitz.c | 4 +- sound/soc/pxa/tosa.c | 6 +-- sound/soc/s3c24xx/ln2440sbc_alc650.c | 6 +-- sound/soc/s3c24xx/neo1973_wm8753.c | 6 +-- sound/soc/s3c24xx/s3c24xx_uda134x.c | 4 +- sound/soc/s3c24xx/smdk2443_wm9710.c | 6 +-- sound/soc/sh/sh7760-ac97.c | 4 +- sound/soc/soc-core.c | 90 ++++++++++++++++---------------- sound/soc/soc-dapm.c | 6 +-- 33 files changed, 127 insertions(+), 127 deletions(-) (limited to 'include') diff --git a/Documentation/sound/alsa/soc/machine.txt b/Documentation/sound/alsa/soc/machine.txt index f370e7db86af..4a9f51e2905c 100644 --- a/Documentation/sound/alsa/soc/machine.txt +++ b/Documentation/sound/alsa/soc/machine.txt @@ -9,7 +9,7 @@ the audio subsystem with the kernel as a platform device and is represented by the following struct:- /* SoC machine */ -struct snd_soc_machine { +struct snd_soc_card {_ char *name; int (*probe)(struct platform_device *pdev); @@ -67,10 +67,10 @@ static struct snd_soc_dai_link corgi_dai = { .ops = &corgi_ops, }; -struct snd_soc_machine then sets up the machine with it's DAIs. e.g. +struct snd_soc_card then sets up the machine with it's DAIs. e.g. /* corgi audio machine driver */ -static struct snd_soc_machine snd_soc_machine_corgi = { +static struct snd_soc_card snd_soc_corgi = { .name = "Corgi", .dai_link = &corgi_dai, .num_links = 1, @@ -90,7 +90,7 @@ static struct wm8731_setup_data corgi_wm8731_setup = { /* corgi audio subsystem */ static struct snd_soc_device corgi_snd_devdata = { - .machine = &snd_soc_machine_corgi, + .machine = &snd_soc_corgi, .platform = &pxa2xx_soc_platform, .codec_dev = &soc_codec_dev_wm8731, .codec_data = &corgi_wm8731_setup, diff --git a/include/sound/soc.h b/include/sound/soc.h index 077dfe4e51f0..3be17b3c650c 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -482,8 +482,8 @@ struct snd_soc_dai_link { struct snd_pcm *pcm; }; -/* SoC machine */ -struct snd_soc_machine { +/* SoC card */ +struct snd_soc_card { char *name; int (*probe)(struct platform_device *pdev); @@ -497,7 +497,7 @@ struct snd_soc_machine { int (*resume_post)(struct platform_device *pdev); /* callbacks */ - int (*set_bias_level)(struct snd_soc_machine *, + int (*set_bias_level)(struct snd_soc_card *, enum snd_soc_bias_level level); /* CPU <--> Codec DAI links */ @@ -508,7 +508,7 @@ struct snd_soc_machine { /* SoC Device - the audio subsystem */ struct snd_soc_device { struct device *dev; - struct snd_soc_machine *machine; + struct snd_soc_card *card; struct snd_soc_platform *platform; struct snd_soc_codec *codec; struct snd_soc_codec_device *codec_dev; diff --git a/sound/soc/atmel/playpaq_wm8510.c b/sound/soc/atmel/playpaq_wm8510.c index ea7935d2a66d..d40b5a52a8d2 100644 --- a/sound/soc/atmel/playpaq_wm8510.c +++ b/sound/soc/atmel/playpaq_wm8510.c @@ -361,7 +361,7 @@ static struct snd_soc_dai_link playpaq_wm8510_dai = { -static struct snd_soc_machine snd_soc_machine_playpaq = { +static struct snd_soc_card snd_soc_playpaq = { .name = "LRS_PlayPaq_WM8510", .dai_link = &playpaq_wm8510_dai, .num_links = 1, @@ -377,7 +377,7 @@ static struct wm8510_setup_data playpaq_wm8510_setup = { static struct snd_soc_device playpaq_wm8510_snd_devdata = { - .machine = &snd_soc_machine_playpaq, + .card = &snd_soc_playpaq, .platform = &at32_soc_platform, .codec_dev = &soc_codec_dev_wm8510, .codec_data = &playpaq_wm8510_setup, diff --git a/sound/soc/atmel/sam9g20_wm8731.c b/sound/soc/atmel/sam9g20_wm8731.c index 710addcc66b3..fdc1d0206e0b 100644 --- a/sound/soc/atmel/sam9g20_wm8731.c +++ b/sound/soc/atmel/sam9g20_wm8731.c @@ -242,7 +242,7 @@ static struct snd_soc_dai_link at91sam9g20ek_dai = { .ops = &at91sam9g20ek_ops, }; -static struct snd_soc_machine snd_soc_machine_at91sam9g20ek = { +static struct snd_soc_card snd_soc_at91sam9g20ek = { .name = "WM8731", .dai_link = &at91sam9g20ek_dai, .num_links = 1, @@ -254,7 +254,7 @@ static struct wm8731_setup_data at91sam9g20ek_wm8731_setup = { }; static struct snd_soc_device at91sam9g20ek_snd_devdata = { - .machine = &snd_soc_machine_at91sam9g20ek, + .card = &snd_soc_at91sam9g20ek, .platform = &atmel_soc_platform, .codec_dev = &soc_codec_dev_wm8731, .codec_data = &at91sam9g20ek_wm8731_setup, diff --git a/sound/soc/au1x/sample-ac97.c b/sound/soc/au1x/sample-ac97.c index f75ae7f62c3d..27683eb7905e 100644 --- a/sound/soc/au1x/sample-ac97.c +++ b/sound/soc/au1x/sample-ac97.c @@ -42,14 +42,14 @@ static struct snd_soc_dai_link au1xpsc_sample_ac97_dai = { .ops = NULL, }; -static struct snd_soc_machine au1xpsc_sample_ac97_machine = { +static struct snd_soc_card au1xpsc_sample_ac97_machine = { .name = "Au1xxx PSC AC97 Audio", .dai_link = &au1xpsc_sample_ac97_dai, .num_links = 1, }; static struct snd_soc_device au1xpsc_sample_ac97_devdata = { - .machine = &au1xpsc_sample_ac97_machine, + .card = &au1xpsc_sample_ac97_machine, .platform = &au1xpsc_soc_platform, /* see dbdma2.c */ .codec_dev = &soc_codec_dev_ac97, }; diff --git a/sound/soc/blackfin/bf5xx-ad1980.c b/sound/soc/blackfin/bf5xx-ad1980.c index 124425d22320..36c569a43ce1 100644 --- a/sound/soc/blackfin/bf5xx-ad1980.c +++ b/sound/soc/blackfin/bf5xx-ad1980.c @@ -43,7 +43,7 @@ #include "bf5xx-ac97-pcm.h" #include "bf5xx-ac97.h" -static struct snd_soc_machine bf5xx_board; +static struct snd_soc_card bf5xx_board; static int bf5xx_board_startup(struct snd_pcm_substream *substream) { @@ -67,14 +67,14 @@ static struct snd_soc_dai_link bf5xx_board_dai = { .ops = &bf5xx_board_ops, }; -static struct snd_soc_machine bf5xx_board = { +static struct snd_soc_card bf5xx_board = { .name = "bf5xx-board", .dai_link = &bf5xx_board_dai, .num_links = 1, }; static struct snd_soc_device bf5xx_board_snd_devdata = { - .machine = &bf5xx_board, + .card = &bf5xx_board, .platform = &bf5xx_ac97_soc_platform, .codec_dev = &soc_codec_dev_ad1980, }; diff --git a/sound/soc/blackfin/bf5xx-ad73311.c b/sound/soc/blackfin/bf5xx-ad73311.c index 47da49b9aeac..57da14799375 100644 --- a/sound/soc/blackfin/bf5xx-ad73311.c +++ b/sound/soc/blackfin/bf5xx-ad73311.c @@ -65,7 +65,7 @@ #define GPIO_SE CONFIG_SND_BFIN_AD73311_SE -static struct snd_soc_machine bf5xx_ad73311; +static struct snd_soc_card bf5xx_ad73311; static int snd_ad73311_startup(void) { @@ -190,7 +190,7 @@ static struct snd_soc_dai_link bf5xx_ad73311_dai = { .ops = &bf5xx_ad73311_ops, }; -static struct snd_soc_machine bf5xx_ad73311 = { +static struct snd_soc_card bf5xx_ad73311 = { .name = "bf5xx_ad73311", .probe = bf5xx_probe, .dai_link = &bf5xx_ad73311_dai, @@ -198,7 +198,7 @@ static struct snd_soc_machine bf5xx_ad73311 = { }; static struct snd_soc_device bf5xx_ad73311_snd_devdata = { - .machine = &bf5xx_ad73311, + .card = &bf5xx_ad73311, .platform = &bf5xx_i2s_soc_platform, .codec_dev = &soc_codec_dev_ad73311, }; diff --git a/sound/soc/blackfin/bf5xx-ssm2602.c b/sound/soc/blackfin/bf5xx-ssm2602.c index 744a90e765d9..0078dfcd95b9 100644 --- a/sound/soc/blackfin/bf5xx-ssm2602.c +++ b/sound/soc/blackfin/bf5xx-ssm2602.c @@ -44,7 +44,7 @@ #include "bf5xx-i2s-pcm.h" #include "bf5xx-i2s.h" -static struct snd_soc_machine bf5xx_ssm2602; +static struct snd_soc_card bf5xx_ssm2602; static int bf5xx_ssm2602_startup(struct snd_pcm_substream *substream) { @@ -135,14 +135,14 @@ static struct ssm2602_setup_data bf5xx_ssm2602_setup = { .i2c_address = 0x1b, }; -static struct snd_soc_machine bf5xx_ssm2602 = { +static struct snd_soc_card bf5xx_ssm2602 = { .name = "bf5xx_ssm2602", .dai_link = &bf5xx_ssm2602_dai, .num_links = 1, }; static struct snd_soc_device bf5xx_ssm2602_snd_devdata = { - .machine = &bf5xx_ssm2602, + .card = &bf5xx_ssm2602, .platform = &bf5xx_i2s_soc_platform, .codec_dev = &soc_codec_dev_ssm2602, .codec_data = &bf5xx_ssm2602_setup, diff --git a/sound/soc/davinci/davinci-evm.c b/sound/soc/davinci/davinci-evm.c index 9e6062cd6b59..2ce34d44b15c 100644 --- a/sound/soc/davinci/davinci-evm.c +++ b/sound/soc/davinci/davinci-evm.c @@ -128,7 +128,7 @@ static struct snd_soc_dai_link evm_dai = { }; /* davinci-evm audio machine driver */ -static struct snd_soc_machine snd_soc_machine_evm = { +static struct snd_soc_card snd_soc_card_evm = { .name = "DaVinci EVM", .dai_link = &evm_dai, .num_links = 1, @@ -142,7 +142,7 @@ static struct aic3x_setup_data evm_aic3x_setup = { /* evm audio subsystem */ static struct snd_soc_device evm_snd_devdata = { - .machine = &snd_soc_machine_evm, + .card = &snd_soc_card_evm, .platform = &davinci_soc_platform, .codec_dev = &soc_codec_dev_aic3x, .codec_data = &evm_aic3x_setup, diff --git a/sound/soc/davinci/davinci-i2s.c b/sound/soc/davinci/davinci-i2s.c index 11c20d0b7bcc..95df51e803b4 100644 --- a/sound/soc/davinci/davinci-i2s.c +++ b/sound/soc/davinci/davinci-i2s.c @@ -375,8 +375,8 @@ static int davinci_i2s_probe(struct platform_device *pdev, struct snd_soc_dai *dai) { struct snd_soc_device *socdev = platform_get_drvdata(pdev); - struct snd_soc_machine *machine = socdev->machine; - struct snd_soc_dai *cpu_dai = machine->dai_link[pdev->id].cpu_dai; + struct snd_soc_card *card = socdev->card; + struct snd_soc_dai *cpu_dai = card->dai_link[pdev->id].cpu_dai; struct davinci_mcbsp_dev *dev; struct resource *mem, *ioarea; struct evm_snd_platform_data *pdata; @@ -437,8 +437,8 @@ static void davinci_i2s_remove(struct platform_device *pdev, struct snd_soc_dai *dai) { struct snd_soc_device *socdev = platform_get_drvdata(pdev); - struct snd_soc_machine *machine = socdev->machine; - struct snd_soc_dai *cpu_dai = machine->dai_link[pdev->id].cpu_dai; + struct snd_soc_card *card = socdev->card; + struct snd_soc_dai *cpu_dai = card->dai_link[pdev->id].cpu_dai; struct davinci_mcbsp_dev *dev = cpu_dai->private_data; struct resource *mem; diff --git a/sound/soc/davinci/davinci-sffsdr.c b/sound/soc/davinci/davinci-sffsdr.c index 69a8a769f4d8..fa38f9cd3506 100644 --- a/sound/soc/davinci/davinci-sffsdr.c +++ b/sound/soc/davinci/davinci-sffsdr.c @@ -73,7 +73,7 @@ static struct snd_soc_dai_link sffsdr_dai = { }; /* davinci-sffsdr audio machine driver */ -static struct snd_soc_machine snd_soc_machine_sffsdr = { +static struct snd_soc_card snd_soc_sffsdr = { .name = "DaVinci SFFSDR", .dai_link = &sffsdr_dai, .num_links = 1, @@ -89,7 +89,7 @@ static struct pcm3008_setup_data sffsdr_pcm3008_setup = { /* sffsdr audio subsystem */ static struct snd_soc_device sffsdr_snd_devdata = { - .machine = &snd_soc_machine_sffsdr, + .card = &snd_soc_sffsdr, .platform = &davinci_soc_platform, .codec_dev = &soc_codec_dev_pcm3008, .codec_data = &sffsdr_pcm3008_setup, diff --git a/sound/soc/fsl/fsl_dma.c b/sound/soc/fsl/fsl_dma.c index d2d3da9729f2..bf92331b4768 100644 --- a/sound/soc/fsl/fsl_dma.c +++ b/sound/soc/fsl/fsl_dma.c @@ -284,7 +284,7 @@ static irqreturn_t fsl_dma_isr(int irq, void *dev_id) * fsl_dma_new: initialize this PCM driver. * * This function is called when the codec driver calls snd_soc_new_pcms(), - * once for each .dai_link in the machine driver's snd_soc_machine + * once for each .dai_link in the machine driver's snd_soc_card * structure. */ static int fsl_dma_new(struct snd_card *card, struct snd_soc_dai *dai, diff --git a/sound/soc/fsl/mpc8610_hpcd.c b/sound/soc/fsl/mpc8610_hpcd.c index 94f89debde1f..1cf4d6eeb538 100644 --- a/sound/soc/fsl/mpc8610_hpcd.c +++ b/sound/soc/fsl/mpc8610_hpcd.c @@ -29,7 +29,7 @@ struct mpc8610_hpcd_data { struct snd_soc_device sound_devdata; struct snd_soc_dai_link dai; - struct snd_soc_machine machine; + struct snd_soc_card machine; unsigned int dai_format; unsigned int codec_clk_direction; unsigned int cpu_clk_direction; @@ -185,7 +185,7 @@ static struct snd_soc_ops mpc8610_hpcd_ops = { /** * mpc8610_hpcd_machine: ASoC machine data */ -static struct snd_soc_machine mpc8610_hpcd_machine = { +static struct snd_soc_card mpc8610_hpcd_machine = { .probe = mpc8610_hpcd_machine_probe, .remove = mpc8610_hpcd_machine_remove, .name = "MPC8610 HPCD", @@ -465,7 +465,7 @@ static int mpc8610_hpcd_probe(struct of_device *ofdev, goto error; } - machine_data->sound_devdata.machine = &mpc8610_hpcd_machine; + machine_data->sound_devdata.card = &mpc8610_hpcd_machine; machine_data->sound_devdata.codec_dev = &soc_codec_device_cs4270; machine_data->sound_devdata.platform = &fsl_soc_platform; diff --git a/sound/soc/fsl/soc-of-simple.c b/sound/soc/fsl/soc-of-simple.c index 0382fdac51cd..53be6491320a 100644 --- a/sound/soc/fsl/soc-of-simple.c +++ b/sound/soc/fsl/soc-of-simple.c @@ -31,7 +31,7 @@ struct of_snd_soc_device { int id; struct list_head list; struct snd_soc_device device; - struct snd_soc_machine machine; + struct snd_soc_card card; struct snd_soc_dai_link dai_link; struct platform_device *pdev; struct device_node *platform_node; @@ -58,9 +58,9 @@ of_snd_soc_get_device(struct device_node *codec_node) /* Initialize the structure and add it to the global list */ of_soc->codec_node = codec_node; of_soc->id = of_snd_soc_next_index++; - of_soc->machine.dai_link = &of_soc->dai_link; - of_soc->machine.num_links = 1; - of_soc->device.machine = &of_soc->machine; + of_soc->card.dai_link = &of_soc->dai_link; + of_soc->card.num_links = 1; + of_soc->device.card = &of_soc->card; of_soc->dai_link.ops = &of_snd_soc_ops; list_add(&of_soc->list, &of_snd_soc_device_list); @@ -159,7 +159,7 @@ int of_snd_soc_register_platform(struct snd_soc_platform *platform, of_soc->platform_node = node; of_soc->dai_link.cpu_dai = cpu_dai; of_soc->device.platform = platform; - of_soc->machine.name = of_soc->dai_link.cpu_dai->name; + of_soc->card.name = of_soc->dai_link.cpu_dai->name; /* Now try to register the SoC device */ of_snd_soc_register_device(of_soc); diff --git a/sound/soc/omap/n810.c b/sound/soc/omap/n810.c index fae3ad36e0bf..d216b4f9e14e 100644 --- a/sound/soc/omap/n810.c +++ b/sound/soc/omap/n810.c @@ -282,7 +282,7 @@ static struct snd_soc_dai_link n810_dai = { }; /* Audio machine driver */ -static struct snd_soc_machine snd_soc_machine_n810 = { +static struct snd_soc_card snd_soc_n810 = { .name = "N810", .dai_link = &n810_dai, .num_links = 1, @@ -298,7 +298,7 @@ static struct aic3x_setup_data n810_aic33_setup = { /* Audio subsystem */ static struct snd_soc_device n810_snd_devdata = { - .machine = &snd_soc_machine_n810, + .card = &snd_soc_n810, .platform = &omap_soc_platform, .codec_dev = &soc_codec_dev_aic3x, .codec_data = &n810_aic33_setup, diff --git a/sound/soc/omap/omap2evm.c b/sound/soc/omap/omap2evm.c index c37621357d00..5bea31157a94 100644 --- a/sound/soc/omap/omap2evm.c +++ b/sound/soc/omap/omap2evm.c @@ -90,7 +90,7 @@ static struct snd_soc_dai_link omap2evm_dai = { }; /* Audio machine driver */ -static struct snd_soc_machine snd_soc_machine_omap2evm = { +static struct snd_soc_card snd_soc_omap2evm = { .name = "omap2evm", .dai_link = &omap2evm_dai, .num_links = 1, @@ -98,7 +98,7 @@ static struct snd_soc_machine snd_soc_machine_omap2evm = { /* Audio subsystem */ static struct snd_soc_device omap2evm_snd_devdata = { - .machine = &snd_soc_machine_omap2evm, + .card = &snd_soc_omap2evm, .platform = &omap_soc_platform, .codec_dev = &soc_codec_dev_twl4030, }; diff --git a/sound/soc/omap/omap3beagle.c b/sound/soc/omap/omap3beagle.c index ec84a9bbc563..3ed25464627f 100644 --- a/sound/soc/omap/omap3beagle.c +++ b/sound/soc/omap/omap3beagle.c @@ -88,7 +88,7 @@ static struct snd_soc_dai_link omap3beagle_dai = { }; /* Audio machine driver */ -static struct snd_soc_machine snd_soc_machine_omap3beagle = { +static struct snd_soc_card snd_soc_omap3beagle = { .name = "omap3beagle", .dai_link = &omap3beagle_dai, .num_links = 1, @@ -96,7 +96,7 @@ static struct snd_soc_machine snd_soc_machine_omap3beagle = { /* Audio subsystem */ static struct snd_soc_device omap3beagle_snd_devdata = { - .machine = &snd_soc_machine_omap3beagle, + .card = &snd_soc_omap3beagle, .platform = &omap_soc_platform, .codec_dev = &soc_codec_dev_twl4030, }; diff --git a/sound/soc/omap/osk5912.c b/sound/soc/omap/osk5912.c index 0fe733796898..7a8f14d0c772 100644 --- a/sound/soc/omap/osk5912.c +++ b/sound/soc/omap/osk5912.c @@ -143,7 +143,7 @@ static struct snd_soc_dai_link osk_dai = { }; /* Audio machine driver */ -static struct snd_soc_machine snd_soc_machine_osk = { +static struct snd_soc_card snd_soc_card_osk = { .name = "OSK5912", .dai_link = &osk_dai, .num_links = 1, @@ -151,7 +151,7 @@ static struct snd_soc_machine snd_soc_machine_osk = { /* Audio subsystem */ static struct snd_soc_device osk_snd_devdata = { - .machine = &snd_soc_machine_osk, + .card = &snd_soc_card_osk, .platform = &omap_soc_platform, .codec_dev = &soc_codec_dev_tlv320aic23, }; diff --git a/sound/soc/omap/overo.c b/sound/soc/omap/overo.c index c26d1de7da51..eea0c372bb3f 100644 --- a/sound/soc/omap/overo.c +++ b/sound/soc/omap/overo.c @@ -88,7 +88,7 @@ static struct snd_soc_dai_link overo_dai = { }; /* Audio machine driver */ -static struct snd_soc_machine snd_soc_machine_overo = { +static struct snd_soc_card snd_soc_card_overo = { .name = "overo", .dai_link = &overo_dai, .num_links = 1, @@ -96,7 +96,7 @@ static struct snd_soc_machine snd_soc_machine_overo = { /* Audio subsystem */ static struct snd_soc_device overo_snd_devdata = { - .machine = &snd_soc_machine_overo, + .card = &snd_soc_card_overo, .platform = &omap_soc_platform, .codec_dev = &soc_codec_dev_twl4030, }; diff --git a/sound/soc/pxa/corgi.c b/sound/soc/pxa/corgi.c index 2718eaf7895f..647f056a3cb3 100644 --- a/sound/soc/pxa/corgi.c +++ b/sound/soc/pxa/corgi.c @@ -314,7 +314,7 @@ static struct snd_soc_dai_link corgi_dai = { }; /* corgi audio machine driver */ -static struct snd_soc_machine snd_soc_machine_corgi = { +static struct snd_soc_card snd_soc_corgi = { .name = "Corgi", .dai_link = &corgi_dai, .num_links = 1, @@ -328,7 +328,7 @@ static struct wm8731_setup_data corgi_wm8731_setup = { /* corgi audio subsystem */ static struct snd_soc_device corgi_snd_devdata = { - .machine = &snd_soc_machine_corgi, + .card = &snd_soc_corgi, .platform = &pxa2xx_soc_platform, .codec_dev = &soc_codec_dev_wm8731, .codec_data = &corgi_wm8731_setup, diff --git a/sound/soc/pxa/e800_wm9712.c b/sound/soc/pxa/e800_wm9712.c index 6781c5be242f..60c64861512a 100644 --- a/sound/soc/pxa/e800_wm9712.c +++ b/sound/soc/pxa/e800_wm9712.c @@ -29,7 +29,7 @@ #include "pxa2xx-pcm.h" #include "pxa2xx-ac97.h" -static struct snd_soc_machine e800; +static struct snd_soc_card e800; static struct snd_soc_dai_link e800_dai[] = { { @@ -40,14 +40,14 @@ static struct snd_soc_dai_link e800_dai[] = { }, }; -static struct snd_soc_machine e800 = { +static struct snd_soc_card e800 = { .name = "Toshiba e800", .dai_link = e800_dai, .num_links = ARRAY_SIZE(e800_dai), }; static struct snd_soc_device e800_snd_devdata = { - .machine = &e800, + .card = &e800, .platform = &pxa2xx_soc_platform, .codec_dev = &soc_codec_dev_wm9712, }; diff --git a/sound/soc/pxa/em-x270.c b/sound/soc/pxa/em-x270.c index e6ff6929ab4b..4a61925c3104 100644 --- a/sound/soc/pxa/em-x270.c +++ b/sound/soc/pxa/em-x270.c @@ -53,14 +53,14 @@ static struct snd_soc_dai_link em_x270_dai[] = { }, }; -static struct snd_soc_machine em_x270 = { +static struct snd_soc_card em_x270 = { .name = "EM-X270", .dai_link = em_x270_dai, .num_links = ARRAY_SIZE(em_x270_dai), }; static struct snd_soc_device em_x270_snd_devdata = { - .machine = &em_x270, + .card = &em_x270, .platform = &pxa2xx_soc_platform, .codec_dev = &soc_codec_dev_wm9712, }; diff --git a/sound/soc/pxa/palm27x.c b/sound/soc/pxa/palm27x.c index e364abc700db..3bb8879ac8a2 100644 --- a/sound/soc/pxa/palm27x.c +++ b/sound/soc/pxa/palm27x.c @@ -189,14 +189,14 @@ static struct snd_soc_dai_link palm27x_dai[] = { }, }; -static struct snd_soc_machine palm27x_asoc = { +static struct snd_soc_card palm27x_asoc = { .name = "Palm/PXA27x", .dai_link = palm27x_dai, .num_links = ARRAY_SIZE(palm27x_dai), }; static struct snd_soc_device palm27x_snd_devdata = { - .machine = &palm27x_asoc, + .card = &palm27x_asoc, .platform = &pxa2xx_soc_platform, .codec_dev = &soc_codec_dev_wm9712, }; diff --git a/sound/soc/pxa/poodle.c b/sound/soc/pxa/poodle.c index 4d9930c52789..03b510ab2824 100644 --- a/sound/soc/pxa/poodle.c +++ b/sound/soc/pxa/poodle.c @@ -276,7 +276,7 @@ static struct snd_soc_dai_link poodle_dai = { }; /* poodle audio machine driver */ -static struct snd_soc_machine snd_soc_machine_poodle = { +static struct snd_soc_card snd_soc_poodle = { .name = "Poodle", .dai_link = &poodle_dai, .num_links = 1, @@ -290,7 +290,7 @@ static struct wm8731_setup_data poodle_wm8731_setup = { /* poodle audio subsystem */ static struct snd_soc_device poodle_snd_devdata = { - .machine = &snd_soc_machine_poodle, + .card = &snd_soc_poodle, .platform = &pxa2xx_soc_platform, .codec_dev = &soc_codec_dev_wm8731, .codec_data = &poodle_wm8731_setup, diff --git a/sound/soc/pxa/spitz.c b/sound/soc/pxa/spitz.c index d307b6757e95..579d93368f14 100644 --- a/sound/soc/pxa/spitz.c +++ b/sound/soc/pxa/spitz.c @@ -319,7 +319,7 @@ static struct snd_soc_dai_link spitz_dai = { }; /* spitz audio machine driver */ -static struct snd_soc_machine snd_soc_machine_spitz = { +static struct snd_soc_card snd_soc_spitz = { .name = "Spitz", .dai_link = &spitz_dai, .num_links = 1, @@ -333,7 +333,7 @@ static struct wm8750_setup_data spitz_wm8750_setup = { /* spitz audio subsystem */ static struct snd_soc_device spitz_snd_devdata = { - .machine = &snd_soc_machine_spitz, + .card = &snd_soc_spitz, .platform = &pxa2xx_soc_platform, .codec_dev = &soc_codec_dev_wm8750, .codec_data = &spitz_wm8750_setup, diff --git a/sound/soc/pxa/tosa.c b/sound/soc/pxa/tosa.c index afefe41b8c46..9d9be5a14d14 100644 --- a/sound/soc/pxa/tosa.c +++ b/sound/soc/pxa/tosa.c @@ -38,7 +38,7 @@ #include "pxa2xx-pcm.h" #include "pxa2xx-ac97.h" -static struct snd_soc_machine tosa; +static struct snd_soc_card tosa; #define TOSA_HP 0 #define TOSA_MIC_INT 1 @@ -230,14 +230,14 @@ static struct snd_soc_dai_link tosa_dai[] = { }, }; -static struct snd_soc_machine tosa = { +static struct snd_soc_card tosa = { .name = "Tosa", .dai_link = tosa_dai, .num_links = ARRAY_SIZE(tosa_dai), }; static struct snd_soc_device tosa_snd_devdata = { - .machine = &tosa, + .card = &tosa, .platform = &pxa2xx_soc_platform, .codec_dev = &soc_codec_dev_wm9712, }; diff --git a/sound/soc/s3c24xx/ln2440sbc_alc650.c b/sound/soc/s3c24xx/ln2440sbc_alc650.c index 4eab2c19c454..a70cbc0fa070 100644 --- a/sound/soc/s3c24xx/ln2440sbc_alc650.c +++ b/sound/soc/s3c24xx/ln2440sbc_alc650.c @@ -27,7 +27,7 @@ #include "s3c24xx-pcm.h" #include "s3c24xx-ac97.h" -static struct snd_soc_machine ln2440sbc; +static struct snd_soc_card ln2440sbc; static struct snd_soc_dai_link ln2440sbc_dai[] = { { @@ -38,14 +38,14 @@ static struct snd_soc_dai_link ln2440sbc_dai[] = { }, }; -static struct snd_soc_machine ln2440sbc = { +static struct snd_soc_card ln2440sbc = { .name = "LN2440SBC", .dai_link = ln2440sbc_dai, .num_links = ARRAY_SIZE(ln2440sbc_dai), }; static struct snd_soc_device ln2440sbc_snd_ac97_devdata = { - .machine = &ln2440sbc, + .card = &ln2440sbc, .platform = &s3c24xx_soc_platform, .codec_dev = &soc_codec_dev_ac97, }; diff --git a/sound/soc/s3c24xx/neo1973_wm8753.c b/sound/soc/s3c24xx/neo1973_wm8753.c index 87ddfefcc2fb..528fc3f1b45b 100644 --- a/sound/soc/s3c24xx/neo1973_wm8753.c +++ b/sound/soc/s3c24xx/neo1973_wm8753.c @@ -59,7 +59,7 @@ #define NEO_CAPTURE_HEADSET 7 #define NEO_CAPTURE_BLUETOOTH 8 -static struct snd_soc_machine neo1973; +static struct snd_soc_card neo1973; static struct i2c_client *i2c; static int neo1973_hifi_hw_params(struct snd_pcm_substream *substream, @@ -579,7 +579,7 @@ static struct snd_soc_dai_link neo1973_dai[] = { }, }; -static struct snd_soc_machine neo1973 = { +static struct snd_soc_card neo1973 = { .name = "neo1973", .dai_link = neo1973_dai, .num_links = ARRAY_SIZE(neo1973_dai), @@ -591,7 +591,7 @@ static struct wm8753_setup_data neo1973_wm8753_setup = { }; static struct snd_soc_device neo1973_snd_devdata = { - .machine = &neo1973, + .card = &neo1973, .platform = &s3c24xx_soc_platform, .codec_dev = &soc_codec_dev_wm8753, .codec_data = &neo1973_wm8753_setup, diff --git a/sound/soc/s3c24xx/s3c24xx_uda134x.c b/sound/soc/s3c24xx/s3c24xx_uda134x.c index 92d90f2f6901..23325fca1f64 100644 --- a/sound/soc/s3c24xx/s3c24xx_uda134x.c +++ b/sound/soc/s3c24xx/s3c24xx_uda134x.c @@ -232,7 +232,7 @@ static struct snd_soc_dai_link s3c24xx_uda134x_dai_link = { .ops = &s3c24xx_uda134x_ops, }; -static struct snd_soc_machine snd_soc_machine_s3c24xx_uda134x = { +static struct snd_soc_card snd_soc_s3c24xx_uda134x = { .name = "S3C24XX_UDA134X", .dai_link = &s3c24xx_uda134x_dai_link, .num_links = 1, @@ -270,7 +270,7 @@ static struct uda134x_platform_data s3c24xx_uda134x = { }; static struct snd_soc_device s3c24xx_uda134x_snd_devdata = { - .machine = &snd_soc_machine_s3c24xx_uda134x, + .card = &snd_soc_s3c24xx_uda134x, .platform = &s3c24xx_soc_platform, .codec_dev = &soc_codec_dev_uda134x, .codec_data = &s3c24xx_uda134x, diff --git a/sound/soc/s3c24xx/smdk2443_wm9710.c b/sound/soc/s3c24xx/smdk2443_wm9710.c index 8515d6ff03f2..3d2e6a0417ec 100644 --- a/sound/soc/s3c24xx/smdk2443_wm9710.c +++ b/sound/soc/s3c24xx/smdk2443_wm9710.c @@ -23,7 +23,7 @@ #include "s3c24xx-pcm.h" #include "s3c24xx-ac97.h" -static struct snd_soc_machine smdk2443; +static struct snd_soc_card smdk2443; static struct snd_soc_dai_link smdk2443_dai[] = { { @@ -34,14 +34,14 @@ static struct snd_soc_dai_link smdk2443_dai[] = { }, }; -static struct snd_soc_machine smdk2443 = { +static struct snd_soc_card smdk2443 = { .name = "SMDK2443", .dai_link = smdk2443_dai, .num_links = ARRAY_SIZE(smdk2443_dai), }; static struct snd_soc_device smdk2443_snd_ac97_devdata = { - .machine = &smdk2443, + .card = &smdk2443, .platform = &s3c24xx_soc_platform, .codec_dev = &soc_codec_dev_ac97, }; diff --git a/sound/soc/sh/sh7760-ac97.c b/sound/soc/sh/sh7760-ac97.c index 92bfaf4774a7..8b44f9c8a9ff 100644 --- a/sound/soc/sh/sh7760-ac97.c +++ b/sound/soc/sh/sh7760-ac97.c @@ -38,14 +38,14 @@ static struct snd_soc_dai_link sh7760_ac97_dai = { .ops = NULL, }; -static struct snd_soc_machine sh7760_ac97_soc_machine = { +static struct snd_soc_card sh7760_ac97_soc_machine = { .name = "SH7760 AC97", .dai_link = &sh7760_ac97_dai, .num_links = 1, }; static struct snd_soc_device sh7760_ac97_snd_devdata = { - .machine = &sh7760_ac97_soc_machine, + .card = &sh7760_ac97_soc_machine, .platform = &sh7760_soc_platform, .codec_dev = &soc_codec_dev_ac97, }; diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 9feaa7b6dc34..c5cb9516fea4 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -620,7 +620,7 @@ static struct snd_pcm_ops soc_pcm_ops = { static int soc_suspend(struct platform_device *pdev, pm_message_t state) { struct snd_soc_device *socdev = platform_get_drvdata(pdev); - struct snd_soc_machine *machine = socdev->machine; + struct snd_soc_card *card = socdev->card; struct snd_soc_platform *platform = socdev->platform; struct snd_soc_codec_device *codec_dev = socdev->codec_dev; struct snd_soc_codec *codec = socdev->codec; @@ -644,14 +644,14 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state) } /* suspend all pcms */ - for (i = 0; i < machine->num_links; i++) - snd_pcm_suspend_all(machine->dai_link[i].pcm); + for (i = 0; i < card->num_links; i++) + snd_pcm_suspend_all(card->dai_link[i].pcm); - if (machine->suspend_pre) - machine->suspend_pre(pdev, state); + if (card->suspend_pre) + card->suspend_pre(pdev, state); - for (i = 0; i < machine->num_links; i++) { - struct snd_soc_dai *cpu_dai = machine->dai_link[i].cpu_dai; + for (i = 0; i < card->num_links; i++) { + struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai; if (cpu_dai->suspend && cpu_dai->type != SND_SOC_DAI_AC97) cpu_dai->suspend(pdev, cpu_dai); if (platform->suspend) @@ -676,14 +676,14 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state) if (codec_dev->suspend) codec_dev->suspend(pdev, state); - for (i = 0; i < machine->num_links; i++) { - struct snd_soc_dai *cpu_dai = machine->dai_link[i].cpu_dai; + for (i = 0; i < card->num_links; i++) { + struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai; if (cpu_dai->suspend && cpu_dai->type == SND_SOC_DAI_AC97) cpu_dai->suspend(pdev, cpu_dai); } - if (machine->suspend_post) - machine->suspend_post(pdev, state); + if (card->suspend_post) + card->suspend_post(pdev, state); return 0; } @@ -696,7 +696,7 @@ static void soc_resume_deferred(struct work_struct *work) struct snd_soc_device *socdev = container_of(work, struct snd_soc_device, deferred_resume_work); - struct snd_soc_machine *machine = socdev->machine; + struct snd_soc_card *card = socdev->card; struct snd_soc_platform *platform = socdev->platform; struct snd_soc_codec_device *codec_dev = socdev->codec_dev; struct snd_soc_codec *codec = socdev->codec; @@ -709,11 +709,11 @@ static void soc_resume_deferred(struct work_struct *work) dev_info(socdev->dev, "starting resume work\n"); - if (machine->resume_pre) - machine->resume_pre(pdev); + if (card->resume_pre) + card->resume_pre(pdev); - for (i = 0; i < machine->num_links; i++) { - struct snd_soc_dai *cpu_dai = machine->dai_link[i].cpu_dai; + for (i = 0; i < card->num_links; i++) { + struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai; if (cpu_dai->resume && cpu_dai->type == SND_SOC_DAI_AC97) cpu_dai->resume(pdev, cpu_dai); } @@ -739,16 +739,16 @@ static void soc_resume_deferred(struct work_struct *work) dai->dai_ops.digital_mute(dai, 0); } - for (i = 0; i < machine->num_links; i++) { - struct snd_soc_dai *cpu_dai = machine->dai_link[i].cpu_dai; + for (i = 0; i < card->num_links; i++) { + struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai; if (cpu_dai->resume && cpu_dai->type != SND_SOC_DAI_AC97) cpu_dai->resume(pdev, cpu_dai); if (platform->resume) platform->resume(pdev, cpu_dai); } - if (machine->resume_post) - machine->resume_post(pdev); + if (card->resume_post) + card->resume_post(pdev); dev_info(socdev->dev, "resume work completed\n"); @@ -779,18 +779,18 @@ static int soc_probe(struct platform_device *pdev) { int ret = 0, i; struct snd_soc_device *socdev = platform_get_drvdata(pdev); - struct snd_soc_machine *machine = socdev->machine; + struct snd_soc_card *card = socdev->card; struct snd_soc_platform *platform = socdev->platform; struct snd_soc_codec_device *codec_dev = socdev->codec_dev; - if (machine->probe) { - ret = machine->probe(pdev); + if (card->probe) { + ret = card->probe(pdev); if (ret < 0) return ret; } - for (i = 0; i < machine->num_links; i++) { - struct snd_soc_dai *cpu_dai = machine->dai_link[i].cpu_dai; + for (i = 0; i < card->num_links; i++) { + struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai; if (cpu_dai->probe) { ret = cpu_dai->probe(pdev, cpu_dai); if (ret < 0) @@ -825,13 +825,13 @@ platform_err: cpu_dai_err: for (i--; i >= 0; i--) { - struct snd_soc_dai *cpu_dai = machine->dai_link[i].cpu_dai; + struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai; if (cpu_dai->remove) cpu_dai->remove(pdev, cpu_dai); } - if (machine->remove) - machine->remove(pdev); + if (card->remove) + card->remove(pdev); return ret; } @@ -841,7 +841,7 @@ static int soc_remove(struct platform_device *pdev) { int i; struct snd_soc_device *socdev = platform_get_drvdata(pdev); - struct snd_soc_machine *machine = socdev->machine; + struct snd_soc_card *card = socdev->card; struct snd_soc_platform *platform = socdev->platform; struct snd_soc_codec_device *codec_dev = socdev->codec_dev; @@ -853,14 +853,14 @@ static int soc_remove(struct platform_device *pdev) if (codec_dev->remove) codec_dev->remove(pdev); - for (i = 0; i < machine->num_links; i++) { - struct snd_soc_dai *cpu_dai = machine->dai_link[i].cpu_dai; + for (i = 0; i < card->num_links; i++) { + struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai; if (cpu_dai->remove) cpu_dai->remove(pdev, cpu_dai); } - if (machine->remove) - machine->remove(pdev); + if (card->remove) + card->remove(pdev); return 0; } @@ -1212,7 +1212,7 @@ EXPORT_SYMBOL_GPL(snd_soc_test_bits); int snd_soc_new_pcms(struct snd_soc_device *socdev, int idx, const char *xid) { struct snd_soc_codec *codec = socdev->codec; - struct snd_soc_machine *machine = socdev->machine; + struct snd_soc_card *card = socdev->card; int ret = 0, i; mutex_lock(&codec->mutex); @@ -1231,11 +1231,11 @@ int snd_soc_new_pcms(struct snd_soc_device *socdev, int idx, const char *xid) strncpy(codec->card->driver, codec->name, sizeof(codec->card->driver)); /* create the pcms */ - for (i = 0; i < machine->num_links; i++) { - ret = soc_new_pcm(socdev, &machine->dai_link[i], i); + for (i = 0; i < card->num_links; i++) { + ret = soc_new_pcm(socdev, &card->dai_link[i], i); if (ret < 0) { printk(KERN_ERR "asoc: can't create pcm %s\n", - machine->dai_link[i].stream_name); + card->dai_link[i].stream_name); mutex_unlock(&codec->mutex); return ret; } @@ -1258,26 +1258,26 @@ EXPORT_SYMBOL_GPL(snd_soc_new_pcms); int snd_soc_register_card(struct snd_soc_device *socdev) { struct snd_soc_codec *codec = socdev->codec; - struct snd_soc_machine *machine = socdev->machine; + struct snd_soc_card *card = socdev->card; int ret = 0, i, ac97 = 0, err = 0; - for (i = 0; i < machine->num_links; i++) { - if (socdev->machine->dai_link[i].init) { - err = socdev->machine->dai_link[i].init(codec); + for (i = 0; i < card->num_links; i++) { + if (card->dai_link[i].init) { + err = card->dai_link[i].init(codec); if (err < 0) { printk(KERN_ERR "asoc: failed to init %s\n", - socdev->machine->dai_link[i].stream_name); + card->dai_link[i].stream_name); continue; } } - if (socdev->machine->dai_link[i].codec_dai->type == + if (card->dai_link[i].codec_dai->type == SND_SOC_DAI_AC97_BUS) ac97 = 1; } snprintf(codec->card->shortname, sizeof(codec->card->shortname), - "%s", machine->name); + "%s", card->name); snprintf(codec->card->longname, sizeof(codec->card->longname), - "%s (%s)", machine->name, codec->name); + "%s (%s)", card->name, codec->name); ret = snd_card_register(codec->card); if (ret < 0) { diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 0fecbb44726b..61d7d85aa578 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -1402,11 +1402,11 @@ int snd_soc_dapm_set_bias_level(struct snd_soc_device *socdev, enum snd_soc_bias_level level) { struct snd_soc_codec *codec = socdev->codec; - struct snd_soc_machine *machine = socdev->machine; + struct snd_soc_card *card = socdev->card; int ret = 0; - if (machine->set_bias_level) - ret = machine->set_bias_level(machine, level); + if (card->set_bias_level) + ret = card->set_bias_level(card, level); if (ret == 0 && codec->set_bias_level) ret = codec->set_bias_level(codec, level); -- cgit v1.2.3 From a47cbe7263236691ee0bbc392f7fd4ec0da1159f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 23 Jul 2008 14:03:07 +0100 Subject: ASoC: Move DAI structure definitions into new soc-dai.h ASoC v2 factors most of the contents of soc.h out into separate headers, including soc-dai.h for the DAI. Factor the existing DAI API out into this file in order to prepare for backporting of the ASoC v2 DAI API. Also backport some of Liam's improvements to the documentation. Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 209 ++++++++++++++++++++++++++++++++++++++++++++++++ include/sound/soc.h | 148 +--------------------------------- 2 files changed, 211 insertions(+), 146 deletions(-) create mode 100644 include/sound/soc-dai.h (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h new file mode 100644 index 000000000000..08b8f7025c64 --- /dev/null +++ b/include/sound/soc-dai.h @@ -0,0 +1,209 @@ +/* + * linux/sound/soc-dai.h -- ALSA SoC Layer + * + * Copyright: 2005-2008 Wolfson Microelectronics. PLC. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Digital Audio Interface (DAI) API. + */ + +#ifndef __LINUX_SND_SOC_DAI_H +#define __LINUX_SND_SOC_DAI_H + + +#include + +struct snd_pcm_substream; + +/* + * DAI hardware audio formats. + * + * Describes the physical PCM data formating and clocking. Add new formats + * to the end. + */ +#define SND_SOC_DAIFMT_I2S 0 /* I2S mode */ +#define SND_SOC_DAIFMT_RIGHT_J 1 /* Right Justified mode */ +#define SND_SOC_DAIFMT_LEFT_J 2 /* Left Justified mode */ +#define SND_SOC_DAIFMT_DSP_A 3 /* L data msb after FRM LRC */ +#define SND_SOC_DAIFMT_DSP_B 4 /* L data msb during FRM LRC */ +#define SND_SOC_DAIFMT_AC97 5 /* AC97 */ + +/* left and right justified also known as MSB and LSB respectively */ +#define SND_SOC_DAIFMT_MSB SND_SOC_DAIFMT_LEFT_J +#define SND_SOC_DAIFMT_LSB SND_SOC_DAIFMT_RIGHT_J + +/* + * DAI Clock gating. + * + * DAI bit clocks can be be gated (disabled) when not the DAI is not + * sending or receiving PCM data in a frame. This can be used to save power. + */ +#define SND_SOC_DAIFMT_CONT (0 << 4) /* continuous clock */ +#define SND_SOC_DAIFMT_GATED (1 << 4) /* clock is gated */ + +/* + * DAI Left/Right Clocks. + * + * Specifies whether the DAI can support different samples for similtanious + * playback and capture. This usually requires a seperate physical frame + * clock for playback and capture. + */ +#define SND_SOC_DAIFMT_SYNC (0 << 5) /* Tx FRM = Rx FRM */ +#define SND_SOC_DAIFMT_ASYNC (1 << 5) /* Tx FRM ~ Rx FRM */ + +/* + * TDM + * + * Time Division Multiplexing. Allows PCM data to be multplexed with other + * data on the DAI. + */ +#define SND_SOC_DAIFMT_TDM (1 << 6) + +/* + * DAI hardware signal inversions. + * + * Specifies whether the DAI can also support inverted clocks for the specified + * format. + */ +#define SND_SOC_DAIFMT_NB_NF (0 << 8) /* normal bit clock + frame */ +#define SND_SOC_DAIFMT_NB_IF (1 << 8) /* normal bclk + inv frm */ +#define SND_SOC_DAIFMT_IB_NF (2 << 8) /* invert bclk + nor frm */ +#define SND_SOC_DAIFMT_IB_IF (3 << 8) /* invert bclk + frm */ + +/* + * DAI hardware clock masters. + * + * This is wrt the codec, the inverse is true for the interface + * i.e. if the codec is clk and frm master then the interface is + * clk and frame slave. + */ +#define SND_SOC_DAIFMT_CBM_CFM (0 << 12) /* codec clk & frm master */ +#define SND_SOC_DAIFMT_CBS_CFM (1 << 12) /* codec clk slave & frm master */ +#define SND_SOC_DAIFMT_CBM_CFS (2 << 12) /* codec clk master & frame slave */ +#define SND_SOC_DAIFMT_CBS_CFS (3 << 12) /* codec clk & frm slave */ + +#define SND_SOC_DAIFMT_FORMAT_MASK 0x000f +#define SND_SOC_DAIFMT_CLOCK_MASK 0x00f0 +#define SND_SOC_DAIFMT_INV_MASK 0x0f00 +#define SND_SOC_DAIFMT_MASTER_MASK 0xf000 + +/* + * Master Clock Directions + */ +#define SND_SOC_CLOCK_IN 0 +#define SND_SOC_CLOCK_OUT 1 + +struct snd_soc_dai_ops; +struct snd_soc_dai; +struct snd_ac97_bus_ops; + +/* Digital Audio Interface clocking API.*/ +int snd_soc_dai_set_sysclk(struct snd_soc_dai *dai, int clk_id, + unsigned int freq, int dir); + +int snd_soc_dai_set_clkdiv(struct snd_soc_dai *dai, + int div_id, int div); + +int snd_soc_dai_set_pll(struct snd_soc_dai *dai, + int pll_id, unsigned int freq_in, unsigned int freq_out); + +/* Digital Audio interface formatting */ +int snd_soc_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt); + +int snd_soc_dai_set_tdm_slot(struct snd_soc_dai *dai, + unsigned int mask, int slots); + +int snd_soc_dai_set_tristate(struct snd_soc_dai *dai, int tristate); + +/* Digital Audio Interface mute */ +int snd_soc_dai_digital_mute(struct snd_soc_dai *dai, int mute); + +/* + * Digital Audio Interface. + * + * Describes the Digital Audio Interface in terms of it's ALSA, DAI and AC97 + * operations an capabilities. Codec and platfom drivers will register a this + * structure for every DAI they have. + * + * This structure covers the clocking, formating and ALSA operations for each + * interface a + */ +struct snd_soc_dai_ops { + /* + * DAI clocking configuration, all optional. + * Called by soc_card drivers, normally in their hw_params. + */ + int (*set_sysclk)(struct snd_soc_dai *dai, + int clk_id, unsigned int freq, int dir); + int (*set_pll)(struct snd_soc_dai *dai, + int pll_id, unsigned int freq_in, unsigned int freq_out); + int (*set_clkdiv)(struct snd_soc_dai *dai, int div_id, int div); + + /* + * DAI format configuration + * Called by soc_card drivers, normally in their hw_params. + */ + int (*set_fmt)(struct snd_soc_dai *dai, unsigned int fmt); + int (*set_tdm_slot)(struct snd_soc_dai *dai, + unsigned int mask, int slots); + int (*set_tristate)(struct snd_soc_dai *dai, int tristate); + + /* + * DAI digital mute - optional. + * Called by soc-core to minimise any pops. + */ + int (*digital_mute)(struct snd_soc_dai *dai, int mute); +}; + +/* + * Digital Audio Interface runtime data. + * + * Holds runtime data for a DAI. + */ +struct snd_soc_dai { + /* DAI description */ + char *name; + unsigned int id; + unsigned char type; + + /* DAI callbacks */ + int (*probe)(struct platform_device *pdev, + struct snd_soc_dai *dai); + void (*remove)(struct platform_device *pdev, + struct snd_soc_dai *dai); + int (*suspend)(struct platform_device *pdev, + struct snd_soc_dai *dai); + int (*resume)(struct platform_device *pdev, + struct snd_soc_dai *dai); + + /* ops */ + struct snd_soc_ops ops; + struct snd_soc_dai_ops dai_ops; + + /* DAI capabilities */ + struct snd_soc_pcm_stream capture; + struct snd_soc_pcm_stream playback; + + /* DAI runtime info */ + struct snd_pcm_runtime *runtime; + struct snd_soc_codec *codec; + unsigned int active; + unsigned char pop_wait:1; + void *dma_data; + + /* DAI private data */ + void *private_data; + + /* parent codec/platform */ + union { + struct snd_soc_codec *codec; + struct snd_soc_platform *platform; + }; + + struct list_head list; +}; + +#endif diff --git a/include/sound/soc.h b/include/sound/soc.h index 3be17b3c650c..e4465f73aa46 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -151,76 +151,6 @@ enum snd_soc_bias_level { #define SND_SOC_DAI_PCM 0x4 #define SND_SOC_DAI_AC97_BUS 0x8 /* for custom i.e. non ac97_codec.c */ -/* - * DAI hardware audio formats - */ -#define SND_SOC_DAIFMT_I2S 0 /* I2S mode */ -#define SND_SOC_DAIFMT_RIGHT_J 1 /* Right justified mode */ -#define SND_SOC_DAIFMT_LEFT_J 2 /* Left Justified mode */ -#define SND_SOC_DAIFMT_DSP_A 3 /* L data msb after FRM or LRC */ -#define SND_SOC_DAIFMT_DSP_B 4 /* L data msb during FRM or LRC */ -#define SND_SOC_DAIFMT_AC97 5 /* AC97 */ - -#define SND_SOC_DAIFMT_MSB SND_SOC_DAIFMT_LEFT_J -#define SND_SOC_DAIFMT_LSB SND_SOC_DAIFMT_RIGHT_J - -/* - * DAI Gating - */ -#define SND_SOC_DAIFMT_CONT (0 << 4) /* continuous clock */ -#define SND_SOC_DAIFMT_GATED (1 << 4) /* clock is gated when not Tx/Rx */ - -/* - * DAI Sync - * Synchronous LR (Left Right) clocks and Frame signals. - */ -#define SND_SOC_DAIFMT_SYNC (0 << 5) /* Tx FRM = Rx FRM */ -#define SND_SOC_DAIFMT_ASYNC (1 << 5) /* Tx FRM ~ Rx FRM */ - -/* - * TDM - */ -#define SND_SOC_DAIFMT_TDM (1 << 6) - -/* - * DAI hardware signal inversions - */ -#define SND_SOC_DAIFMT_NB_NF (0 << 8) /* normal bclk + frm */ -#define SND_SOC_DAIFMT_NB_IF (1 << 8) /* normal bclk + inv frm */ -#define SND_SOC_DAIFMT_IB_NF (2 << 8) /* invert bclk + nor frm */ -#define SND_SOC_DAIFMT_IB_IF (3 << 8) /* invert bclk + frm */ - -/* - * DAI hardware clock masters - * This is wrt the codec, the inverse is true for the interface - * i.e. if the codec is clk and frm master then the interface is - * clk and frame slave. - */ -#define SND_SOC_DAIFMT_CBM_CFM (0 << 12) /* codec clk & frm master */ -#define SND_SOC_DAIFMT_CBS_CFM (1 << 12) /* codec clk slave & frm master */ -#define SND_SOC_DAIFMT_CBM_CFS (2 << 12) /* codec clk master & frame slave */ -#define SND_SOC_DAIFMT_CBS_CFS (3 << 12) /* codec clk & frm slave */ - -#define SND_SOC_DAIFMT_FORMAT_MASK 0x000f -#define SND_SOC_DAIFMT_CLOCK_MASK 0x00f0 -#define SND_SOC_DAIFMT_INV_MASK 0x0f00 -#define SND_SOC_DAIFMT_MASTER_MASK 0xf000 - - -/* - * Master Clock Directions - */ -#define SND_SOC_CLOCK_IN 0 -#define SND_SOC_CLOCK_OUT 1 - -/* - * AC97 codec ID's bitmask - */ -#define SND_SOC_DAI_AC97_ID0 (1 << 0) -#define SND_SOC_DAI_AC97_ID1 (1 << 1) -#define SND_SOC_DAI_AC97_ID2 (1 << 2) -#define SND_SOC_DAI_AC97_ID3 (1 << 3) - struct snd_soc_device; struct snd_soc_pcm_stream; struct snd_soc_ops; @@ -260,27 +190,6 @@ int snd_soc_new_ac97_codec(struct snd_soc_codec *codec, struct snd_ac97_bus_ops *ops, int num); void snd_soc_free_ac97_codec(struct snd_soc_codec *codec); -/* Digital Audio Interface clocking API.*/ -int snd_soc_dai_set_sysclk(struct snd_soc_dai *dai, int clk_id, - unsigned int freq, int dir); - -int snd_soc_dai_set_clkdiv(struct snd_soc_dai *dai, - int div_id, int div); - -int snd_soc_dai_set_pll(struct snd_soc_dai *dai, - int pll_id, unsigned int freq_in, unsigned int freq_out); - -/* Digital Audio interface formatting */ -int snd_soc_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt); - -int snd_soc_dai_set_tdm_slot(struct snd_soc_dai *dai, - unsigned int mask, int slots); - -int snd_soc_dai_set_tristate(struct snd_soc_dai *dai, int tristate); - -/* Digital Audio Interface mute */ -int snd_soc_dai_digital_mute(struct snd_soc_dai *dai, int mute); - /* *Controls */ @@ -338,61 +247,6 @@ struct snd_soc_ops { int (*trigger)(struct snd_pcm_substream *, int); }; -/* ASoC DAI ops */ -struct snd_soc_dai_ops { - /* DAI clocking configuration */ - int (*set_sysclk)(struct snd_soc_dai *dai, - int clk_id, unsigned int freq, int dir); - int (*set_pll)(struct snd_soc_dai *dai, - int pll_id, unsigned int freq_in, unsigned int freq_out); - int (*set_clkdiv)(struct snd_soc_dai *dai, int div_id, int div); - - /* DAI format configuration */ - int (*set_fmt)(struct snd_soc_dai *dai, unsigned int fmt); - int (*set_tdm_slot)(struct snd_soc_dai *dai, - unsigned int mask, int slots); - int (*set_tristate)(struct snd_soc_dai *dai, int tristate); - - /* digital mute */ - int (*digital_mute)(struct snd_soc_dai *dai, int mute); -}; - -/* SoC DAI (Digital Audio Interface) */ -struct snd_soc_dai { - /* DAI description */ - char *name; - unsigned int id; - unsigned char type; - - /* DAI callbacks */ - int (*probe)(struct platform_device *pdev, - struct snd_soc_dai *dai); - void (*remove)(struct platform_device *pdev, - struct snd_soc_dai *dai); - int (*suspend)(struct platform_device *pdev, - struct snd_soc_dai *dai); - int (*resume)(struct platform_device *pdev, - struct snd_soc_dai *dai); - - /* ops */ - struct snd_soc_ops ops; - struct snd_soc_dai_ops dai_ops; - - /* DAI capabilities */ - struct snd_soc_pcm_stream capture; - struct snd_soc_pcm_stream playback; - - /* DAI runtime info */ - struct snd_pcm_runtime *runtime; - struct snd_soc_codec *codec; - unsigned int active; - unsigned char pop_wait:1; - void *dma_data; - - /* DAI private data */ - void *private_data; -}; - /* SoC Audio Codec */ struct snd_soc_codec { char *name; @@ -543,4 +397,6 @@ struct soc_enum { void *dapm; }; +#include + #endif -- cgit v1.2.3 From dee89c4d94433520e4e3977ae203d4cfbfe385fb Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 18 Nov 2008 22:11:38 +0000 Subject: ASoC: Merge snd_soc_ops into snd_soc_dai_ops Liam Girdwood's ASoC v2 work avoids having two different ops structures for DAIs by merging the members of struct snd_soc_ops into struct snd_soc_dai_ops, allowing per DAI configuration for everything. Backport this change. This paves the way for future work allowing any combination of DAIs to be connected rather than having fixed purpose CODEC and CPU DAIs and only allowing CODEC<->CPU interconnections. Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 20 +++++++++-- sound/soc/atmel/atmel_ssc_dai.c | 21 +++++------ sound/soc/au1x/psc-ac97.c | 5 +-- sound/soc/au1x/psc-i2s.c | 8 ++--- sound/soc/blackfin/bf5xx-i2s.c | 12 ++++--- sound/soc/codecs/ac97.c | 3 +- sound/soc/codecs/ak4535.c | 5 ++- sound/soc/codecs/cs4270.c | 11 +++--- sound/soc/codecs/ssm2602.c | 14 ++++---- sound/soc/codecs/tlv320aic23.c | 19 +++++----- sound/soc/codecs/tlv320aic26.c | 5 ++- sound/soc/codecs/tlv320aic3x.c | 5 ++- sound/soc/codecs/twl4030.c | 5 ++- sound/soc/codecs/uda134x.c | 12 +++---- sound/soc/codecs/uda1380.c | 15 ++++---- sound/soc/codecs/wm8510.c | 5 ++- sound/soc/codecs/wm8580.c | 12 +++---- sound/soc/codecs/wm8728.c | 5 ++- sound/soc/codecs/wm8731.c | 11 +++--- sound/soc/codecs/wm8750.c | 5 ++- sound/soc/codecs/wm8753.c | 25 ++++++------- sound/soc/codecs/wm8900.c | 5 ++- sound/soc/codecs/wm8903.c | 11 +++--- sound/soc/codecs/wm8971.c | 5 ++- sound/soc/codecs/wm8990.c | 6 ++-- sound/soc/codecs/wm9712.c | 6 ++-- sound/soc/codecs/wm9713.c | 21 +++++------ sound/soc/davinci/davinci-i2s.c | 12 ++++--- sound/soc/davinci/davinci-sffsdr.c | 3 +- sound/soc/fsl/fsl_ssi.c | 14 ++++---- sound/soc/fsl/mpc5200_psc_i2s.c | 17 +++++---- sound/soc/omap/omap-mcbsp.c | 14 ++++---- sound/soc/omap/omap2evm.c | 3 +- sound/soc/pxa/pxa-ssp.c | 20 +++++------ sound/soc/pxa/pxa2xx-ac97.c | 9 +++-- sound/soc/pxa/pxa2xx-i2s.c | 15 ++++---- sound/soc/s3c24xx/s3c2412-i2s.c | 8 ++--- sound/soc/s3c24xx/s3c2443-ac97.c | 8 +++-- sound/soc/s3c24xx/s3c24xx-i2s.c | 9 ++--- sound/soc/sh/hac.c | 3 +- sound/soc/sh/ssi.c | 16 ++++----- sound/soc/soc-core.c | 74 +++++++++++++++++++------------------- 42 files changed, 265 insertions(+), 237 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 08b8f7025c64..f51cb55902f7 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -156,6 +156,23 @@ struct snd_soc_dai_ops { * Called by soc-core to minimise any pops. */ int (*digital_mute)(struct snd_soc_dai *dai, int mute); + + /* + * ALSA PCM audio operations - all optional. + * Called by soc-core during audio PCM operations. + */ + int (*startup)(struct snd_pcm_substream *, + struct snd_soc_dai *); + void (*shutdown)(struct snd_pcm_substream *, + struct snd_soc_dai *); + int (*hw_params)(struct snd_pcm_substream *, + struct snd_pcm_hw_params *, struct snd_soc_dai *); + int (*hw_free)(struct snd_pcm_substream *, + struct snd_soc_dai *); + int (*prepare)(struct snd_pcm_substream *, + struct snd_soc_dai *); + int (*trigger)(struct snd_pcm_substream *, int, + struct snd_soc_dai *); }; /* @@ -180,8 +197,7 @@ struct snd_soc_dai { struct snd_soc_dai *dai); /* ops */ - struct snd_soc_ops ops; - struct snd_soc_dai_ops dai_ops; + struct snd_soc_dai_ops ops; /* DAI capabilities */ struct snd_soc_pcm_stream capture; diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c index d290b7894917..916f73b9a18f 100644 --- a/sound/soc/atmel/atmel_ssc_dai.c +++ b/sound/soc/atmel/atmel_ssc_dai.c @@ -202,7 +202,8 @@ static irqreturn_t atmel_ssc_interrupt(int irq, void *dev_id) /* * Startup. Only that one substream allowed in each direction. */ -static int atmel_ssc_startup(struct snd_pcm_substream *substream) +static int atmel_ssc_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = snd_pcm_substream_chip(substream); struct atmel_ssc_info *ssc_p = &ssc_info[rtd->dai->cpu_dai->id]; @@ -231,7 +232,8 @@ static int atmel_ssc_startup(struct snd_pcm_substream *substream) * Shutdown. Clear DMA parameters and shutdown the SSC if there * are no other substreams open. */ -static void atmel_ssc_shutdown(struct snd_pcm_substream *substream) +static void atmel_ssc_shutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = snd_pcm_substream_chip(substream); struct atmel_ssc_info *ssc_p = &ssc_info[rtd->dai->cpu_dai->id]; @@ -332,7 +334,8 @@ static int atmel_ssc_set_dai_clkdiv(struct snd_soc_dai *cpu_dai, * Configure the SSC. */ static int atmel_ssc_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = snd_pcm_substream_chip(substream); int id = rtd->dai->cpu_dai->id; @@ -600,7 +603,8 @@ static int atmel_ssc_hw_params(struct snd_pcm_substream *substream, } -static int atmel_ssc_prepare(struct snd_pcm_substream *substream) +static int atmel_ssc_prepare(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = snd_pcm_substream_chip(substream); struct atmel_ssc_info *ssc_p = &ssc_info[rtd->dai->cpu_dai->id]; @@ -715,8 +719,7 @@ struct snd_soc_dai atmel_ssc_dai[NUM_SSC_DEVICES] = { .startup = atmel_ssc_startup, .shutdown = atmel_ssc_shutdown, .prepare = atmel_ssc_prepare, - .hw_params = atmel_ssc_hw_params,}, - .dai_ops = { + .hw_params = atmel_ssc_hw_params, .set_fmt = atmel_ssc_set_dai_fmt, .set_clkdiv = atmel_ssc_set_dai_clkdiv,}, .private_data = &ssc_info[0], @@ -741,8 +744,7 @@ struct snd_soc_dai atmel_ssc_dai[NUM_SSC_DEVICES] = { .startup = atmel_ssc_startup, .shutdown = atmel_ssc_shutdown, .prepare = atmel_ssc_prepare, - .hw_params = atmel_ssc_hw_params,}, - .dai_ops = { + .hw_params = atmel_ssc_hw_params, .set_fmt = atmel_ssc_set_dai_fmt, .set_clkdiv = atmel_ssc_set_dai_clkdiv,}, .private_data = &ssc_info[1], @@ -766,8 +768,7 @@ struct snd_soc_dai atmel_ssc_dai[NUM_SSC_DEVICES] = { .startup = atmel_ssc_startup, .shutdown = atmel_ssc_shutdown, .prepare = atmel_ssc_prepare, - .hw_params = atmel_ssc_hw_params,}, - .dai_ops = { + .hw_params = atmel_ssc_hw_params, .set_fmt = atmel_ssc_set_dai_fmt, .set_clkdiv = atmel_ssc_set_dai_clkdiv,}, .private_data = &ssc_info[2], diff --git a/sound/soc/au1x/psc-ac97.c b/sound/soc/au1x/psc-ac97.c index 57facbad6825..ad60a6042cad 100644 --- a/sound/soc/au1x/psc-ac97.c +++ b/sound/soc/au1x/psc-ac97.c @@ -160,7 +160,8 @@ struct snd_ac97_bus_ops soc_ac97_ops = { EXPORT_SYMBOL_GPL(soc_ac97_ops); static int au1xpsc_ac97_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { /* FIXME */ struct au1xpsc_audio_data *pscdata = au1xpsc_ac97_workdata; @@ -210,7 +211,7 @@ static int au1xpsc_ac97_hw_params(struct snd_pcm_substream *substream, } static int au1xpsc_ac97_trigger(struct snd_pcm_substream *substream, - int cmd) + int cmd, struct snd_soc_dai *dai) { /* FIXME */ struct au1xpsc_audio_data *pscdata = au1xpsc_ac97_workdata; diff --git a/sound/soc/au1x/psc-i2s.c b/sound/soc/au1x/psc-i2s.c index 9384702c7ebd..05a5acbb16ae 100644 --- a/sound/soc/au1x/psc-i2s.c +++ b/sound/soc/au1x/psc-i2s.c @@ -116,7 +116,8 @@ out: } static int au1xpsc_i2s_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct au1xpsc_audio_data *pscdata = au1xpsc_i2s_workdata; @@ -240,7 +241,8 @@ static int au1xpsc_i2s_stop(struct au1xpsc_audio_data *pscdata, int stype) return 0; } -static int au1xpsc_i2s_trigger(struct snd_pcm_substream *substream, int cmd) +static int au1xpsc_i2s_trigger(struct snd_pcm_substream *substream, int cmd, + struct snd_soc_dai *dai) { struct au1xpsc_audio_data *pscdata = au1xpsc_i2s_workdata; int ret, stype = SUBSTREAM_TYPE(substream); @@ -389,8 +391,6 @@ struct snd_soc_dai au1xpsc_i2s_dai = { .ops = { .trigger = au1xpsc_i2s_trigger, .hw_params = au1xpsc_i2s_hw_params, - }, - .dai_ops = { .set_fmt = au1xpsc_i2s_set_fmt, }, }; diff --git a/sound/soc/blackfin/bf5xx-i2s.c b/sound/soc/blackfin/bf5xx-i2s.c index e020c160ee44..4e675b55b182 100644 --- a/sound/soc/blackfin/bf5xx-i2s.c +++ b/sound/soc/blackfin/bf5xx-i2s.c @@ -132,7 +132,8 @@ static int bf5xx_i2s_set_dai_fmt(struct snd_soc_dai *cpu_dai, return ret; } -static int bf5xx_i2s_startup(struct snd_pcm_substream *substream) +static int bf5xx_i2s_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { pr_debug("%s enter\n", __func__); @@ -142,7 +143,8 @@ static int bf5xx_i2s_startup(struct snd_pcm_substream *substream) } static int bf5xx_i2s_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { int ret = 0; @@ -193,7 +195,8 @@ static int bf5xx_i2s_hw_params(struct snd_pcm_substream *substream, return 0; } -static void bf5xx_i2s_shutdown(struct snd_pcm_substream *substream) +static void bf5xx_i2s_shutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { pr_debug("%s enter\n", __func__); bf5xx_i2s.counter--; @@ -307,8 +310,7 @@ struct snd_soc_dai bf5xx_i2s_dai = { .ops = { .startup = bf5xx_i2s_startup, .shutdown = bf5xx_i2s_shutdown, - .hw_params = bf5xx_i2s_hw_params,}, - .dai_ops = { + .hw_params = bf5xx_i2s_hw_params, .set_fmt = bf5xx_i2s_set_dai_fmt, }, }; diff --git a/sound/soc/codecs/ac97.c b/sound/soc/codecs/ac97.c index bd1ebdc6c86c..8a93aff359d0 100644 --- a/sound/soc/codecs/ac97.c +++ b/sound/soc/codecs/ac97.c @@ -24,7 +24,8 @@ #define AC97_VERSION "0.6" -static int ac97_prepare(struct snd_pcm_substream *substream) +static int ac97_prepare(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_pcm_runtime *runtime = substream->runtime; struct snd_soc_pcm_runtime *rtd = substream->private_data; diff --git a/sound/soc/codecs/ak4535.c b/sound/soc/codecs/ak4535.c index 2a89b5888e11..c742290e5533 100644 --- a/sound/soc/codecs/ak4535.c +++ b/sound/soc/codecs/ak4535.c @@ -339,7 +339,8 @@ static int ak4535_set_dai_sysclk(struct snd_soc_dai *codec_dai, } static int ak4535_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -451,8 +452,6 @@ struct snd_soc_dai ak4535_dai = { .formats = SNDRV_PCM_FMTBIT_S16_LE,}, .ops = { .hw_params = ak4535_hw_params, - }, - .dai_ops = { .set_fmt = ak4535_set_dai_fmt, .digital_mute = ak4535_mute, .set_sysclk = ak4535_set_dai_sysclk, diff --git a/sound/soc/codecs/cs4270.c b/sound/soc/codecs/cs4270.c index 0ff476d7057c..7507d468b200 100644 --- a/sound/soc/codecs/cs4270.c +++ b/sound/soc/codecs/cs4270.c @@ -360,13 +360,14 @@ static int cs4270_i2c_write(struct snd_soc_codec *codec, unsigned int reg, /* * Program the CS4270 with the given hardware parameters. * - * The .dai_ops functions are used to provide board-specific data, like + * The .ops functions are used to provide board-specific data, like * input frequencies, to this driver. This function takes that information, * combines it with the hardware parameters provided, and programs the * hardware accordingly. */ static int cs4270_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -710,10 +711,10 @@ static int cs4270_probe(struct platform_device *pdev) if (codec->control_data) { /* Initialize codec ops */ cs4270_dai.ops.hw_params = cs4270_hw_params; - cs4270_dai.dai_ops.set_sysclk = cs4270_set_dai_sysclk; - cs4270_dai.dai_ops.set_fmt = cs4270_set_dai_fmt; + cs4270_dai.ops.set_sysclk = cs4270_set_dai_sysclk; + cs4270_dai.ops.set_fmt = cs4270_set_dai_fmt; #ifdef CONFIG_SND_SOC_CS4270_HWMUTE - cs4270_dai.dai_ops.digital_mute = cs4270_mute; + cs4270_dai.ops.digital_mute = cs4270_mute; #endif } else printk(KERN_INFO "cs4270: no I2C device found, " diff --git a/sound/soc/codecs/ssm2602.c b/sound/soc/codecs/ssm2602.c index 56dc1c9c7c52..0c5884ea1b00 100644 --- a/sound/soc/codecs/ssm2602.c +++ b/sound/soc/codecs/ssm2602.c @@ -285,7 +285,8 @@ static inline int get_coeff(int mclk, int rate) } static int ssm2602_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { u16 srate; struct snd_soc_pcm_runtime *rtd = substream->private_data; @@ -330,7 +331,8 @@ static int ssm2602_hw_params(struct snd_pcm_substream *substream, return 0; } -static int ssm2602_startup(struct snd_pcm_substream *substream) +static int ssm2602_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -366,7 +368,8 @@ static int ssm2602_startup(struct snd_pcm_substream *substream) return 0; } -static int ssm2602_pcm_prepare(struct snd_pcm_substream *substream) +static int ssm2602_pcm_prepare(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -377,7 +380,8 @@ static int ssm2602_pcm_prepare(struct snd_pcm_substream *substream) return 0; } -static void ssm2602_shutdown(struct snd_pcm_substream *substream) +static void ssm2602_shutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -536,8 +540,6 @@ struct snd_soc_dai ssm2602_dai = { .prepare = ssm2602_pcm_prepare, .hw_params = ssm2602_hw_params, .shutdown = ssm2602_shutdown, - }, - .dai_ops = { .digital_mute = ssm2602_mute, .set_sysclk = ssm2602_set_dai_sysclk, .set_fmt = ssm2602_set_dai_fmt, diff --git a/sound/soc/codecs/tlv320aic23.c b/sound/soc/codecs/tlv320aic23.c index c903e4f48dc4..a4e13d0688c9 100644 --- a/sound/soc/codecs/tlv320aic23.c +++ b/sound/soc/codecs/tlv320aic23.c @@ -418,7 +418,8 @@ static int tlv320aic23_add_widgets(struct snd_soc_codec *codec) } static int tlv320aic23_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -465,7 +466,8 @@ static int tlv320aic23_hw_params(struct snd_pcm_substream *substream, return 0; } -static int tlv320aic23_pcm_prepare(struct snd_pcm_substream *substream) +static int tlv320aic23_pcm_prepare(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -477,7 +479,8 @@ static int tlv320aic23_pcm_prepare(struct snd_pcm_substream *substream) return 0; } -static void tlv320aic23_shutdown(struct snd_pcm_substream *substream) +static void tlv320aic23_shutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -613,12 +616,10 @@ struct snd_soc_dai tlv320aic23_dai = { .prepare = tlv320aic23_pcm_prepare, .hw_params = tlv320aic23_hw_params, .shutdown = tlv320aic23_shutdown, - }, - .dai_ops = { - .digital_mute = tlv320aic23_mute, - .set_fmt = tlv320aic23_set_dai_fmt, - .set_sysclk = tlv320aic23_set_dai_sysclk, - } + .digital_mute = tlv320aic23_mute, + .set_fmt = tlv320aic23_set_dai_fmt, + .set_sysclk = tlv320aic23_set_dai_sysclk, + } }; EXPORT_SYMBOL_GPL(tlv320aic23_dai); diff --git a/sound/soc/codecs/tlv320aic26.c b/sound/soc/codecs/tlv320aic26.c index bed8a9e63ddc..6b7ddfc92573 100644 --- a/sound/soc/codecs/tlv320aic26.c +++ b/sound/soc/codecs/tlv320aic26.c @@ -125,7 +125,8 @@ static int aic26_reg_write(struct snd_soc_codec *codec, unsigned int reg, * Digital Audio Interface Operations */ static int aic26_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -287,8 +288,6 @@ struct snd_soc_dai aic26_dai = { }, .ops = { .hw_params = aic26_hw_params, - }, - .dai_ops = { .digital_mute = aic26_mute, .set_sysclk = aic26_set_sysclk, .set_fmt = aic26_set_fmt, diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c index cff276ee261e..b76bcc3c4110 100644 --- a/sound/soc/codecs/tlv320aic3x.c +++ b/sound/soc/codecs/tlv320aic3x.c @@ -694,7 +694,8 @@ static int aic3x_add_widgets(struct snd_soc_codec *codec) } static int aic3x_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -1009,8 +1010,6 @@ struct snd_soc_dai aic3x_dai = { .formats = AIC3X_FORMATS,}, .ops = { .hw_params = aic3x_hw_params, - }, - .dai_ops = { .digital_mute = aic3x_mute, .set_sysclk = aic3x_set_dai_sysclk, .set_fmt = aic3x_set_dai_fmt, diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c index c778eb446a5b..33489515b928 100644 --- a/sound/soc/codecs/twl4030.c +++ b/sound/soc/codecs/twl4030.c @@ -343,7 +343,8 @@ static int twl4030_set_bias_level(struct snd_soc_codec *codec, } static int twl4030_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -523,8 +524,6 @@ struct snd_soc_dai twl4030_dai = { .formats = TWL4030_FORMATS,}, .ops = { .hw_params = twl4030_hw_params, - }, - .dai_ops = { .set_sysclk = twl4030_set_dai_sysclk, .set_fmt = twl4030_set_dai_fmt, } diff --git a/sound/soc/codecs/uda134x.c b/sound/soc/codecs/uda134x.c index 69ef521a2ed1..91f333cdc7cf 100644 --- a/sound/soc/codecs/uda134x.c +++ b/sound/soc/codecs/uda134x.c @@ -168,7 +168,8 @@ static int uda134x_mute(struct snd_soc_dai *dai, int mute) return 0; } -static int uda134x_startup(struct snd_pcm_substream *substream) +static int uda134x_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -200,7 +201,8 @@ static int uda134x_startup(struct snd_pcm_substream *substream) return 0; } -static void uda134x_shutdown(struct snd_pcm_substream *substream) +static void uda134x_shutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -214,7 +216,8 @@ static void uda134x_shutdown(struct snd_pcm_substream *substream) } static int uda134x_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -484,9 +487,6 @@ struct snd_soc_dai uda134x_dai = { .startup = uda134x_startup, .shutdown = uda134x_shutdown, .hw_params = uda134x_hw_params, - }, - /* DAI operations */ - .dai_ops = { .digital_mute = uda134x_mute, .set_sysclk = uda134x_set_dai_sysclk, .set_fmt = uda134x_set_dai_fmt, diff --git a/sound/soc/codecs/uda1380.c b/sound/soc/codecs/uda1380.c index a69ee72a7af5..330877c70699 100644 --- a/sound/soc/codecs/uda1380.c +++ b/sound/soc/codecs/uda1380.c @@ -407,7 +407,8 @@ static int uda1380_set_dai_fmt(struct snd_soc_dai *codec_dai, * when the DAI is being clocked by the CPU DAI. It's up to the * machine and cpu DAI driver to do this before we are called. */ -static int uda1380_pcm_prepare(struct snd_pcm_substream *substream) +static int uda1380_pcm_prepare(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -439,7 +440,8 @@ static int uda1380_pcm_prepare(struct snd_pcm_substream *substream) } static int uda1380_pcm_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -477,7 +479,8 @@ static int uda1380_pcm_hw_params(struct snd_pcm_substream *substream, return 0; } -static void uda1380_pcm_shutdown(struct snd_pcm_substream *substream) +static void uda1380_pcm_shutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -560,8 +563,6 @@ struct snd_soc_dai uda1380_dai[] = { .hw_params = uda1380_pcm_hw_params, .shutdown = uda1380_pcm_shutdown, .prepare = uda1380_pcm_prepare, - }, - .dai_ops = { .digital_mute = uda1380_mute, .set_fmt = uda1380_set_dai_fmt, }, @@ -579,8 +580,6 @@ struct snd_soc_dai uda1380_dai[] = { .hw_params = uda1380_pcm_hw_params, .shutdown = uda1380_pcm_shutdown, .prepare = uda1380_pcm_prepare, - }, - .dai_ops = { .digital_mute = uda1380_mute, .set_fmt = uda1380_set_dai_fmt, }, @@ -598,8 +597,6 @@ struct snd_soc_dai uda1380_dai[] = { .hw_params = uda1380_pcm_hw_params, .shutdown = uda1380_pcm_shutdown, .prepare = uda1380_pcm_prepare, - }, - .dai_ops = { .set_fmt = uda1380_set_dai_fmt, }, }, diff --git a/sound/soc/codecs/wm8510.c b/sound/soc/codecs/wm8510.c index d8ca2da8d634..173b66c0c766 100644 --- a/sound/soc/codecs/wm8510.c +++ b/sound/soc/codecs/wm8510.c @@ -463,7 +463,8 @@ static int wm8510_set_dai_fmt(struct snd_soc_dai *codec_dai, } static int wm8510_pcm_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -585,8 +586,6 @@ struct snd_soc_dai wm8510_dai = { .formats = WM8510_FORMATS,}, .ops = { .hw_params = wm8510_pcm_hw_params, - }, - .dai_ops = { .digital_mute = wm8510_mute, .set_fmt = wm8510_set_dai_fmt, .set_clkdiv = wm8510_set_dai_clkdiv, diff --git a/sound/soc/codecs/wm8580.c b/sound/soc/codecs/wm8580.c index cbcd7c324ab9..220d4b68904a 100644 --- a/sound/soc/codecs/wm8580.c +++ b/sound/soc/codecs/wm8580.c @@ -548,13 +548,13 @@ static int wm8580_set_dai_pll(struct snd_soc_dai *codec_dai, * Set PCM DAI bit size and sample rate. */ static int wm8580_paif_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct snd_soc_dai_link *dai = rtd->dai; struct snd_soc_device *socdev = rtd->socdev; struct snd_soc_codec *codec = socdev->codec; - u16 paifb = wm8580_read(codec, WM8580_PAIF3 + dai->codec_dai->id); + u16 paifb = wm8580_read(codec, WM8580_PAIF3 + dai->id); paifb &= ~WM8580_AIF_LENGTH_MASK; /* bit size */ @@ -574,7 +574,7 @@ static int wm8580_paif_hw_params(struct snd_pcm_substream *substream, return -EINVAL; } - wm8580_write(codec, WM8580_PAIF3 + dai->codec_dai->id, paifb); + wm8580_write(codec, WM8580_PAIF3 + dai->id, paifb); return 0; } @@ -798,8 +798,6 @@ struct snd_soc_dai wm8580_dai[] = { }, .ops = { .hw_params = wm8580_paif_hw_params, - }, - .dai_ops = { .set_fmt = wm8580_set_paif_dai_fmt, .set_clkdiv = wm8580_set_dai_clkdiv, .set_pll = wm8580_set_dai_pll, @@ -818,8 +816,6 @@ struct snd_soc_dai wm8580_dai[] = { }, .ops = { .hw_params = wm8580_paif_hw_params, - }, - .dai_ops = { .set_fmt = wm8580_set_paif_dai_fmt, .set_clkdiv = wm8580_set_dai_clkdiv, .set_pll = wm8580_set_dai_pll, diff --git a/sound/soc/codecs/wm8728.c b/sound/soc/codecs/wm8728.c index 3e39dea61241..71949bd320d3 100644 --- a/sound/soc/codecs/wm8728.c +++ b/sound/soc/codecs/wm8728.c @@ -147,7 +147,8 @@ static int wm8728_mute(struct snd_soc_dai *dai, int mute) } static int wm8728_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -269,8 +270,6 @@ struct snd_soc_dai wm8728_dai = { }, .ops = { .hw_params = wm8728_hw_params, - }, - .dai_ops = { .digital_mute = wm8728_mute, .set_fmt = wm8728_set_dai_fmt, } diff --git a/sound/soc/codecs/wm8731.c b/sound/soc/codecs/wm8731.c index 7f8a7e36b33e..c0f277053bb2 100644 --- a/sound/soc/codecs/wm8731.c +++ b/sound/soc/codecs/wm8731.c @@ -264,7 +264,8 @@ static inline int get_coeff(int mclk, int rate) } static int wm8731_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -293,7 +294,8 @@ static int wm8731_hw_params(struct snd_pcm_substream *substream, return 0; } -static int wm8731_pcm_prepare(struct snd_pcm_substream *substream) +static int wm8731_pcm_prepare(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -305,7 +307,8 @@ static int wm8731_pcm_prepare(struct snd_pcm_substream *substream) return 0; } -static void wm8731_shutdown(struct snd_pcm_substream *substream) +static void wm8731_shutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -461,8 +464,6 @@ struct snd_soc_dai wm8731_dai = { .prepare = wm8731_pcm_prepare, .hw_params = wm8731_hw_params, .shutdown = wm8731_shutdown, - }, - .dai_ops = { .digital_mute = wm8731_mute, .set_sysclk = wm8731_set_dai_sysclk, .set_fmt = wm8731_set_dai_fmt, diff --git a/sound/soc/codecs/wm8750.c b/sound/soc/codecs/wm8750.c index 9b7296ee5b08..860a1d56830a 100644 --- a/sound/soc/codecs/wm8750.c +++ b/sound/soc/codecs/wm8750.c @@ -614,7 +614,8 @@ static int wm8750_set_dai_fmt(struct snd_soc_dai *codec_dai, } static int wm8750_pcm_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -709,8 +710,6 @@ struct snd_soc_dai wm8750_dai = { .formats = WM8750_FORMATS,}, .ops = { .hw_params = wm8750_pcm_hw_params, - }, - .dai_ops = { .digital_mute = wm8750_mute, .set_fmt = wm8750_set_dai_fmt, .set_sysclk = wm8750_set_dai_sysclk, diff --git a/sound/soc/codecs/wm8753.c b/sound/soc/codecs/wm8753.c index d426eaa22185..5e4cd3bb824a 100644 --- a/sound/soc/codecs/wm8753.c +++ b/sound/soc/codecs/wm8753.c @@ -922,7 +922,8 @@ static int wm8753_vdac_adc_set_dai_fmt(struct snd_soc_dai *codec_dai, * Set PCM DAI bit size and sample rate. */ static int wm8753_pcm_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -1155,7 +1156,8 @@ static int wm8753_i2s_set_dai_fmt(struct snd_soc_dai *codec_dai, * Set PCM DAI bit size and sample rate. */ static int wm8753_i2s_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -1323,16 +1325,15 @@ static const struct snd_soc_dai wm8753_all_dai[] = { .channels_min = 1, .channels_max = 2, .rates = WM8753_RATES, - .formats = WM8753_FORMATS,}, + .formats = WM8753_FORMATS}, .capture = { /* dummy for fast DAI switching */ .stream_name = "Capture", .channels_min = 1, .channels_max = 2, .rates = WM8753_RATES, - .formats = WM8753_FORMATS,}, + .formats = WM8753_FORMATS}, .ops = { - .hw_params = wm8753_i2s_hw_params,}, - .dai_ops = { + .hw_params = wm8753_i2s_hw_params, .digital_mute = wm8753_mute, .set_fmt = wm8753_mode1h_set_dai_fmt, .set_clkdiv = wm8753_set_dai_clkdiv, @@ -1356,8 +1357,7 @@ static const struct snd_soc_dai wm8753_all_dai[] = { .rates = WM8753_RATES, .formats = WM8753_FORMATS,}, .ops = { - .hw_params = wm8753_pcm_hw_params,}, - .dai_ops = { + .hw_params = wm8753_pcm_hw_params, .digital_mute = wm8753_mute, .set_fmt = wm8753_mode1v_set_dai_fmt, .set_clkdiv = wm8753_set_dai_clkdiv, @@ -1385,8 +1385,7 @@ static const struct snd_soc_dai wm8753_all_dai[] = { .rates = WM8753_RATES, .formats = WM8753_FORMATS,}, .ops = { - .hw_params = wm8753_pcm_hw_params,}, - .dai_ops = { + .hw_params = wm8753_pcm_hw_params, .digital_mute = wm8753_mute, .set_fmt = wm8753_mode2_set_dai_fmt, .set_clkdiv = wm8753_set_dai_clkdiv, @@ -1410,8 +1409,7 @@ static const struct snd_soc_dai wm8753_all_dai[] = { .rates = WM8753_RATES, .formats = WM8753_FORMATS,}, .ops = { - .hw_params = wm8753_i2s_hw_params,}, - .dai_ops = { + .hw_params = wm8753_i2s_hw_params, .digital_mute = wm8753_mute, .set_fmt = wm8753_mode3_4_set_dai_fmt, .set_clkdiv = wm8753_set_dai_clkdiv, @@ -1439,8 +1437,7 @@ static const struct snd_soc_dai wm8753_all_dai[] = { .rates = WM8753_RATES, .formats = WM8753_FORMATS,}, .ops = { - .hw_params = wm8753_i2s_hw_params,}, - .dai_ops = { + .hw_params = wm8753_i2s_hw_params, .digital_mute = wm8753_mute, .set_fmt = wm8753_mode3_4_set_dai_fmt, .set_clkdiv = wm8753_set_dai_clkdiv, diff --git a/sound/soc/codecs/wm8900.c b/sound/soc/codecs/wm8900.c index de016f41e04c..d1326be91c8b 100644 --- a/sound/soc/codecs/wm8900.c +++ b/sound/soc/codecs/wm8900.c @@ -727,7 +727,8 @@ static int wm8900_add_widgets(struct snd_soc_codec *codec) } static int wm8900_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -1117,8 +1118,6 @@ struct snd_soc_dai wm8900_dai = { }, .ops = { .hw_params = wm8900_hw_params, - }, - .dai_ops = { .set_clkdiv = wm8900_set_dai_clkdiv, .set_pll = wm8900_set_dai_pll, .set_fmt = wm8900_set_dai_fmt, diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index ce40d7877605..efbe8927b7d2 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -1257,7 +1257,8 @@ static struct { { 0, 0 }, }; -static int wm8903_startup(struct snd_pcm_substream *substream) +static int wm8903_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -1298,7 +1299,8 @@ static int wm8903_startup(struct snd_pcm_substream *substream) return 0; } -static void wm8903_shutdown(struct snd_pcm_substream *substream) +static void wm8903_shutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -1317,7 +1319,8 @@ static void wm8903_shutdown(struct snd_pcm_substream *substream) } static int wm8903_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -1515,8 +1518,6 @@ struct snd_soc_dai wm8903_dai = { .startup = wm8903_startup, .shutdown = wm8903_shutdown, .hw_params = wm8903_hw_params, - }, - .dai_ops = { .digital_mute = wm8903_digital_mute, .set_fmt = wm8903_set_dai_fmt, .set_sysclk = wm8903_set_dai_sysclk diff --git a/sound/soc/codecs/wm8971.c b/sound/soc/codecs/wm8971.c index f41a578ddd4f..26edcc9d6e87 100644 --- a/sound/soc/codecs/wm8971.c +++ b/sound/soc/codecs/wm8971.c @@ -541,7 +541,8 @@ static int wm8971_set_dai_fmt(struct snd_soc_dai *codec_dai, } static int wm8971_pcm_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -634,8 +635,6 @@ struct snd_soc_dai wm8971_dai = { .formats = WM8971_FORMATS,}, .ops = { .hw_params = wm8971_pcm_hw_params, - }, - .dai_ops = { .digital_mute = wm8971_mute, .set_fmt = wm8971_set_dai_fmt, .set_sysclk = wm8971_set_dai_sysclk, diff --git a/sound/soc/codecs/wm8990.c b/sound/soc/codecs/wm8990.c index 2d7b0096d929..13926516d16e 100644 --- a/sound/soc/codecs/wm8990.c +++ b/sound/soc/codecs/wm8990.c @@ -1172,7 +1172,8 @@ static int wm8990_set_dai_clkdiv(struct snd_soc_dai *codec_dai, * Set PCM DAI bit size and sample rate. */ static int wm8990_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -1362,8 +1363,7 @@ struct snd_soc_dai wm8990_dai = { .rates = WM8990_RATES, .formats = WM8990_FORMATS,}, .ops = { - .hw_params = wm8990_hw_params,}, - .dai_ops = { + .hw_params = wm8990_hw_params, .digital_mute = wm8990_mute, .set_fmt = wm8990_set_dai_fmt, .set_clkdiv = wm8990_set_dai_clkdiv, diff --git a/sound/soc/codecs/wm9712.c b/sound/soc/codecs/wm9712.c index ffb471e420e2..81c38e7ad344 100644 --- a/sound/soc/codecs/wm9712.c +++ b/sound/soc/codecs/wm9712.c @@ -487,7 +487,8 @@ static int ac97_write(struct snd_soc_codec *codec, unsigned int reg, return 0; } -static int ac97_prepare(struct snd_pcm_substream *substream) +static int ac97_prepare(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_pcm_runtime *runtime = substream->runtime; struct snd_soc_pcm_runtime *rtd = substream->private_data; @@ -507,7 +508,8 @@ static int ac97_prepare(struct snd_pcm_substream *substream) return ac97_write(codec, reg, runtime->rate); } -static int ac97_aux_prepare(struct snd_pcm_substream *substream) +static int ac97_aux_prepare(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_pcm_runtime *runtime = substream->runtime; struct snd_soc_pcm_runtime *rtd = substream->private_data; diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c index 740bf3cde18d..a0cc5ac969a1 100644 --- a/sound/soc/codecs/wm9713.c +++ b/sound/soc/codecs/wm9713.c @@ -928,7 +928,8 @@ static int wm9713_set_dai_fmt(struct snd_soc_dai *codec_dai, } static int wm9713_pcm_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -954,7 +955,8 @@ static int wm9713_pcm_hw_params(struct snd_pcm_substream *substream, return 0; } -static void wm9713_voiceshutdown(struct snd_pcm_substream *substream) +static void wm9713_voiceshutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; @@ -969,7 +971,8 @@ static void wm9713_voiceshutdown(struct snd_pcm_substream *substream) ac97_write(codec, AC97_EXTENDED_MID, status); } -static int ac97_hifi_prepare(struct snd_pcm_substream *substream) +static int ac97_hifi_prepare(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_pcm_runtime *runtime = substream->runtime; struct snd_soc_pcm_runtime *rtd = substream->private_data; @@ -989,7 +992,8 @@ static int ac97_hifi_prepare(struct snd_pcm_substream *substream) return ac97_write(codec, reg, runtime->rate); } -static int ac97_aux_prepare(struct snd_pcm_substream *substream) +static int ac97_aux_prepare(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_pcm_runtime *runtime = substream->runtime; struct snd_soc_pcm_runtime *rtd = substream->private_data; @@ -1042,8 +1046,7 @@ struct snd_soc_dai wm9713_dai[] = { .rates = WM9713_RATES, .formats = SNDRV_PCM_FMTBIT_S16_LE,}, .ops = { - .prepare = ac97_hifi_prepare,}, - .dai_ops = { + .prepare = ac97_hifi_prepare, .set_clkdiv = wm9713_set_dai_clkdiv, .set_pll = wm9713_set_dai_pll,}, }, @@ -1056,8 +1059,7 @@ struct snd_soc_dai wm9713_dai[] = { .rates = WM9713_RATES, .formats = SNDRV_PCM_FMTBIT_S16_LE,}, .ops = { - .prepare = ac97_aux_prepare,}, - .dai_ops = { + .prepare = ac97_aux_prepare, .set_clkdiv = wm9713_set_dai_clkdiv, .set_pll = wm9713_set_dai_pll,}, }, @@ -1077,8 +1079,7 @@ struct snd_soc_dai wm9713_dai[] = { .formats = WM9713_PCM_FORMATS,}, .ops = { .hw_params = wm9713_pcm_hw_params, - .shutdown = wm9713_voiceshutdown,}, - .dai_ops = { + .shutdown = wm9713_voiceshutdown, .set_clkdiv = wm9713_set_dai_clkdiv, .set_pll = wm9713_set_dai_pll, .set_fmt = wm9713_set_dai_fmt, diff --git a/sound/soc/davinci/davinci-i2s.c b/sound/soc/davinci/davinci-i2s.c index 95df51e803b4..7a17cd0ecf64 100644 --- a/sound/soc/davinci/davinci-i2s.c +++ b/sound/soc/davinci/davinci-i2s.c @@ -188,7 +188,8 @@ static void davinci_mcbsp_stop(struct snd_pcm_substream *substream) davinci_mcbsp_write_reg(dev, DAVINCI_MCBSP_SPCR_REG, w); } -static int davinci_i2s_startup(struct snd_pcm_substream *substream) +static int davinci_i2s_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai; @@ -285,7 +286,8 @@ static int davinci_i2s_set_dai_fmt(struct snd_soc_dai *cpu_dai, } static int davinci_i2s_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct davinci_pcm_dma_params *dma_params = rtd->dai->cpu_dai->dma_data; @@ -349,7 +351,8 @@ static int davinci_i2s_hw_params(struct snd_pcm_substream *substream, return 0; } -static int davinci_i2s_trigger(struct snd_pcm_substream *substream, int cmd) +static int davinci_i2s_trigger(struct snd_pcm_substream *substream, int cmd, + struct snd_soc_dai *dai) { int ret = 0; @@ -473,8 +476,7 @@ struct snd_soc_dai davinci_i2s_dai = { .ops = { .startup = davinci_i2s_startup, .trigger = davinci_i2s_trigger, - .hw_params = davinci_i2s_hw_params,}, - .dai_ops = { + .hw_params = davinci_i2s_hw_params, .set_fmt = davinci_i2s_set_dai_fmt, }, }; diff --git a/sound/soc/davinci/davinci-sffsdr.c b/sound/soc/davinci/davinci-sffsdr.c index fa38f9cd3506..e95fde1766b5 100644 --- a/sound/soc/davinci/davinci-sffsdr.c +++ b/sound/soc/davinci/davinci-sffsdr.c @@ -34,7 +34,8 @@ #include "davinci-i2s.h" static int sffsdr_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai; diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index 157a7895ffa1..52c290bb47bf 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -266,7 +266,8 @@ static irqreturn_t fsl_ssi_isr(int irq, void *dev_id) * If this is the first stream open, then grab the IRQ and program most of * the SSI registers. */ -static int fsl_ssi_startup(struct snd_pcm_substream *substream) +static int fsl_ssi_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct fsl_ssi_private *ssi_private = rtd->dai->cpu_dai->private_data; @@ -411,7 +412,8 @@ static int fsl_ssi_startup(struct snd_pcm_substream *substream) * Note: The SxCCR.DC and SxCCR.PM bits are only used if the SSI is the * clock master. */ -static int fsl_ssi_prepare(struct snd_pcm_substream *substream) +static int fsl_ssi_prepare(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_pcm_runtime *runtime = substream->runtime; struct snd_soc_pcm_runtime *rtd = substream->private_data; @@ -441,7 +443,8 @@ static int fsl_ssi_prepare(struct snd_pcm_substream *substream) * The DMA channel is in external master start and pause mode, which * means the SSI completely controls the flow of data. */ -static int fsl_ssi_trigger(struct snd_pcm_substream *substream, int cmd) +static int fsl_ssi_trigger(struct snd_pcm_substream *substream, int cmd, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct fsl_ssi_private *ssi_private = rtd->dai->cpu_dai->private_data; @@ -490,7 +493,8 @@ static int fsl_ssi_trigger(struct snd_pcm_substream *substream, int cmd) * * Shutdown the SSI if there are no other substreams open. */ -static void fsl_ssi_shutdown(struct snd_pcm_substream *substream) +static void fsl_ssi_shutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct fsl_ssi_private *ssi_private = rtd->dai->cpu_dai->private_data; @@ -578,8 +582,6 @@ static struct snd_soc_dai fsl_ssi_dai_template = { .prepare = fsl_ssi_prepare, .shutdown = fsl_ssi_shutdown, .trigger = fsl_ssi_trigger, - }, - .dai_ops = { .set_sysclk = fsl_ssi_set_sysclk, .set_fmt = fsl_ssi_set_fmt, }, diff --git a/sound/soc/fsl/mpc5200_psc_i2s.c b/sound/soc/fsl/mpc5200_psc_i2s.c index 94a02eaa4825..e2c172f38979 100644 --- a/sound/soc/fsl/mpc5200_psc_i2s.c +++ b/sound/soc/fsl/mpc5200_psc_i2s.c @@ -187,7 +187,8 @@ static irqreturn_t psc_i2s_bcom_irq(int irq, void *_psc_i2s_stream) * If this is the first stream open, then grab the IRQ and program most of * the PSC registers. */ -static int psc_i2s_startup(struct snd_pcm_substream *substream) +static int psc_i2s_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data; @@ -220,7 +221,8 @@ static int psc_i2s_startup(struct snd_pcm_substream *substream) } static int psc_i2s_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data; @@ -256,7 +258,8 @@ static int psc_i2s_hw_params(struct snd_pcm_substream *substream, return 0; } -static int psc_i2s_hw_free(struct snd_pcm_substream *substream) +static int psc_i2s_hw_free(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { snd_pcm_set_runtime_buffer(substream, NULL); return 0; @@ -268,7 +271,8 @@ static int psc_i2s_hw_free(struct snd_pcm_substream *substream) * This function is called by ALSA to start, stop, pause, and resume the DMA * transfer of data. */ -static int psc_i2s_trigger(struct snd_pcm_substream *substream, int cmd) +static int psc_i2s_trigger(struct snd_pcm_substream *substream, int cmd, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data; @@ -383,7 +387,8 @@ static int psc_i2s_trigger(struct snd_pcm_substream *substream, int cmd) * * Shutdown the PSC if there are no other substreams open. */ -static void psc_i2s_shutdown(struct snd_pcm_substream *substream) +static void psc_i2s_shutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct psc_i2s *psc_i2s = rtd->dai->cpu_dai->private_data; @@ -483,8 +488,6 @@ static struct snd_soc_dai psc_i2s_dai_template = { .hw_free = psc_i2s_hw_free, .shutdown = psc_i2s_shutdown, .trigger = psc_i2s_trigger, - }, - .dai_ops = { .set_sysclk = psc_i2s_set_sysclk, .set_fmt = psc_i2s_set_fmt, }, diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c index 3d4060b00eb3..6013898f49b4 100644 --- a/sound/soc/omap/omap-mcbsp.c +++ b/sound/soc/omap/omap-mcbsp.c @@ -138,7 +138,8 @@ static const unsigned long omap34xx_mcbsp_port[][2] = { static const unsigned long omap34xx_mcbsp_port[][2] = {}; #endif -static int omap_mcbsp_dai_startup(struct snd_pcm_substream *substream) +static int omap_mcbsp_dai_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai; @@ -151,7 +152,8 @@ static int omap_mcbsp_dai_startup(struct snd_pcm_substream *substream) return err; } -static void omap_mcbsp_dai_shutdown(struct snd_pcm_substream *substream) +static void omap_mcbsp_dai_shutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai; @@ -163,7 +165,8 @@ static void omap_mcbsp_dai_shutdown(struct snd_pcm_substream *substream) } } -static int omap_mcbsp_dai_trigger(struct snd_pcm_substream *substream, int cmd) +static int omap_mcbsp_dai_trigger(struct snd_pcm_substream *substream, int cmd, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai; @@ -192,7 +195,8 @@ static int omap_mcbsp_dai_trigger(struct snd_pcm_substream *substream, int cmd) } static int omap_mcbsp_dai_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai; @@ -470,8 +474,6 @@ static int omap_mcbsp_dai_set_dai_sysclk(struct snd_soc_dai *cpu_dai, .shutdown = omap_mcbsp_dai_shutdown, \ .trigger = omap_mcbsp_dai_trigger, \ .hw_params = omap_mcbsp_dai_hw_params, \ - }, \ - .dai_ops = { \ .set_fmt = omap_mcbsp_dai_set_dai_fmt, \ .set_clkdiv = omap_mcbsp_dai_set_clkdiv, \ .set_sysclk = omap_mcbsp_dai_set_dai_sysclk, \ diff --git a/sound/soc/omap/omap2evm.c b/sound/soc/omap/omap2evm.c index 5bea31157a94..7b160f9d83f9 100644 --- a/sound/soc/omap/omap2evm.c +++ b/sound/soc/omap/omap2evm.c @@ -38,7 +38,8 @@ #include "../codecs/twl4030.h" static int omap2evm_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_dai *codec_dai = rtd->dai->codec_dai; diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c index e2b54b88c380..d0dd6245a20a 100644 --- a/sound/soc/pxa/pxa-ssp.c +++ b/sound/soc/pxa/pxa-ssp.c @@ -212,7 +212,8 @@ static struct pxa2xx_pcm_dma_params *ssp_dma_params[4][4] = { }, }; -static int pxa_ssp_startup(struct snd_pcm_substream *substream) +static int pxa_ssp_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai; @@ -228,7 +229,8 @@ static int pxa_ssp_startup(struct snd_pcm_substream *substream) return ret; } -static void pxa_ssp_shutdown(struct snd_pcm_substream *substream) +static void pxa_ssp_shutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai; @@ -604,7 +606,8 @@ static int pxa_ssp_set_dai_fmt(struct snd_soc_dai *cpu_dai, * Can be called multiple times by oss emulation. */ static int pxa_ssp_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai; @@ -678,7 +681,8 @@ static int pxa_ssp_hw_params(struct snd_pcm_substream *substream, return 0; } -static int pxa_ssp_trigger(struct snd_pcm_substream *substream, int cmd) +static int pxa_ssp_trigger(struct snd_pcm_substream *substream, int cmd, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai; @@ -806,8 +810,6 @@ struct snd_soc_dai pxa_ssp_dai[] = { .shutdown = pxa_ssp_shutdown, .trigger = pxa_ssp_trigger, .hw_params = pxa_ssp_hw_params, - }, - .dai_ops = { .set_sysclk = pxa_ssp_set_dai_sysclk, .set_clkdiv = pxa_ssp_set_dai_clkdiv, .set_pll = pxa_ssp_set_dai_pll, @@ -840,8 +842,6 @@ struct snd_soc_dai pxa_ssp_dai[] = { .shutdown = pxa_ssp_shutdown, .trigger = pxa_ssp_trigger, .hw_params = pxa_ssp_hw_params, - }, - .dai_ops = { .set_sysclk = pxa_ssp_set_dai_sysclk, .set_clkdiv = pxa_ssp_set_dai_clkdiv, .set_pll = pxa_ssp_set_dai_pll, @@ -875,8 +875,6 @@ struct snd_soc_dai pxa_ssp_dai[] = { .shutdown = pxa_ssp_shutdown, .trigger = pxa_ssp_trigger, .hw_params = pxa_ssp_hw_params, - }, - .dai_ops = { .set_sysclk = pxa_ssp_set_dai_sysclk, .set_clkdiv = pxa_ssp_set_dai_clkdiv, .set_pll = pxa_ssp_set_dai_pll, @@ -910,8 +908,6 @@ struct snd_soc_dai pxa_ssp_dai[] = { .shutdown = pxa_ssp_shutdown, .trigger = pxa_ssp_trigger, .hw_params = pxa_ssp_hw_params, - }, - .dai_ops = { .set_sysclk = pxa_ssp_set_dai_sysclk, .set_clkdiv = pxa_ssp_set_dai_clkdiv, .set_pll = pxa_ssp_set_dai_pll, diff --git a/sound/soc/pxa/pxa2xx-ac97.c b/sound/soc/pxa/pxa2xx-ac97.c index a7a3a9c5c6ff..86667d2f1b75 100644 --- a/sound/soc/pxa/pxa2xx-ac97.c +++ b/sound/soc/pxa/pxa2xx-ac97.c @@ -117,7 +117,8 @@ static void pxa2xx_ac97_remove(struct platform_device *pdev, } static int pxa2xx_ac97_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai; @@ -131,7 +132,8 @@ static int pxa2xx_ac97_hw_params(struct snd_pcm_substream *substream, } static int pxa2xx_ac97_hw_aux_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai; @@ -145,7 +147,8 @@ static int pxa2xx_ac97_hw_aux_params(struct snd_pcm_substream *substream, } static int pxa2xx_ac97_hw_mic_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai; diff --git a/sound/soc/pxa/pxa2xx-i2s.c b/sound/soc/pxa/pxa2xx-i2s.c index e758034db5c3..9a3e55b48129 100644 --- a/sound/soc/pxa/pxa2xx-i2s.c +++ b/sound/soc/pxa/pxa2xx-i2s.c @@ -121,7 +121,8 @@ static struct pxa2xx_gpio gpio_bus[] = { }, }; -static int pxa2xx_i2s_startup(struct snd_pcm_substream *substream) +static int pxa2xx_i2s_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai; @@ -187,7 +188,8 @@ static int pxa2xx_i2s_set_dai_sysclk(struct snd_soc_dai *cpu_dai, } static int pxa2xx_i2s_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai; @@ -248,7 +250,8 @@ static int pxa2xx_i2s_hw_params(struct snd_pcm_substream *substream, return 0; } -static int pxa2xx_i2s_trigger(struct snd_pcm_substream *substream, int cmd) +static int pxa2xx_i2s_trigger(struct snd_pcm_substream *substream, int cmd, + struct snd_soc_dai *dai) { int ret = 0; @@ -269,7 +272,8 @@ static int pxa2xx_i2s_trigger(struct snd_pcm_substream *substream, int cmd) return ret; } -static void pxa2xx_i2s_shutdown(struct snd_pcm_substream *substream) +static void pxa2xx_i2s_shutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { SACR1 |= SACR1_DRPL; @@ -353,8 +357,7 @@ struct snd_soc_dai pxa_i2s_dai = { .startup = pxa2xx_i2s_startup, .shutdown = pxa2xx_i2s_shutdown, .trigger = pxa2xx_i2s_trigger, - .hw_params = pxa2xx_i2s_hw_params,}, - .dai_ops = { + .hw_params = pxa2xx_i2s_hw_params, .set_fmt = pxa2xx_i2s_set_dai_fmt, .set_sysclk = pxa2xx_i2s_set_dai_sysclk, }, diff --git a/sound/soc/s3c24xx/s3c2412-i2s.c b/sound/soc/s3c24xx/s3c2412-i2s.c index ded7d995a922..360cc2a49d9d 100644 --- a/sound/soc/s3c24xx/s3c2412-i2s.c +++ b/sound/soc/s3c24xx/s3c2412-i2s.c @@ -343,7 +343,8 @@ static int s3c2412_i2s_set_fmt(struct snd_soc_dai *cpu_dai, } static int s3c2412_i2s_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; u32 iismod; @@ -373,7 +374,8 @@ static int s3c2412_i2s_hw_params(struct snd_pcm_substream *substream, return 0; } -static int s3c2412_i2s_trigger(struct snd_pcm_substream *substream, int cmd) +static int s3c2412_i2s_trigger(struct snd_pcm_substream *substream, int cmd, + struct snd_soc_dai *dai) { int capture = (substream->stream == SNDRV_PCM_STREAM_CAPTURE); unsigned long irqs; @@ -730,8 +732,6 @@ struct snd_soc_dai s3c2412_i2s_dai = { .ops = { .trigger = s3c2412_i2s_trigger, .hw_params = s3c2412_i2s_hw_params, - }, - .dai_ops = { .set_fmt = s3c2412_i2s_set_fmt, .set_clkdiv = s3c2412_i2s_set_clkdiv, .set_sysclk = s3c2412_i2s_set_sysclk, diff --git a/sound/soc/s3c24xx/s3c2443-ac97.c b/sound/soc/s3c24xx/s3c2443-ac97.c index 19c5c3cf5d8c..31377821b2c5 100644 --- a/sound/soc/s3c24xx/s3c2443-ac97.c +++ b/sound/soc/s3c24xx/s3c2443-ac97.c @@ -271,7 +271,8 @@ static void s3c2443_ac97_remove(struct platform_device *pdev, } static int s3c2443_ac97_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai; @@ -313,7 +314,8 @@ static int s3c2443_ac97_trigger(struct snd_pcm_substream *substream, int cmd) } static int s3c2443_ac97_hw_mic_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_dai *cpu_dai = rtd->dai->cpu_dai; @@ -327,7 +329,7 @@ static int s3c2443_ac97_hw_mic_params(struct snd_pcm_substream *substream, } static int s3c2443_ac97_mic_trigger(struct snd_pcm_substream *substream, - int cmd) + int cmd, struct snd_soc_dai *dai) { u32 ac_glbctrl; diff --git a/sound/soc/s3c24xx/s3c24xx-i2s.c b/sound/soc/s3c24xx/s3c24xx-i2s.c index c18977bceaf2..1bac9dd3dbd4 100644 --- a/sound/soc/s3c24xx/s3c24xx-i2s.c +++ b/sound/soc/s3c24xx/s3c24xx-i2s.c @@ -243,7 +243,8 @@ static int s3c24xx_i2s_set_fmt(struct snd_soc_dai *cpu_dai, } static int s3c24xx_i2s_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; u32 iismod; @@ -279,7 +280,8 @@ static int s3c24xx_i2s_hw_params(struct snd_pcm_substream *substream, return 0; } -static int s3c24xx_i2s_trigger(struct snd_pcm_substream *substream, int cmd) +static int s3c24xx_i2s_trigger(struct snd_pcm_substream *substream, int cmd, + struct snd_soc_dai *dai) { int ret = 0; @@ -475,8 +477,7 @@ struct snd_soc_dai s3c24xx_i2s_dai = { .formats = SNDRV_PCM_FMTBIT_S8 | SNDRV_PCM_FMTBIT_S16_LE,}, .ops = { .trigger = s3c24xx_i2s_trigger, - .hw_params = s3c24xx_i2s_hw_params,}, - .dai_ops = { + .hw_params = s3c24xx_i2s_hw_params, .set_fmt = s3c24xx_i2s_set_fmt, .set_clkdiv = s3c24xx_i2s_set_clkdiv, .set_sysclk = s3c24xx_i2s_set_sysclk, diff --git a/sound/soc/sh/hac.c b/sound/soc/sh/hac.c index df7bc345c320..3318071dc80f 100644 --- a/sound/soc/sh/hac.c +++ b/sound/soc/sh/hac.c @@ -236,7 +236,8 @@ struct snd_ac97_bus_ops soc_ac97_ops = { EXPORT_SYMBOL_GPL(soc_ac97_ops); static int hac_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct hac_priv *hac = &hac_cpu_data[rtd->dai->cpu_dai->id]; diff --git a/sound/soc/sh/ssi.c b/sound/soc/sh/ssi.c index 55c3464163ab..52a233840d27 100644 --- a/sound/soc/sh/ssi.c +++ b/sound/soc/sh/ssi.c @@ -89,7 +89,8 @@ struct ssi_priv { * track usage of the SSI; it is simplex-only so prevent attempts of * concurrent playback + capture. FIXME: any locking required? */ -static int ssi_startup(struct snd_pcm_substream *substream) +static int ssi_startup(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct ssi_priv *ssi = &ssi_cpu_data[rtd->dai->cpu_dai->id]; @@ -101,7 +102,8 @@ static int ssi_startup(struct snd_pcm_substream *substream) return 0; } -static void ssi_shutdown(struct snd_pcm_substream *substream) +static void ssi_shutdown(struct snd_pcm_substream *substream, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct ssi_priv *ssi = &ssi_cpu_data[rtd->dai->cpu_dai->id]; @@ -109,7 +111,8 @@ static void ssi_shutdown(struct snd_pcm_substream *substream) ssi->inuse = 0; } -static int ssi_trigger(struct snd_pcm_substream *substream, int cmd) +static int ssi_trigger(struct snd_pcm_substream *substream, int cmd, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct ssi_priv *ssi = &ssi_cpu_data[rtd->dai->cpu_dai->id]; @@ -129,7 +132,8 @@ static int ssi_trigger(struct snd_pcm_substream *substream, int cmd) } static int ssi_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params) + struct snd_pcm_hw_params *params, + struct snd_soc_dai *dai) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct ssi_priv *ssi = &ssi_cpu_data[rtd->dai->cpu_dai->id]; @@ -354,8 +358,6 @@ struct snd_soc_dai sh4_ssi_dai[] = { .shutdown = ssi_shutdown, .trigger = ssi_trigger, .hw_params = ssi_hw_params, - }, - .dai_ops = { .set_sysclk = ssi_set_sysclk, .set_clkdiv = ssi_set_clkdiv, .set_fmt = ssi_set_fmt, @@ -383,8 +385,6 @@ struct snd_soc_dai sh4_ssi_dai[] = { .shutdown = ssi_shutdown, .trigger = ssi_trigger, .hw_params = ssi_hw_params, - }, - .dai_ops = { .set_sysclk = ssi_set_sysclk, .set_clkdiv = ssi_set_clkdiv, .set_fmt = ssi_set_fmt, diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index c5cb9516fea4..43f4060dbe75 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -134,7 +134,7 @@ static int soc_pcm_open(struct snd_pcm_substream *substream) /* startup the audio subsystem */ if (cpu_dai->ops.startup) { - ret = cpu_dai->ops.startup(substream); + ret = cpu_dai->ops.startup(substream, cpu_dai); if (ret < 0) { printk(KERN_ERR "asoc: can't open interface %s\n", cpu_dai->name); @@ -151,7 +151,7 @@ static int soc_pcm_open(struct snd_pcm_substream *substream) } if (codec_dai->ops.startup) { - ret = codec_dai->ops.startup(substream); + ret = codec_dai->ops.startup(substream, codec_dai); if (ret < 0) { printk(KERN_ERR "asoc: can't open codec %s\n", codec_dai->name); @@ -248,7 +248,7 @@ codec_dai_err: platform_err: if (cpu_dai->ops.shutdown) - cpu_dai->ops.shutdown(substream); + cpu_dai->ops.shutdown(substream, cpu_dai); out: mutex_unlock(&pcm_mutex); return ret; @@ -339,10 +339,10 @@ static int soc_codec_close(struct snd_pcm_substream *substream) snd_soc_dai_digital_mute(codec_dai, 1); if (cpu_dai->ops.shutdown) - cpu_dai->ops.shutdown(substream); + cpu_dai->ops.shutdown(substream, cpu_dai); if (codec_dai->ops.shutdown) - codec_dai->ops.shutdown(substream); + codec_dai->ops.shutdown(substream, codec_dai); if (machine->ops && machine->ops->shutdown) machine->ops->shutdown(substream); @@ -406,7 +406,7 @@ static int soc_pcm_prepare(struct snd_pcm_substream *substream) } if (codec_dai->ops.prepare) { - ret = codec_dai->ops.prepare(substream); + ret = codec_dai->ops.prepare(substream, codec_dai); if (ret < 0) { printk(KERN_ERR "asoc: codec DAI prepare error\n"); goto out; @@ -414,7 +414,7 @@ static int soc_pcm_prepare(struct snd_pcm_substream *substream) } if (cpu_dai->ops.prepare) { - ret = cpu_dai->ops.prepare(substream); + ret = cpu_dai->ops.prepare(substream, cpu_dai); if (ret < 0) { printk(KERN_ERR "asoc: cpu DAI prepare error\n"); goto out; @@ -491,7 +491,7 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream, } if (codec_dai->ops.hw_params) { - ret = codec_dai->ops.hw_params(substream, params); + ret = codec_dai->ops.hw_params(substream, params, codec_dai); if (ret < 0) { printk(KERN_ERR "asoc: can't set codec %s hw params\n", codec_dai->name); @@ -500,7 +500,7 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream, } if (cpu_dai->ops.hw_params) { - ret = cpu_dai->ops.hw_params(substream, params); + ret = cpu_dai->ops.hw_params(substream, params, cpu_dai); if (ret < 0) { printk(KERN_ERR "asoc: interface %s hw params failed\n", cpu_dai->name); @@ -523,11 +523,11 @@ out: platform_err: if (cpu_dai->ops.hw_free) - cpu_dai->ops.hw_free(substream); + cpu_dai->ops.hw_free(substream, cpu_dai); interface_err: if (codec_dai->ops.hw_free) - codec_dai->ops.hw_free(substream); + codec_dai->ops.hw_free(substream, codec_dai); codec_err: if (machine->ops && machine->ops->hw_free) @@ -566,10 +566,10 @@ static int soc_pcm_hw_free(struct snd_pcm_substream *substream) /* now free hw params for the DAI's */ if (codec_dai->ops.hw_free) - codec_dai->ops.hw_free(substream); + codec_dai->ops.hw_free(substream, codec_dai); if (cpu_dai->ops.hw_free) - cpu_dai->ops.hw_free(substream); + cpu_dai->ops.hw_free(substream, cpu_dai); mutex_unlock(&pcm_mutex); return 0; @@ -586,7 +586,7 @@ static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd) int ret; if (codec_dai->ops.trigger) { - ret = codec_dai->ops.trigger(substream, cmd); + ret = codec_dai->ops.trigger(substream, cmd, codec_dai); if (ret < 0) return ret; } @@ -598,7 +598,7 @@ static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd) } if (cpu_dai->ops.trigger) { - ret = cpu_dai->ops.trigger(substream, cmd); + ret = cpu_dai->ops.trigger(substream, cmd, cpu_dai); if (ret < 0) return ret; } @@ -637,10 +637,10 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state) snd_power_change_state(codec->card, SNDRV_CTL_POWER_D3hot); /* mute any active DAC's */ - for (i = 0; i < machine->num_links; i++) { - struct snd_soc_dai *dai = machine->dai_link[i].codec_dai; - if (dai->dai_ops.digital_mute && dai->playback.active) - dai->dai_ops.digital_mute(dai, 1); + for (i = 0; i < card->num_links; i++) { + struct snd_soc_dai *dai = card->dai_link[i].codec_dai; + if (dai->ops.digital_mute && dai->playback.active) + dai->ops.digital_mute(dai, 1); } /* suspend all pcms */ @@ -733,10 +733,10 @@ static void soc_resume_deferred(struct work_struct *work) } /* unmute any active DACs */ - for (i = 0; i < machine->num_links; i++) { - struct snd_soc_dai *dai = machine->dai_link[i].codec_dai; - if (dai->dai_ops.digital_mute && dai->playback.active) - dai->dai_ops.digital_mute(dai, 0); + for (i = 0; i < card->num_links; i++) { + struct snd_soc_dai *dai = card->dai_link[i].codec_dai; + if (dai->ops.digital_mute && dai->playback.active) + dai->ops.digital_mute(dai, 0); } for (i = 0; i < card->num_links; i++) { @@ -1849,8 +1849,8 @@ EXPORT_SYMBOL_GPL(snd_soc_put_volsw_s8); int snd_soc_dai_set_sysclk(struct snd_soc_dai *dai, int clk_id, unsigned int freq, int dir) { - if (dai->dai_ops.set_sysclk) - return dai->dai_ops.set_sysclk(dai, clk_id, freq, dir); + if (dai->ops.set_sysclk) + return dai->ops.set_sysclk(dai, clk_id, freq, dir); else return -EINVAL; } @@ -1869,8 +1869,8 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_sysclk); int snd_soc_dai_set_clkdiv(struct snd_soc_dai *dai, int div_id, int div) { - if (dai->dai_ops.set_clkdiv) - return dai->dai_ops.set_clkdiv(dai, div_id, div); + if (dai->ops.set_clkdiv) + return dai->ops.set_clkdiv(dai, div_id, div); else return -EINVAL; } @@ -1888,8 +1888,8 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_clkdiv); int snd_soc_dai_set_pll(struct snd_soc_dai *dai, int pll_id, unsigned int freq_in, unsigned int freq_out) { - if (dai->dai_ops.set_pll) - return dai->dai_ops.set_pll(dai, pll_id, freq_in, freq_out); + if (dai->ops.set_pll) + return dai->ops.set_pll(dai, pll_id, freq_in, freq_out); else return -EINVAL; } @@ -1905,8 +1905,8 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_pll); */ int snd_soc_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) { - if (dai->dai_ops.set_fmt) - return dai->dai_ops.set_fmt(dai, fmt); + if (dai->ops.set_fmt) + return dai->ops.set_fmt(dai, fmt); else return -EINVAL; } @@ -1924,8 +1924,8 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_fmt); int snd_soc_dai_set_tdm_slot(struct snd_soc_dai *dai, unsigned int mask, int slots) { - if (dai->dai_ops.set_sysclk) - return dai->dai_ops.set_tdm_slot(dai, mask, slots); + if (dai->ops.set_sysclk) + return dai->ops.set_tdm_slot(dai, mask, slots); else return -EINVAL; } @@ -1940,8 +1940,8 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_tdm_slot); */ int snd_soc_dai_set_tristate(struct snd_soc_dai *dai, int tristate) { - if (dai->dai_ops.set_sysclk) - return dai->dai_ops.set_tristate(dai, tristate); + if (dai->ops.set_sysclk) + return dai->ops.set_tristate(dai, tristate); else return -EINVAL; } @@ -1956,8 +1956,8 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_tristate); */ int snd_soc_dai_digital_mute(struct snd_soc_dai *dai, int mute) { - if (dai->dai_ops.digital_mute) - return dai->dai_ops.digital_mute(dai, mute); + if (dai->ops.digital_mute) + return dai->ops.digital_mute(dai, mute); else return -EINVAL; } -- cgit v1.2.3 From 4821277f36e008b531728e359fbbedb229117f4b Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 3 Nov 2008 21:05:01 +0100 Subject: mac80211: fix BUILD_BUG_ON() caused by misalignment on arm On ARM alignment is done slightly different from other architectures. struct ieee80211_tx_rate is aligned to word size, even though it only has 3 single-byte members, which triggers the BUILD_BUG_ON in ieee80211_tx_info_clear_status This patch marks the struct ieee80211_tx_rate as packed, so that ARM behaves like the other architectures. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 1b8ed421feaa..c813fbf7d364 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -307,7 +307,7 @@ struct ieee80211_tx_rate { s8 idx; u8 count; u8 flags; -}; +} __attribute__((packed)); /** * struct ieee80211_tx_info - skb transmit information -- cgit v1.2.3 From 0ed94eaaed618634f68197161203aac9f849471e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 7 Nov 2008 19:50:42 -0800 Subject: mac80211: remove more excess kernel-doc Delete kernel-doc struct descriptions for fields that don't exist: Warning(include/net/mac80211.h:1263): Excess struct/union/enum/typedef member 'conf_ht' description in 'ieee80211_ops' Warning(net/mac80211/sta_info.h:309): Excess struct/union/enum/typedef member 'addr' description in 'sta_info' Warning(net/mac80211/sta_info.h:309): Excess struct/union/enum/typedef member 'aid' description in 'sta_info' Signed-off-by: Randy Dunlap cc: Johannes Berg cc: John W. Linville Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 -- net/mac80211/sta_info.h | 3 --- 2 files changed, 5 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c813fbf7d364..0dbbfc7b0509 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1269,8 +1269,6 @@ enum ieee80211_ampdu_mlme_action { * This is needed only for IBSS mode and the result of this function is * used to determine whether to reply to Probe Requests. * - * @conf_ht: Configures low level driver with 802.11n HT data. Must be atomic. - * * @ampdu_action: Perform a certain A-MPDU action * The RA/TID combination determines the destination and TID we want * the ampdu action to be performed for. The action is defined through diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 5ad9250b63ab..dc2606d0ae77 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -169,9 +169,6 @@ struct sta_ampdu_mlme { * @lock: used for locking all fields that require locking, see comments * in the header file. * @flaglock: spinlock for flags accesses - * @addr: MAC address of this STA - * @aid: STA's unique AID (1..2007, 0 = not assigned yet), - * only used in AP (and IBSS?) mode * @listen_interval: listen interval of this station, when we're acting as AP * @pin_status: used internally for pinning a STA struct into memory * @flags: STA flags, see &enum ieee80211_sta_info_flags -- cgit v1.2.3 From 274bfb8dc5ffa16cb073801bebe76ab7f4e2e73d Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Wed, 29 Oct 2008 11:35:05 -0400 Subject: lib80211: absorb crypto bits from net/ieee80211 These bits are shared already between ipw2x00 and hostap, and could probably be shared both more cleanly and with other drivers. This commit simply relocates the code to lib80211 and adjusts the drivers appropriately. Signed-off-by: John W. Linville --- drivers/net/wireless/hostap/Kconfig | 6 +- drivers/net/wireless/hostap/hostap.h | 2 +- drivers/net/wireless/hostap/hostap_80211.h | 2 +- drivers/net/wireless/hostap/hostap_80211_rx.c | 10 +- drivers/net/wireless/hostap/hostap_80211_tx.c | 8 +- drivers/net/wireless/hostap/hostap_ap.c | 12 +- drivers/net/wireless/hostap/hostap_ap.h | 8 +- drivers/net/wireless/hostap/hostap_hw.c | 36 +- drivers/net/wireless/hostap/hostap_ioctl.c | 110 ++-- drivers/net/wireless/hostap/hostap_main.c | 19 +- drivers/net/wireless/hostap/hostap_proc.c | 20 +- drivers/net/wireless/hostap/hostap_wlan.h | 6 +- drivers/net/wireless/ipw2100.c | 12 +- drivers/net/wireless/ipw2200.c | 12 +- include/net/ieee80211.h | 10 +- include/net/ieee80211_crypt.h | 108 ---- include/net/lib80211.h | 108 ++++ net/ieee80211/Kconfig | 15 +- net/ieee80211/Makefile | 4 - net/ieee80211/ieee80211_crypt.c | 206 ------- net/ieee80211/ieee80211_crypt_ccmp.c | 492 ---------------- net/ieee80211/ieee80211_crypt_tkip.c | 782 ------------------------- net/ieee80211/ieee80211_crypt_wep.c | 295 ---------- net/ieee80211/ieee80211_module.c | 23 +- net/ieee80211/ieee80211_rx.c | 8 +- net/ieee80211/ieee80211_tx.c | 7 +- net/ieee80211/ieee80211_wx.c | 68 +-- net/wireless/Kconfig | 9 + net/wireless/Makefile | 3 + net/wireless/lib80211.c | 191 ++++++- net/wireless/lib80211_crypt_ccmp.c | 492 ++++++++++++++++ net/wireless/lib80211_crypt_tkip.c | 784 ++++++++++++++++++++++++++ net/wireless/lib80211_crypt_wep.c | 296 ++++++++++ 33 files changed, 2075 insertions(+), 2089 deletions(-) delete mode 100644 include/net/ieee80211_crypt.h delete mode 100644 net/ieee80211/ieee80211_crypt.c delete mode 100644 net/ieee80211/ieee80211_crypt_ccmp.c delete mode 100644 net/ieee80211/ieee80211_crypt_tkip.c delete mode 100644 net/ieee80211/ieee80211_crypt_wep.c create mode 100644 net/wireless/lib80211_crypt_ccmp.c create mode 100644 net/wireless/lib80211_crypt_tkip.c create mode 100644 net/wireless/lib80211_crypt_wep.c (limited to 'include') diff --git a/drivers/net/wireless/hostap/Kconfig b/drivers/net/wireless/hostap/Kconfig index 1fef33169fdd..87bbd4db4bad 100644 --- a/drivers/net/wireless/hostap/Kconfig +++ b/drivers/net/wireless/hostap/Kconfig @@ -2,8 +2,10 @@ config HOSTAP tristate "IEEE 802.11 for Host AP (Prism2/2.5/3 and WEP/TKIP/CCMP)" depends on WLAN_80211 select WIRELESS_EXT - select IEEE80211 - select IEEE80211_CRYPT_WEP + select LIB80211 + select LIB80211_CRYPT_WEP + select LIB80211_CRYPT_TKIP + select LIB80211_CRYPT_CCMP ---help--- Shared driver code for IEEE 802.11b wireless cards based on Intersil Prism2/2.5/3 chipset. This driver supports so called diff --git a/drivers/net/wireless/hostap/hostap.h b/drivers/net/wireless/hostap/hostap.h index 3a386a636cca..2453deaa3e00 100644 --- a/drivers/net/wireless/hostap/hostap.h +++ b/drivers/net/wireless/hostap/hostap.h @@ -63,7 +63,7 @@ void ap_control_flush_macs(struct mac_restrictions *mac_restrictions); int ap_control_kick_mac(struct ap_data *ap, struct net_device *dev, u8 *mac); void ap_control_kickall(struct ap_data *ap); void * ap_crypt_get_ptrs(struct ap_data *ap, u8 *addr, int permanent, - struct ieee80211_crypt_data ***crypt); + struct lib80211_crypt_data ***crypt); int prism2_ap_get_sta_qual(local_info_t *local, struct sockaddr addr[], struct iw_quality qual[], int buf_size, int aplist); diff --git a/drivers/net/wireless/hostap/hostap_80211.h b/drivers/net/wireless/hostap/hostap_80211.h index 3694b1eba521..3a9474d9a907 100644 --- a/drivers/net/wireless/hostap/hostap_80211.h +++ b/drivers/net/wireless/hostap/hostap_80211.h @@ -2,7 +2,7 @@ #define HOSTAP_80211_H #include -#include +#include struct hostap_ieee80211_mgmt { __le16 frame_control; diff --git a/drivers/net/wireless/hostap/hostap_80211_rx.c b/drivers/net/wireless/hostap/hostap_80211_rx.c index 5f64461aa54e..19b1bf0478bd 100644 --- a/drivers/net/wireless/hostap/hostap_80211_rx.c +++ b/drivers/net/wireless/hostap/hostap_80211_rx.c @@ -1,5 +1,5 @@ #include -#include +#include #include "hostap_80211.h" #include "hostap.h" @@ -649,7 +649,7 @@ static int hostap_is_eapol_frame(local_info_t *local, struct sk_buff *skb) /* Called only as a tasklet (software IRQ) */ static int hostap_rx_frame_decrypt(local_info_t *local, struct sk_buff *skb, - struct ieee80211_crypt_data *crypt) + struct lib80211_crypt_data *crypt) { struct ieee80211_hdr_4addr *hdr; int res, hdrlen; @@ -687,7 +687,7 @@ hostap_rx_frame_decrypt(local_info_t *local, struct sk_buff *skb, /* Called only as a tasklet (software IRQ) */ static int hostap_rx_frame_decrypt_msdu(local_info_t *local, struct sk_buff *skb, - int keyidx, struct ieee80211_crypt_data *crypt) + int keyidx, struct lib80211_crypt_data *crypt) { struct ieee80211_hdr_4addr *hdr; int res, hdrlen; @@ -733,7 +733,7 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb, int from_assoc_ap = 0; u8 dst[ETH_ALEN]; u8 src[ETH_ALEN]; - struct ieee80211_crypt_data *crypt = NULL; + struct lib80211_crypt_data *crypt = NULL; void *sta = NULL; int keyidx = 0; @@ -785,7 +785,7 @@ void hostap_80211_rx(struct net_device *dev, struct sk_buff *skb, int idx = 0; if (skb->len >= hdrlen + 3) idx = skb->data[hdrlen + 3] >> 6; - crypt = local->crypt[idx]; + crypt = local->crypt_info.crypt[idx]; sta = NULL; /* Use station specific key to override default keys if the diff --git a/drivers/net/wireless/hostap/hostap_80211_tx.c b/drivers/net/wireless/hostap/hostap_80211_tx.c index 075247188e64..078a010f39a0 100644 --- a/drivers/net/wireless/hostap/hostap_80211_tx.c +++ b/drivers/net/wireless/hostap/hostap_80211_tx.c @@ -306,7 +306,7 @@ int hostap_mgmt_start_xmit(struct sk_buff *skb, struct net_device *dev) /* Called only from software IRQ */ static struct sk_buff * hostap_tx_encrypt(struct sk_buff *skb, - struct ieee80211_crypt_data *crypt) + struct lib80211_crypt_data *crypt) { struct hostap_interface *iface; local_info_t *local; @@ -405,7 +405,7 @@ int hostap_master_start_xmit(struct sk_buff *skb, struct net_device *dev) if (local->host_encrypt) { /* Set crypt to default algorithm and key; will be replaced in * AP code if STA has own alg/key */ - tx.crypt = local->crypt[local->tx_keyidx]; + tx.crypt = local->crypt_info.crypt[local->crypt_info.tx_keyidx]; tx.host_encrypt = 1; } else { tx.crypt = NULL; @@ -487,7 +487,9 @@ int hostap_master_start_xmit(struct sk_buff *skb, struct net_device *dev) if (tx.crypt && (!tx.crypt->ops || !tx.crypt->ops->encrypt_mpdu)) tx.crypt = NULL; - else if ((tx.crypt || local->crypt[local->tx_keyidx]) && !no_encrypt) { + else if ((tx.crypt || + local->crypt_info.crypt[local->crypt_info.tx_keyidx]) && + !no_encrypt) { /* Add ISWEP flag both for firmware and host based encryption */ fc |= IEEE80211_FCTL_PROTECTED; diff --git a/drivers/net/wireless/hostap/hostap_ap.c b/drivers/net/wireless/hostap/hostap_ap.c index dec3dbe1bf8f..0903db786d5f 100644 --- a/drivers/net/wireless/hostap/hostap_ap.c +++ b/drivers/net/wireless/hostap/hostap_ap.c @@ -1206,7 +1206,7 @@ static void prism2_check_tx_rates(struct sta_info *sta) static void ap_crypt_init(struct ap_data *ap) { - ap->crypt = ieee80211_get_crypto_ops("WEP"); + ap->crypt = lib80211_get_crypto_ops("WEP"); if (ap->crypt) { if (ap->crypt->init) { @@ -1224,7 +1224,7 @@ static void ap_crypt_init(struct ap_data *ap) if (ap->crypt == NULL) { printk(KERN_WARNING "AP could not initialize WEP: load module " - "ieee80211_crypt_wep.ko\n"); + "lib80211_crypt_wep.ko\n"); } } @@ -1293,7 +1293,7 @@ static void handle_authen(local_info_t *local, struct sk_buff *skb, __le16 *pos; u16 resp = WLAN_STATUS_SUCCESS, fc; struct sta_info *sta = NULL; - struct ieee80211_crypt_data *crypt; + struct lib80211_crypt_data *crypt; char *txt = ""; len = skb->len - IEEE80211_MGMT_HDR_LEN; @@ -1319,7 +1319,7 @@ static void handle_authen(local_info_t *local, struct sk_buff *skb, int idx = 0; if (skb->len >= hdrlen + 3) idx = skb->data[hdrlen + 3] >> 6; - crypt = local->crypt[idx]; + crypt = local->crypt_info.crypt[idx]; } pos = (__le16 *) (skb->data + IEEE80211_MGMT_HDR_LEN); @@ -3065,7 +3065,7 @@ ap_rx_ret hostap_handle_sta_rx(local_info_t *local, struct net_device *dev, /* Called only as a tasklet (software IRQ) */ int hostap_handle_sta_crypto(local_info_t *local, struct ieee80211_hdr_4addr *hdr, - struct ieee80211_crypt_data **crypt, + struct lib80211_crypt_data **crypt, void **sta_ptr) { struct sta_info *sta; @@ -3213,7 +3213,7 @@ void hostap_update_rates(local_info_t *local) void * ap_crypt_get_ptrs(struct ap_data *ap, u8 *addr, int permanent, - struct ieee80211_crypt_data ***crypt) + struct lib80211_crypt_data ***crypt) { struct sta_info *sta; diff --git a/drivers/net/wireless/hostap/hostap_ap.h b/drivers/net/wireless/hostap/hostap_ap.h index 2fa2452b6b07..d36e4b175336 100644 --- a/drivers/net/wireless/hostap/hostap_ap.h +++ b/drivers/net/wireless/hostap/hostap_ap.h @@ -74,7 +74,7 @@ struct sta_info { u32 tx_since_last_failure; u32 tx_consecutive_exc; - struct ieee80211_crypt_data *crypt; + struct lib80211_crypt_data *crypt; int ap; /* whether this station is an AP */ @@ -209,7 +209,7 @@ struct ap_data { /* WEP operations for generating challenges to be used with shared key * authentication */ - struct ieee80211_crypto_ops *crypt; + struct lib80211_crypto_ops *crypt; void *crypt_priv; #endif /* PRISM2_NO_KERNEL_IEEE80211_MGMT */ }; @@ -229,7 +229,7 @@ typedef enum { struct hostap_tx_data { struct sk_buff *skb; int host_encrypt; - struct ieee80211_crypt_data *crypt; + struct lib80211_crypt_data *crypt; void *sta_ptr; }; ap_tx_ret hostap_handle_sta_tx(local_info_t *local, struct hostap_tx_data *tx); @@ -244,7 +244,7 @@ ap_rx_ret hostap_handle_sta_rx(local_info_t *local, struct net_device *dev, struct hostap_80211_rx_status *rx_stats, int wds); int hostap_handle_sta_crypto(local_info_t *local, struct ieee80211_hdr_4addr *hdr, - struct ieee80211_crypt_data **crypt, + struct lib80211_crypt_data **crypt, void **sta_ptr); int hostap_is_sta_assoc(struct ap_data *ap, u8 *sta_addr); int hostap_is_sta_authorized(struct ap_data *ap, u8 *sta_addr); diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c index fd7f7ceeac46..066299fc9259 100644 --- a/drivers/net/wireless/hostap/hostap_hw.c +++ b/drivers/net/wireless/hostap/hostap_hw.c @@ -47,7 +47,7 @@ #include #include #include -#include +#include #include #include "hostap_80211.h" @@ -2791,11 +2791,12 @@ static void prism2_check_sta_fw_version(local_info_t *local) static void prism2_crypt_deinit_entries(local_info_t *local, int force) { struct list_head *ptr, *n; - struct ieee80211_crypt_data *entry; + struct lib80211_crypt_data *entry; - for (ptr = local->crypt_deinit_list.next, n = ptr->next; - ptr != &local->crypt_deinit_list; ptr = n, n = ptr->next) { - entry = list_entry(ptr, struct ieee80211_crypt_data, list); + for (ptr = local->crypt_info.crypt_deinit_list.next, n = ptr->next; + ptr != &local->crypt_info.crypt_deinit_list; + ptr = n, n = ptr->next) { + entry = list_entry(ptr, struct lib80211_crypt_data, list); if (atomic_read(&entry->refcnt) != 0 && !force) continue; @@ -2816,11 +2817,11 @@ static void prism2_crypt_deinit_handler(unsigned long data) spin_lock_irqsave(&local->lock, flags); prism2_crypt_deinit_entries(local, 0); - if (!list_empty(&local->crypt_deinit_list)) { + if (!list_empty(&local->crypt_info.crypt_deinit_list)) { printk(KERN_DEBUG "%s: entries remaining in delayed crypt " "deletion list\n", local->dev->name); - local->crypt_deinit_timer.expires = jiffies + HZ; - add_timer(&local->crypt_deinit_timer); + local->crypt_info.crypt_deinit_timer.expires = jiffies + HZ; + add_timer(&local->crypt_info.crypt_deinit_timer); } spin_unlock_irqrestore(&local->lock, flags); @@ -3250,10 +3251,13 @@ while (0) INIT_LIST_HEAD(&local->cmd_queue); init_waitqueue_head(&local->hostscan_wq); - INIT_LIST_HEAD(&local->crypt_deinit_list); - init_timer(&local->crypt_deinit_timer); - local->crypt_deinit_timer.data = (unsigned long) local; - local->crypt_deinit_timer.function = prism2_crypt_deinit_handler; + + local->crypt_info.name = dev->name; + local->crypt_info.lock = &local->lock; + INIT_LIST_HEAD(&local->crypt_info.crypt_deinit_list); + init_timer(&local->crypt_info.crypt_deinit_timer); + local->crypt_info.crypt_deinit_timer.data = (unsigned long) local; + local->crypt_info.crypt_deinit_timer.function = prism2_crypt_deinit_handler; init_timer(&local->passive_scan_timer); local->passive_scan_timer.data = (unsigned long) local; @@ -3354,8 +3358,8 @@ static void prism2_free_local_data(struct net_device *dev) flush_scheduled_work(); - if (timer_pending(&local->crypt_deinit_timer)) - del_timer(&local->crypt_deinit_timer); + if (timer_pending(&local->crypt_info.crypt_deinit_timer)) + del_timer(&local->crypt_info.crypt_deinit_timer); prism2_crypt_deinit_entries(local, 1); if (timer_pending(&local->passive_scan_timer)) @@ -3374,12 +3378,12 @@ static void prism2_free_local_data(struct net_device *dev) prism2_callback(local, PRISM2_CALLBACK_DISABLE); for (i = 0; i < WEP_KEYS; i++) { - struct ieee80211_crypt_data *crypt = local->crypt[i]; + struct lib80211_crypt_data *crypt = local->crypt_info.crypt[i]; if (crypt) { if (crypt->ops) crypt->ops->deinit(crypt->priv); kfree(crypt); - local->crypt[i] = NULL; + local->crypt_info.crypt[i] = NULL; } } diff --git a/drivers/net/wireless/hostap/hostap_ioctl.c b/drivers/net/wireless/hostap/hostap_ioctl.c index 2318c5df7a08..29aebb679099 100644 --- a/drivers/net/wireless/hostap/hostap_ioctl.c +++ b/drivers/net/wireless/hostap/hostap_ioctl.c @@ -2,7 +2,7 @@ #include #include -#include +#include #include "hostap_wlan.h" #include "hostap.h" @@ -117,9 +117,9 @@ static int prism2_get_name(struct net_device *dev, static void prism2_crypt_delayed_deinit(local_info_t *local, - struct ieee80211_crypt_data **crypt) + struct lib80211_crypt_data **crypt) { - struct ieee80211_crypt_data *tmp; + struct lib80211_crypt_data *tmp; unsigned long flags; tmp = *crypt; @@ -133,10 +133,10 @@ static void prism2_crypt_delayed_deinit(local_info_t *local, * locking. */ spin_lock_irqsave(&local->lock, flags); - list_add(&tmp->list, &local->crypt_deinit_list); - if (!timer_pending(&local->crypt_deinit_timer)) { - local->crypt_deinit_timer.expires = jiffies + HZ; - add_timer(&local->crypt_deinit_timer); + list_add(&tmp->list, &local->crypt_info.crypt_deinit_list); + if (!timer_pending(&local->crypt_info.crypt_deinit_timer)) { + local->crypt_info.crypt_deinit_timer.expires = jiffies + HZ; + add_timer(&local->crypt_info.crypt_deinit_timer); } spin_unlock_irqrestore(&local->lock, flags); } @@ -149,20 +149,20 @@ static int prism2_ioctl_siwencode(struct net_device *dev, struct hostap_interface *iface; local_info_t *local; int i; - struct ieee80211_crypt_data **crypt; + struct lib80211_crypt_data **crypt; iface = netdev_priv(dev); local = iface->local; i = erq->flags & IW_ENCODE_INDEX; if (i < 1 || i > 4) - i = local->tx_keyidx; + i = local->crypt_info.tx_keyidx; else i--; if (i < 0 || i >= WEP_KEYS) return -EINVAL; - crypt = &local->crypt[i]; + crypt = &local->crypt_info.crypt[i]; if (erq->flags & IW_ENCODE_DISABLED) { if (*crypt) @@ -177,17 +177,17 @@ static int prism2_ioctl_siwencode(struct net_device *dev, } if (*crypt == NULL) { - struct ieee80211_crypt_data *new_crypt; + struct lib80211_crypt_data *new_crypt; /* take WEP into use */ - new_crypt = kzalloc(sizeof(struct ieee80211_crypt_data), + new_crypt = kzalloc(sizeof(struct lib80211_crypt_data), GFP_KERNEL); if (new_crypt == NULL) return -ENOMEM; - new_crypt->ops = ieee80211_get_crypto_ops("WEP"); + new_crypt->ops = lib80211_get_crypto_ops("WEP"); if (!new_crypt->ops) { - request_module("ieee80211_crypt_wep"); - new_crypt->ops = ieee80211_get_crypto_ops("WEP"); + request_module("lib80211_crypt_wep"); + new_crypt->ops = lib80211_get_crypto_ops("WEP"); } if (new_crypt->ops) new_crypt->priv = new_crypt->ops->init(i); @@ -210,16 +210,16 @@ static int prism2_ioctl_siwencode(struct net_device *dev, memset(keybuf + erq->length, 0, len - erq->length); (*crypt)->ops->set_key(keybuf, len, NULL, (*crypt)->priv); for (j = 0; j < WEP_KEYS; j++) { - if (j != i && local->crypt[j]) { + if (j != i && local->crypt_info.crypt[j]) { first = 0; break; } } if (first) - local->tx_keyidx = i; + local->crypt_info.tx_keyidx = i; } else { /* No key data - just set the default TX key index */ - local->tx_keyidx = i; + local->crypt_info.tx_keyidx = i; } done: @@ -252,20 +252,20 @@ static int prism2_ioctl_giwencode(struct net_device *dev, local_info_t *local; int i, len; u16 val; - struct ieee80211_crypt_data *crypt; + struct lib80211_crypt_data *crypt; iface = netdev_priv(dev); local = iface->local; i = erq->flags & IW_ENCODE_INDEX; if (i < 1 || i > 4) - i = local->tx_keyidx; + i = local->crypt_info.tx_keyidx; else i--; if (i < 0 || i >= WEP_KEYS) return -EINVAL; - crypt = local->crypt[i]; + crypt = local->crypt_info.crypt[i]; erq->flags = i + 1; if (crypt == NULL || crypt->ops == NULL) { @@ -3227,8 +3227,8 @@ static int prism2_ioctl_siwencodeext(struct net_device *dev, local_info_t *local = iface->local; struct iw_encode_ext *ext = (struct iw_encode_ext *) extra; int i, ret = 0; - struct ieee80211_crypto_ops *ops; - struct ieee80211_crypt_data **crypt; + struct lib80211_crypto_ops *ops; + struct lib80211_crypt_data **crypt; void *sta_ptr; u8 *addr; const char *alg, *module; @@ -3237,7 +3237,7 @@ static int prism2_ioctl_siwencodeext(struct net_device *dev, if (i > WEP_KEYS) return -EINVAL; if (i < 1 || i > WEP_KEYS) - i = local->tx_keyidx; + i = local->crypt_info.tx_keyidx; else i--; if (i < 0 || i >= WEP_KEYS) @@ -3247,7 +3247,7 @@ static int prism2_ioctl_siwencodeext(struct net_device *dev, if (addr[0] == 0xff && addr[1] == 0xff && addr[2] == 0xff && addr[3] == 0xff && addr[4] == 0xff && addr[5] == 0xff) { sta_ptr = NULL; - crypt = &local->crypt[i]; + crypt = &local->crypt_info.crypt[i]; } else { if (i != 0) return -EINVAL; @@ -3260,7 +3260,7 @@ static int prism2_ioctl_siwencodeext(struct net_device *dev, * is emulated by using default key idx 0. */ i = 0; - crypt = &local->crypt[i]; + crypt = &local->crypt_info.crypt[i]; } else return -EINVAL; } @@ -3276,15 +3276,15 @@ static int prism2_ioctl_siwencodeext(struct net_device *dev, switch (ext->alg) { case IW_ENCODE_ALG_WEP: alg = "WEP"; - module = "ieee80211_crypt_wep"; + module = "lib80211_crypt_wep"; break; case IW_ENCODE_ALG_TKIP: alg = "TKIP"; - module = "ieee80211_crypt_tkip"; + module = "lib80211_crypt_tkip"; break; case IW_ENCODE_ALG_CCMP: alg = "CCMP"; - module = "ieee80211_crypt_ccmp"; + module = "lib80211_crypt_ccmp"; break; default: printk(KERN_DEBUG "%s: unsupported algorithm %d\n", @@ -3293,10 +3293,10 @@ static int prism2_ioctl_siwencodeext(struct net_device *dev, goto done; } - ops = ieee80211_get_crypto_ops(alg); + ops = lib80211_get_crypto_ops(alg); if (ops == NULL) { request_module(module); - ops = ieee80211_get_crypto_ops(alg); + ops = lib80211_get_crypto_ops(alg); } if (ops == NULL) { printk(KERN_DEBUG "%s: unknown crypto alg '%s'\n", @@ -3315,11 +3315,11 @@ static int prism2_ioctl_siwencodeext(struct net_device *dev, } if (*crypt == NULL || (*crypt)->ops != ops) { - struct ieee80211_crypt_data *new_crypt; + struct lib80211_crypt_data *new_crypt; prism2_crypt_delayed_deinit(local, crypt); - new_crypt = kzalloc(sizeof(struct ieee80211_crypt_data), + new_crypt = kzalloc(sizeof(struct lib80211_crypt_data), GFP_KERNEL); if (new_crypt == NULL) { ret = -ENOMEM; @@ -3354,20 +3354,20 @@ static int prism2_ioctl_siwencodeext(struct net_device *dev, if (ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY) { if (!sta_ptr) - local->tx_keyidx = i; + local->crypt_info.tx_keyidx = i; } if (sta_ptr == NULL && ext->key_len > 0) { int first = 1, j; for (j = 0; j < WEP_KEYS; j++) { - if (j != i && local->crypt[j]) { + if (j != i && local->crypt_info.crypt[j]) { first = 0; break; } } if (first) - local->tx_keyidx = i; + local->crypt_info.tx_keyidx = i; } done: @@ -3399,7 +3399,7 @@ static int prism2_ioctl_giwencodeext(struct net_device *dev, { struct hostap_interface *iface = netdev_priv(dev); local_info_t *local = iface->local; - struct ieee80211_crypt_data **crypt; + struct lib80211_crypt_data **crypt; void *sta_ptr; int max_key_len, i; struct iw_encode_ext *ext = (struct iw_encode_ext *) extra; @@ -3411,7 +3411,7 @@ static int prism2_ioctl_giwencodeext(struct net_device *dev, i = erq->flags & IW_ENCODE_INDEX; if (i < 1 || i > WEP_KEYS) - i = local->tx_keyidx; + i = local->crypt_info.tx_keyidx; else i--; @@ -3419,7 +3419,7 @@ static int prism2_ioctl_giwencodeext(struct net_device *dev, if (addr[0] == 0xff && addr[1] == 0xff && addr[2] == 0xff && addr[3] == 0xff && addr[4] == 0xff && addr[5] == 0xff) { sta_ptr = NULL; - crypt = &local->crypt[i]; + crypt = &local->crypt_info.crypt[i]; } else { i = 0; sta_ptr = ap_crypt_get_ptrs(local->ap, addr, 0, &crypt); @@ -3468,8 +3468,8 @@ static int prism2_ioctl_set_encryption(local_info_t *local, int param_len) { int ret = 0; - struct ieee80211_crypto_ops *ops; - struct ieee80211_crypt_data **crypt; + struct lib80211_crypto_ops *ops; + struct lib80211_crypt_data **crypt; void *sta_ptr; param->u.crypt.err = 0; @@ -3486,7 +3486,7 @@ static int prism2_ioctl_set_encryption(local_info_t *local, if (param->u.crypt.idx >= WEP_KEYS) return -EINVAL; sta_ptr = NULL; - crypt = &local->crypt[param->u.crypt.idx]; + crypt = &local->crypt_info.crypt[param->u.crypt.idx]; } else { if (param->u.crypt.idx) return -EINVAL; @@ -3507,16 +3507,16 @@ static int prism2_ioctl_set_encryption(local_info_t *local, goto done; } - ops = ieee80211_get_crypto_ops(param->u.crypt.alg); + ops = lib80211_get_crypto_ops(param->u.crypt.alg); if (ops == NULL && strcmp(param->u.crypt.alg, "WEP") == 0) { - request_module("ieee80211_crypt_wep"); - ops = ieee80211_get_crypto_ops(param->u.crypt.alg); + request_module("lib80211_crypt_wep"); + ops = lib80211_get_crypto_ops(param->u.crypt.alg); } else if (ops == NULL && strcmp(param->u.crypt.alg, "TKIP") == 0) { - request_module("ieee80211_crypt_tkip"); - ops = ieee80211_get_crypto_ops(param->u.crypt.alg); + request_module("lib80211_crypt_tkip"); + ops = lib80211_get_crypto_ops(param->u.crypt.alg); } else if (ops == NULL && strcmp(param->u.crypt.alg, "CCMP") == 0) { - request_module("ieee80211_crypt_ccmp"); - ops = ieee80211_get_crypto_ops(param->u.crypt.alg); + request_module("lib80211_crypt_ccmp"); + ops = lib80211_get_crypto_ops(param->u.crypt.alg); } if (ops == NULL) { printk(KERN_DEBUG "%s: unknown crypto alg '%s'\n", @@ -3531,11 +3531,11 @@ static int prism2_ioctl_set_encryption(local_info_t *local, local->host_decrypt = local->host_encrypt = 1; if (*crypt == NULL || (*crypt)->ops != ops) { - struct ieee80211_crypt_data *new_crypt; + struct lib80211_crypt_data *new_crypt; prism2_crypt_delayed_deinit(local, crypt); - new_crypt = kzalloc(sizeof(struct ieee80211_crypt_data), + new_crypt = kzalloc(sizeof(struct lib80211_crypt_data), GFP_KERNEL); if (new_crypt == NULL) { ret = -ENOMEM; @@ -3568,7 +3568,7 @@ static int prism2_ioctl_set_encryption(local_info_t *local, if (param->u.crypt.flags & HOSTAP_CRYPT_FLAG_SET_TX_KEY) { if (!sta_ptr) - local->tx_keyidx = param->u.crypt.idx; + local->crypt_info.tx_keyidx = param->u.crypt.idx; else if (param->u.crypt.idx) { printk(KERN_DEBUG "%s: TX key idx setting failed\n", local->dev->name); @@ -3604,7 +3604,7 @@ static int prism2_ioctl_get_encryption(local_info_t *local, struct prism2_hostapd_param *param, int param_len) { - struct ieee80211_crypt_data **crypt; + struct lib80211_crypt_data **crypt; void *sta_ptr; int max_key_len; @@ -3620,8 +3620,8 @@ static int prism2_ioctl_get_encryption(local_info_t *local, param->sta_addr[4] == 0xff && param->sta_addr[5] == 0xff) { sta_ptr = NULL; if (param->u.crypt.idx >= WEP_KEYS) - param->u.crypt.idx = local->tx_keyidx; - crypt = &local->crypt[param->u.crypt.idx]; + param->u.crypt.idx = local->crypt_info.tx_keyidx; + crypt = &local->crypt_info.crypt[param->u.crypt.idx]; } else { param->u.crypt.idx = 0; sta_ptr = ap_crypt_get_ptrs(local->ap, param->sta_addr, 0, diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c index 4c36eb2fafd1..02a312ca8607 100644 --- a/drivers/net/wireless/hostap/hostap_main.c +++ b/drivers/net/wireless/hostap/hostap_main.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include "hostap_wlan.h" @@ -343,10 +343,11 @@ int hostap_set_encryption(local_info_t *local) char keybuf[WEP_KEY_LEN + 1]; enum { NONE, WEP, OTHER } encrypt_type; - idx = local->tx_keyidx; - if (local->crypt[idx] == NULL || local->crypt[idx]->ops == NULL) + idx = local->crypt_info.tx_keyidx; + if (local->crypt_info.crypt[idx] == NULL || + local->crypt_info.crypt[idx]->ops == NULL) encrypt_type = NONE; - else if (strcmp(local->crypt[idx]->ops->name, "WEP") == 0) + else if (strcmp(local->crypt_info.crypt[idx]->ops->name, "WEP") == 0) encrypt_type = WEP; else encrypt_type = OTHER; @@ -394,17 +395,17 @@ int hostap_set_encryption(local_info_t *local) /* 104-bit support seems to require that all the keys are set to the * same keylen */ keylen = 6; /* first 5 octets */ - len = local->crypt[idx]->ops->get_key(keybuf, sizeof(keybuf), - NULL, local->crypt[idx]->priv); + len = local->crypt_info.crypt[idx]->ops->get_key(keybuf, sizeof(keybuf), NULL, + local->crypt_info.crypt[idx]->priv); if (idx >= 0 && idx < WEP_KEYS && len > 5) keylen = WEP_KEY_LEN + 1; /* first 13 octets */ for (i = 0; i < WEP_KEYS; i++) { memset(keybuf, 0, sizeof(keybuf)); - if (local->crypt[i]) { - (void) local->crypt[i]->ops->get_key( + if (local->crypt_info.crypt[i]) { + (void) local->crypt_info.crypt[i]->ops->get_key( keybuf, sizeof(keybuf), - NULL, local->crypt[i]->priv); + NULL, local->crypt_info.crypt[i]->priv); } if (local->func->set_rid(local->dev, HFA384X_RID_CNFDEFAULTKEY0 + i, diff --git a/drivers/net/wireless/hostap/hostap_proc.c b/drivers/net/wireless/hostap/hostap_proc.c index ae7d3caf3dae..005ff25a405f 100644 --- a/drivers/net/wireless/hostap/hostap_proc.c +++ b/drivers/net/wireless/hostap/hostap_proc.c @@ -2,7 +2,7 @@ #include #include -#include +#include #include "hostap_wlan.h" #include "hostap.h" @@ -36,9 +36,10 @@ static int prism2_debug_proc_read(char *page, char **start, off_t off, p += sprintf(p, "dev_enabled=%d\n", local->dev_enabled); p += sprintf(p, "sw_tick_stuck=%d\n", local->sw_tick_stuck); for (i = 0; i < WEP_KEYS; i++) { - if (local->crypt[i] && local->crypt[i]->ops) { - p += sprintf(p, "crypt[%d]=%s\n", - i, local->crypt[i]->ops->name); + if (local->crypt_info.crypt[i] && + local->crypt_info.crypt[i]->ops) { + p += sprintf(p, "crypt[%d]=%s\n", i, + local->crypt_info.crypt[i]->ops->name); } } p += sprintf(p, "pri_only=%d\n", local->pri_only); @@ -206,12 +207,13 @@ static int prism2_crypt_proc_read(char *page, char **start, off_t off, return 0; } - p += sprintf(p, "tx_keyidx=%d\n", local->tx_keyidx); + p += sprintf(p, "tx_keyidx=%d\n", local->crypt_info.tx_keyidx); for (i = 0; i < WEP_KEYS; i++) { - if (local->crypt[i] && local->crypt[i]->ops && - local->crypt[i]->ops->print_stats) { - p = local->crypt[i]->ops->print_stats( - p, local->crypt[i]->priv); + if (local->crypt_info.crypt[i] && + local->crypt_info.crypt[i]->ops && + local->crypt_info.crypt[i]->ops->print_stats) { + p = local->crypt_info.crypt[i]->ops->print_stats( + p, local->crypt_info.crypt[i]->priv); } } diff --git a/drivers/net/wireless/hostap/hostap_wlan.h b/drivers/net/wireless/hostap/hostap_wlan.h index d2c7a56b8b59..4d8d51a353cd 100644 --- a/drivers/net/wireless/hostap/hostap_wlan.h +++ b/drivers/net/wireless/hostap/hostap_wlan.h @@ -6,6 +6,7 @@ #include #include #include +#include #include "hostap_config.h" #include "hostap_common.h" @@ -763,10 +764,7 @@ struct local_info { #define WEP_KEYS 4 #define WEP_KEY_LEN 13 - struct ieee80211_crypt_data *crypt[WEP_KEYS]; - int tx_keyidx; /* default TX key index (crypt[tx_keyidx]) */ - struct timer_list crypt_deinit_timer; - struct list_head crypt_deinit_list; + struct lib80211_crypt_info crypt_info; int open_wep; /* allow unencrypted frames */ int host_encrypt; diff --git a/drivers/net/wireless/ipw2100.c b/drivers/net/wireless/ipw2100.c index 062c9f280304..2d2044d3d1c9 100644 --- a/drivers/net/wireless/ipw2100.c +++ b/drivers/net/wireless/ipw2100.c @@ -4010,7 +4010,7 @@ static ssize_t show_internals(struct device *d, struct device_attribute *attr, else len += sprintf(buf + len, "not connected\n"); - DUMP_VAR(ieee->crypt[priv->ieee->tx_keyidx], "p"); + DUMP_VAR(ieee->crypt_info.crypt[priv->ieee->crypt_info.tx_keyidx], "p"); DUMP_VAR(status, "08lx"); DUMP_VAR(config, "08lx"); DUMP_VAR(capability, "08lx"); @@ -5514,7 +5514,7 @@ static int ipw2100_configure_security(struct ipw2100_priv *priv, int batch_mode) } } - ipw2100_set_key_index(priv, priv->ieee->tx_keyidx, 1); + ipw2100_set_key_index(priv, priv->ieee->crypt_info.tx_keyidx, 1); } /* Always enable privacy so the Host can filter WEP packets if @@ -7620,7 +7620,7 @@ static int ipw2100_wx_set_auth(struct net_device *dev, struct ipw2100_priv *priv = ieee80211_priv(dev); struct ieee80211_device *ieee = priv->ieee; struct iw_param *param = &wrqu->param; - struct ieee80211_crypt_data *crypt; + struct lib80211_crypt_data *crypt; unsigned long flags; int ret = 0; @@ -7635,7 +7635,7 @@ static int ipw2100_wx_set_auth(struct net_device *dev, break; case IW_AUTH_TKIP_COUNTERMEASURES: - crypt = priv->ieee->crypt[priv->ieee->tx_keyidx]; + crypt = priv->ieee->crypt_info.crypt[priv->ieee->crypt_info.tx_keyidx]; if (!crypt || !crypt->ops->set_flags || !crypt->ops->get_flags) break; @@ -7712,7 +7712,7 @@ static int ipw2100_wx_get_auth(struct net_device *dev, { struct ipw2100_priv *priv = ieee80211_priv(dev); struct ieee80211_device *ieee = priv->ieee; - struct ieee80211_crypt_data *crypt; + struct lib80211_crypt_data *crypt; struct iw_param *param = &wrqu->param; int ret = 0; @@ -7728,7 +7728,7 @@ static int ipw2100_wx_get_auth(struct net_device *dev, break; case IW_AUTH_TKIP_COUNTERMEASURES: - crypt = priv->ieee->crypt[priv->ieee->tx_keyidx]; + crypt = priv->ieee->crypt_info.crypt[priv->ieee->crypt_info.tx_keyidx]; if (!crypt || !crypt->ops->get_flags) { IPW_DEBUG_WARNING("Can't get TKIP countermeasures: " "crypt not set!\n"); diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c index 6ca0f1c05048..d2a2b7586d08 100644 --- a/drivers/net/wireless/ipw2200.c +++ b/drivers/net/wireless/ipw2200.c @@ -6600,7 +6600,7 @@ static int ipw_wx_set_auth(struct net_device *dev, struct ipw_priv *priv = ieee80211_priv(dev); struct ieee80211_device *ieee = priv->ieee; struct iw_param *param = &wrqu->param; - struct ieee80211_crypt_data *crypt; + struct lib80211_crypt_data *crypt; unsigned long flags; int ret = 0; @@ -6622,7 +6622,7 @@ static int ipw_wx_set_auth(struct net_device *dev, break; case IW_AUTH_TKIP_COUNTERMEASURES: - crypt = priv->ieee->crypt[priv->ieee->tx_keyidx]; + crypt = priv->ieee->crypt_info.crypt[priv->ieee->crypt_info.tx_keyidx]; if (!crypt || !crypt->ops->set_flags || !crypt->ops->get_flags) break; @@ -6699,7 +6699,7 @@ static int ipw_wx_get_auth(struct net_device *dev, { struct ipw_priv *priv = ieee80211_priv(dev); struct ieee80211_device *ieee = priv->ieee; - struct ieee80211_crypt_data *crypt; + struct lib80211_crypt_data *crypt; struct iw_param *param = &wrqu->param; int ret = 0; @@ -6715,7 +6715,7 @@ static int ipw_wx_get_auth(struct net_device *dev, break; case IW_AUTH_TKIP_COUNTERMEASURES: - crypt = priv->ieee->crypt[priv->ieee->tx_keyidx]; + crypt = priv->ieee->crypt_info.crypt[priv->ieee->crypt_info.tx_keyidx]; if (!crypt || !crypt->ops->get_flags) break; @@ -10251,8 +10251,8 @@ static int ipw_tx_skb(struct ipw_priv *priv, struct ieee80211_txb *txb, case SEC_LEVEL_1: tfd->u.data.tfd.tfd_24.mchdr.frame_ctl |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - tfd->u.data.key_index = priv->ieee->tx_keyidx; - if (priv->ieee->sec.key_sizes[priv->ieee->tx_keyidx] <= + tfd->u.data.key_index = priv->ieee->crypt_info.tx_keyidx; + if (priv->ieee->sec.key_sizes[priv->ieee->crypt_info.tx_keyidx] <= 40) tfd->u.data.key_index |= DCT_WEP_KEY_64Bit; else diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h index 738734a4653b..7ab3ed2bbccb 100644 --- a/include/net/ieee80211.h +++ b/include/net/ieee80211.h @@ -30,6 +30,8 @@ #include #include +#include + #define IEEE80211_VERSION "git-1.1.13" #define IEEE80211_DATA_LEN 2304 @@ -355,8 +357,6 @@ struct ieee80211_stats { struct ieee80211_device; -#include "ieee80211_crypt.h" - #define SEC_KEY_1 (1<<0) #define SEC_KEY_2 (1<<1) #define SEC_KEY_3 (1<<2) @@ -937,11 +937,7 @@ struct ieee80211_device { size_t wpa_ie_len; u8 *wpa_ie; - struct list_head crypt_deinit_list; - struct ieee80211_crypt_data *crypt[WEP_KEYS]; - int tx_keyidx; /* default TX key index (crypt[tx_keyidx]) */ - struct timer_list crypt_deinit_timer; - int crypt_quiesced; + struct lib80211_crypt_info crypt_info; int bcrx_sta_key; /* use individual keys to override default keys even * with RX of broad/multicast frames */ diff --git a/include/net/ieee80211_crypt.h b/include/net/ieee80211_crypt.h deleted file mode 100644 index b3d65e0bedd3..000000000000 --- a/include/net/ieee80211_crypt.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Original code based on Host AP (software wireless LAN access point) driver - * for Intersil Prism2/2.5/3. - * - * Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen - * - * Copyright (c) 2002-2003, Jouni Malinen - * - * Adaption to a generic IEEE 802.11 stack by James Ketrenos - * - * - * Copyright (c) 2004, Intel Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. See README and COPYING for - * more details. - */ - -/* - * This file defines the interface to the ieee80211 crypto module. - */ -#ifndef IEEE80211_CRYPT_H -#define IEEE80211_CRYPT_H - -#include -#include -#include -#include - -enum { - IEEE80211_CRYPTO_TKIP_COUNTERMEASURES = (1 << 0), -}; - -struct sk_buff; -struct module; - -struct ieee80211_crypto_ops { - const char *name; - struct list_head list; - - /* init new crypto context (e.g., allocate private data space, - * select IV, etc.); returns NULL on failure or pointer to allocated - * private data on success */ - void *(*init) (int keyidx); - - /* deinitialize crypto context and free allocated private data */ - void (*deinit) (void *priv); - - int (*build_iv) (struct sk_buff * skb, int hdr_len, - u8 *key, int keylen, void *priv); - - /* encrypt/decrypt return < 0 on error or >= 0 on success. The return - * value from decrypt_mpdu is passed as the keyidx value for - * decrypt_msdu. skb must have enough head and tail room for the - * encryption; if not, error will be returned; these functions are - * called for all MPDUs (i.e., fragments). - */ - int (*encrypt_mpdu) (struct sk_buff * skb, int hdr_len, void *priv); - int (*decrypt_mpdu) (struct sk_buff * skb, int hdr_len, void *priv); - - /* These functions are called for full MSDUs, i.e. full frames. - * These can be NULL if full MSDU operations are not needed. */ - int (*encrypt_msdu) (struct sk_buff * skb, int hdr_len, void *priv); - int (*decrypt_msdu) (struct sk_buff * skb, int keyidx, int hdr_len, - void *priv); - - int (*set_key) (void *key, int len, u8 * seq, void *priv); - int (*get_key) (void *key, int len, u8 * seq, void *priv); - - /* procfs handler for printing out key information and possible - * statistics */ - char *(*print_stats) (char *p, void *priv); - - /* Crypto specific flag get/set for configuration settings */ - unsigned long (*get_flags) (void *priv); - unsigned long (*set_flags) (unsigned long flags, void *priv); - - /* maximum number of bytes added by encryption; encrypt buf is - * allocated with extra_prefix_len bytes, copy of in_buf, and - * extra_postfix_len; encrypt need not use all this space, but - * the result must start at the beginning of the buffer and correct - * length must be returned */ - int extra_mpdu_prefix_len, extra_mpdu_postfix_len; - int extra_msdu_prefix_len, extra_msdu_postfix_len; - - struct module *owner; -}; - -struct ieee80211_crypt_data { - struct list_head list; /* delayed deletion list */ - struct ieee80211_crypto_ops *ops; - void *priv; - atomic_t refcnt; -}; - -struct ieee80211_device; - -int ieee80211_register_crypto_ops(struct ieee80211_crypto_ops *ops); -int ieee80211_unregister_crypto_ops(struct ieee80211_crypto_ops *ops); -struct ieee80211_crypto_ops *ieee80211_get_crypto_ops(const char *name); -void ieee80211_crypt_deinit_entries(struct ieee80211_device *, int); -void ieee80211_crypt_deinit_handler(unsigned long); -void ieee80211_crypt_delayed_deinit(struct ieee80211_device *ieee, - struct ieee80211_crypt_data **crypt); -void ieee80211_crypt_quiescing(struct ieee80211_device *ieee); - -#endif diff --git a/include/net/lib80211.h b/include/net/lib80211.h index e1558a187ac0..dd1079f98da4 100644 --- a/include/net/lib80211.h +++ b/include/net/lib80211.h @@ -3,11 +3,32 @@ * * Copyright (c) 2008, John W. Linville * + * Some bits copied from old ieee80211 component, w/ original copyright + * notices below: + * + * Original code based on Host AP (software wireless LAN access point) driver + * for Intersil Prism2/2.5/3. + * + * Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen + * + * Copyright (c) 2002-2003, Jouni Malinen + * + * Adaption to a generic IEEE 802.11 stack by James Ketrenos + * + * + * Copyright (c) 2004, Intel Corporation + * */ #ifndef LIB80211_H #define LIB80211_H +#include +#include +#include +#include +#include +#include #include /* print_ssid() is intended to be used in debug (and possibly error) @@ -15,4 +36,91 @@ const char *print_ssid(char *buf, const char *ssid, u8 ssid_len); #define DECLARE_SSID_BUF(var) char var[IEEE80211_MAX_SSID_LEN * 4 + 1] __maybe_unused +#define NUM_WEP_KEYS 4 + +enum { + IEEE80211_CRYPTO_TKIP_COUNTERMEASURES = (1 << 0), +}; + +struct lib80211_crypto_ops { + const char *name; + struct list_head list; + + /* init new crypto context (e.g., allocate private data space, + * select IV, etc.); returns NULL on failure or pointer to allocated + * private data on success */ + void *(*init) (int keyidx); + + /* deinitialize crypto context and free allocated private data */ + void (*deinit) (void *priv); + + int (*build_iv) (struct sk_buff * skb, int hdr_len, + u8 *key, int keylen, void *priv); + + /* encrypt/decrypt return < 0 on error or >= 0 on success. The return + * value from decrypt_mpdu is passed as the keyidx value for + * decrypt_msdu. skb must have enough head and tail room for the + * encryption; if not, error will be returned; these functions are + * called for all MPDUs (i.e., fragments). + */ + int (*encrypt_mpdu) (struct sk_buff * skb, int hdr_len, void *priv); + int (*decrypt_mpdu) (struct sk_buff * skb, int hdr_len, void *priv); + + /* These functions are called for full MSDUs, i.e. full frames. + * These can be NULL if full MSDU operations are not needed. */ + int (*encrypt_msdu) (struct sk_buff * skb, int hdr_len, void *priv); + int (*decrypt_msdu) (struct sk_buff * skb, int keyidx, int hdr_len, + void *priv); + + int (*set_key) (void *key, int len, u8 * seq, void *priv); + int (*get_key) (void *key, int len, u8 * seq, void *priv); + + /* procfs handler for printing out key information and possible + * statistics */ + char *(*print_stats) (char *p, void *priv); + + /* Crypto specific flag get/set for configuration settings */ + unsigned long (*get_flags) (void *priv); + unsigned long (*set_flags) (unsigned long flags, void *priv); + + /* maximum number of bytes added by encryption; encrypt buf is + * allocated with extra_prefix_len bytes, copy of in_buf, and + * extra_postfix_len; encrypt need not use all this space, but + * the result must start at the beginning of the buffer and correct + * length must be returned */ + int extra_mpdu_prefix_len, extra_mpdu_postfix_len; + int extra_msdu_prefix_len, extra_msdu_postfix_len; + + struct module *owner; +}; + +struct lib80211_crypt_data { + struct list_head list; /* delayed deletion list */ + struct lib80211_crypto_ops *ops; + void *priv; + atomic_t refcnt; +}; + +struct lib80211_crypt_info { + char *name; + /* Most clients will already have a lock, + so just point to that. */ + spinlock_t *lock; + + struct lib80211_crypt_data *crypt[NUM_WEP_KEYS]; + int tx_keyidx; /* default TX key index (crypt[tx_keyidx]) */ + struct list_head crypt_deinit_list; + struct timer_list crypt_deinit_timer; + int crypt_quiesced; +}; + +int lib80211_register_crypto_ops(struct lib80211_crypto_ops *ops); +int lib80211_unregister_crypto_ops(struct lib80211_crypto_ops *ops); +struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name); +void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *, int); +void lib80211_crypt_deinit_handler(unsigned long); +void lib80211_crypt_delayed_deinit(struct lib80211_crypt_info *info, + struct lib80211_crypt_data **crypt); +void lib80211_crypt_quiescing(struct lib80211_crypt_info *info); + #endif /* LIB80211_H */ diff --git a/net/ieee80211/Kconfig b/net/ieee80211/Kconfig index d2282bb2e4f1..46f24f4c9dc7 100644 --- a/net/ieee80211/Kconfig +++ b/net/ieee80211/Kconfig @@ -8,10 +8,10 @@ config IEEE80211 select CRYPTO_MICHAEL_MIC select CRYPTO_ECB select CRC32 - select IEEE80211_CRYPT_WEP - select IEEE80211_CRYPT_TKIP - select IEEE80211_CRYPT_CCMP select LIB80211 + select LIB80211_CRYPT_WEP + select LIB80211_CRYPT_TKIP + select LIB80211_CRYPT_CCMP ---help--- This option enables the hardware independent IEEE 802.11 networking stack. This component is deprecated in favor of the @@ -39,12 +39,3 @@ config IEEE80211_DEBUG If you are not trying to debug or develop the ieee80211 subsystem, you most likely want to say N here. - -config IEEE80211_CRYPT_WEP - tristate - -config IEEE80211_CRYPT_CCMP - tristate - -config IEEE80211_CRYPT_TKIP - tristate diff --git a/net/ieee80211/Makefile b/net/ieee80211/Makefile index f988417121da..158963ff18d2 100644 --- a/net/ieee80211/Makefile +++ b/net/ieee80211/Makefile @@ -1,8 +1,4 @@ obj-$(CONFIG_IEEE80211) += ieee80211.o -obj-$(CONFIG_IEEE80211) += ieee80211_crypt.o -obj-$(CONFIG_IEEE80211_CRYPT_WEP) += ieee80211_crypt_wep.o -obj-$(CONFIG_IEEE80211_CRYPT_CCMP) += ieee80211_crypt_ccmp.o -obj-$(CONFIG_IEEE80211_CRYPT_TKIP) += ieee80211_crypt_tkip.o ieee80211-objs := \ ieee80211_module.o \ ieee80211_tx.o \ diff --git a/net/ieee80211/ieee80211_crypt.c b/net/ieee80211/ieee80211_crypt.c deleted file mode 100644 index df5592c9339f..000000000000 --- a/net/ieee80211/ieee80211_crypt.c +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Host AP crypto routines - * - * Copyright (c) 2002-2003, Jouni Malinen - * Portions Copyright (C) 2004, Intel Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. See README and COPYING for - * more details. - * - */ - -#include -#include -#include -#include -#include -#include - -MODULE_AUTHOR("Jouni Malinen"); -MODULE_DESCRIPTION("HostAP crypto"); -MODULE_LICENSE("GPL"); - -struct ieee80211_crypto_alg { - struct list_head list; - struct ieee80211_crypto_ops *ops; -}; - -static LIST_HEAD(ieee80211_crypto_algs); -static DEFINE_SPINLOCK(ieee80211_crypto_lock); - -void ieee80211_crypt_deinit_entries(struct ieee80211_device *ieee, int force) -{ - struct ieee80211_crypt_data *entry, *next; - unsigned long flags; - - spin_lock_irqsave(&ieee->lock, flags); - list_for_each_entry_safe(entry, next, &ieee->crypt_deinit_list, list) { - if (atomic_read(&entry->refcnt) != 0 && !force) - continue; - - list_del(&entry->list); - - if (entry->ops) { - entry->ops->deinit(entry->priv); - module_put(entry->ops->owner); - } - kfree(entry); - } - spin_unlock_irqrestore(&ieee->lock, flags); -} - -/* After this, crypt_deinit_list won't accept new members */ -void ieee80211_crypt_quiescing(struct ieee80211_device *ieee) -{ - unsigned long flags; - - spin_lock_irqsave(&ieee->lock, flags); - ieee->crypt_quiesced = 1; - spin_unlock_irqrestore(&ieee->lock, flags); -} - -void ieee80211_crypt_deinit_handler(unsigned long data) -{ - struct ieee80211_device *ieee = (struct ieee80211_device *)data; - unsigned long flags; - - ieee80211_crypt_deinit_entries(ieee, 0); - - spin_lock_irqsave(&ieee->lock, flags); - if (!list_empty(&ieee->crypt_deinit_list) && !ieee->crypt_quiesced) { - printk(KERN_DEBUG "%s: entries remaining in delayed crypt " - "deletion list\n", ieee->dev->name); - ieee->crypt_deinit_timer.expires = jiffies + HZ; - add_timer(&ieee->crypt_deinit_timer); - } - spin_unlock_irqrestore(&ieee->lock, flags); -} - -void ieee80211_crypt_delayed_deinit(struct ieee80211_device *ieee, - struct ieee80211_crypt_data **crypt) -{ - struct ieee80211_crypt_data *tmp; - unsigned long flags; - - if (*crypt == NULL) - return; - - tmp = *crypt; - *crypt = NULL; - - /* must not run ops->deinit() while there may be pending encrypt or - * decrypt operations. Use a list of delayed deinits to avoid needing - * locking. */ - - spin_lock_irqsave(&ieee->lock, flags); - if (!ieee->crypt_quiesced) { - list_add(&tmp->list, &ieee->crypt_deinit_list); - if (!timer_pending(&ieee->crypt_deinit_timer)) { - ieee->crypt_deinit_timer.expires = jiffies + HZ; - add_timer(&ieee->crypt_deinit_timer); - } - } - spin_unlock_irqrestore(&ieee->lock, flags); -} - -int ieee80211_register_crypto_ops(struct ieee80211_crypto_ops *ops) -{ - unsigned long flags; - struct ieee80211_crypto_alg *alg; - - alg = kzalloc(sizeof(*alg), GFP_KERNEL); - if (alg == NULL) - return -ENOMEM; - - alg->ops = ops; - - spin_lock_irqsave(&ieee80211_crypto_lock, flags); - list_add(&alg->list, &ieee80211_crypto_algs); - spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); - - printk(KERN_DEBUG "ieee80211_crypt: registered algorithm '%s'\n", - ops->name); - - return 0; -} - -int ieee80211_unregister_crypto_ops(struct ieee80211_crypto_ops *ops) -{ - struct ieee80211_crypto_alg *alg; - unsigned long flags; - - spin_lock_irqsave(&ieee80211_crypto_lock, flags); - list_for_each_entry(alg, &ieee80211_crypto_algs, list) { - if (alg->ops == ops) - goto found; - } - spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); - return -EINVAL; - - found: - printk(KERN_DEBUG "ieee80211_crypt: unregistered algorithm " - "'%s'\n", ops->name); - list_del(&alg->list); - spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); - kfree(alg); - return 0; -} - -struct ieee80211_crypto_ops *ieee80211_get_crypto_ops(const char *name) -{ - struct ieee80211_crypto_alg *alg; - unsigned long flags; - - spin_lock_irqsave(&ieee80211_crypto_lock, flags); - list_for_each_entry(alg, &ieee80211_crypto_algs, list) { - if (strcmp(alg->ops->name, name) == 0) - goto found; - } - spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); - return NULL; - - found: - spin_unlock_irqrestore(&ieee80211_crypto_lock, flags); - return alg->ops; -} - -static void *ieee80211_crypt_null_init(int keyidx) -{ - return (void *)1; -} - -static void ieee80211_crypt_null_deinit(void *priv) -{ -} - -static struct ieee80211_crypto_ops ieee80211_crypt_null = { - .name = "NULL", - .init = ieee80211_crypt_null_init, - .deinit = ieee80211_crypt_null_deinit, - .owner = THIS_MODULE, -}; - -static int __init ieee80211_crypto_init(void) -{ - return ieee80211_register_crypto_ops(&ieee80211_crypt_null); -} - -static void __exit ieee80211_crypto_deinit(void) -{ - ieee80211_unregister_crypto_ops(&ieee80211_crypt_null); - BUG_ON(!list_empty(&ieee80211_crypto_algs)); -} - -EXPORT_SYMBOL(ieee80211_crypt_deinit_entries); -EXPORT_SYMBOL(ieee80211_crypt_deinit_handler); -EXPORT_SYMBOL(ieee80211_crypt_delayed_deinit); -EXPORT_SYMBOL(ieee80211_crypt_quiescing); - -EXPORT_SYMBOL(ieee80211_register_crypto_ops); -EXPORT_SYMBOL(ieee80211_unregister_crypto_ops); -EXPORT_SYMBOL(ieee80211_get_crypto_ops); - -module_init(ieee80211_crypto_init); -module_exit(ieee80211_crypto_deinit); diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c deleted file mode 100644 index bea04af0b482..000000000000 --- a/net/ieee80211/ieee80211_crypt_ccmp.c +++ /dev/null @@ -1,492 +0,0 @@ -/* - * Host AP crypt: host-based CCMP encryption implementation for Host AP driver - * - * Copyright (c) 2003-2004, Jouni Malinen - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. See README and COPYING for - * more details. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -MODULE_AUTHOR("Jouni Malinen"); -MODULE_DESCRIPTION("Host AP crypt: CCMP"); -MODULE_LICENSE("GPL"); - -#define AES_BLOCK_LEN 16 -#define CCMP_HDR_LEN 8 -#define CCMP_MIC_LEN 8 -#define CCMP_TK_LEN 16 -#define CCMP_PN_LEN 6 - -struct ieee80211_ccmp_data { - u8 key[CCMP_TK_LEN]; - int key_set; - - u8 tx_pn[CCMP_PN_LEN]; - u8 rx_pn[CCMP_PN_LEN]; - - u32 dot11RSNAStatsCCMPFormatErrors; - u32 dot11RSNAStatsCCMPReplays; - u32 dot11RSNAStatsCCMPDecryptErrors; - - int key_idx; - - struct crypto_cipher *tfm; - - /* scratch buffers for virt_to_page() (crypto API) */ - u8 tx_b0[AES_BLOCK_LEN], tx_b[AES_BLOCK_LEN], - tx_e[AES_BLOCK_LEN], tx_s0[AES_BLOCK_LEN]; - u8 rx_b0[AES_BLOCK_LEN], rx_b[AES_BLOCK_LEN], rx_a[AES_BLOCK_LEN]; -}; - -static inline void ieee80211_ccmp_aes_encrypt(struct crypto_cipher *tfm, - const u8 pt[16], u8 ct[16]) -{ - crypto_cipher_encrypt_one(tfm, ct, pt); -} - -static void *ieee80211_ccmp_init(int key_idx) -{ - struct ieee80211_ccmp_data *priv; - - priv = kzalloc(sizeof(*priv), GFP_ATOMIC); - if (priv == NULL) - goto fail; - priv->key_idx = key_idx; - - priv->tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(priv->tfm)) { - printk(KERN_DEBUG "ieee80211_crypt_ccmp: could not allocate " - "crypto API aes\n"); - priv->tfm = NULL; - goto fail; - } - - return priv; - - fail: - if (priv) { - if (priv->tfm) - crypto_free_cipher(priv->tfm); - kfree(priv); - } - - return NULL; -} - -static void ieee80211_ccmp_deinit(void *priv) -{ - struct ieee80211_ccmp_data *_priv = priv; - if (_priv && _priv->tfm) - crypto_free_cipher(_priv->tfm); - kfree(priv); -} - -static inline void xor_block(u8 * b, u8 * a, size_t len) -{ - int i; - for (i = 0; i < len; i++) - b[i] ^= a[i]; -} - -static void ccmp_init_blocks(struct crypto_cipher *tfm, - struct ieee80211_hdr_4addr *hdr, - u8 * pn, size_t dlen, u8 * b0, u8 * auth, u8 * s0) -{ - u8 *pos, qc = 0; - size_t aad_len; - u16 fc; - int a4_included, qc_included; - u8 aad[2 * AES_BLOCK_LEN]; - - fc = le16_to_cpu(hdr->frame_ctl); - a4_included = ((fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) == - (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)); - qc_included = ((WLAN_FC_GET_TYPE(fc) == IEEE80211_FTYPE_DATA) && - (WLAN_FC_GET_STYPE(fc) & IEEE80211_STYPE_QOS_DATA)); - aad_len = 22; - if (a4_included) - aad_len += 6; - if (qc_included) { - pos = (u8 *) & hdr->addr4; - if (a4_included) - pos += 6; - qc = *pos & 0x0f; - aad_len += 2; - } - - /* CCM Initial Block: - * Flag (Include authentication header, M=3 (8-octet MIC), - * L=1 (2-octet Dlen)) - * Nonce: 0x00 | A2 | PN - * Dlen */ - b0[0] = 0x59; - b0[1] = qc; - memcpy(b0 + 2, hdr->addr2, ETH_ALEN); - memcpy(b0 + 8, pn, CCMP_PN_LEN); - b0[14] = (dlen >> 8) & 0xff; - b0[15] = dlen & 0xff; - - /* AAD: - * FC with bits 4..6 and 11..13 masked to zero; 14 is always one - * A1 | A2 | A3 - * SC with bits 4..15 (seq#) masked to zero - * A4 (if present) - * QC (if present) - */ - pos = (u8 *) hdr; - aad[0] = 0; /* aad_len >> 8 */ - aad[1] = aad_len & 0xff; - aad[2] = pos[0] & 0x8f; - aad[3] = pos[1] & 0xc7; - memcpy(aad + 4, hdr->addr1, 3 * ETH_ALEN); - pos = (u8 *) & hdr->seq_ctl; - aad[22] = pos[0] & 0x0f; - aad[23] = 0; /* all bits masked */ - memset(aad + 24, 0, 8); - if (a4_included) - memcpy(aad + 24, hdr->addr4, ETH_ALEN); - if (qc_included) { - aad[a4_included ? 30 : 24] = qc; - /* rest of QC masked */ - } - - /* Start with the first block and AAD */ - ieee80211_ccmp_aes_encrypt(tfm, b0, auth); - xor_block(auth, aad, AES_BLOCK_LEN); - ieee80211_ccmp_aes_encrypt(tfm, auth, auth); - xor_block(auth, &aad[AES_BLOCK_LEN], AES_BLOCK_LEN); - ieee80211_ccmp_aes_encrypt(tfm, auth, auth); - b0[0] &= 0x07; - b0[14] = b0[15] = 0; - ieee80211_ccmp_aes_encrypt(tfm, b0, s0); -} - -static int ieee80211_ccmp_hdr(struct sk_buff *skb, int hdr_len, - u8 *aeskey, int keylen, void *priv) -{ - struct ieee80211_ccmp_data *key = priv; - int i; - u8 *pos; - - if (skb_headroom(skb) < CCMP_HDR_LEN || skb->len < hdr_len) - return -1; - - if (aeskey != NULL && keylen >= CCMP_TK_LEN) - memcpy(aeskey, key->key, CCMP_TK_LEN); - - pos = skb_push(skb, CCMP_HDR_LEN); - memmove(pos, pos + CCMP_HDR_LEN, hdr_len); - pos += hdr_len; - - i = CCMP_PN_LEN - 1; - while (i >= 0) { - key->tx_pn[i]++; - if (key->tx_pn[i] != 0) - break; - i--; - } - - *pos++ = key->tx_pn[5]; - *pos++ = key->tx_pn[4]; - *pos++ = 0; - *pos++ = (key->key_idx << 6) | (1 << 5) /* Ext IV included */ ; - *pos++ = key->tx_pn[3]; - *pos++ = key->tx_pn[2]; - *pos++ = key->tx_pn[1]; - *pos++ = key->tx_pn[0]; - - return CCMP_HDR_LEN; -} - -static int ieee80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) -{ - struct ieee80211_ccmp_data *key = priv; - int data_len, i, blocks, last, len; - u8 *pos, *mic; - struct ieee80211_hdr_4addr *hdr; - u8 *b0 = key->tx_b0; - u8 *b = key->tx_b; - u8 *e = key->tx_e; - u8 *s0 = key->tx_s0; - - if (skb_tailroom(skb) < CCMP_MIC_LEN || skb->len < hdr_len) - return -1; - - data_len = skb->len - hdr_len; - len = ieee80211_ccmp_hdr(skb, hdr_len, NULL, 0, priv); - if (len < 0) - return -1; - - pos = skb->data + hdr_len + CCMP_HDR_LEN; - mic = skb_put(skb, CCMP_MIC_LEN); - hdr = (struct ieee80211_hdr_4addr *)skb->data; - ccmp_init_blocks(key->tfm, hdr, key->tx_pn, data_len, b0, b, s0); - - blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); - last = data_len % AES_BLOCK_LEN; - - for (i = 1; i <= blocks; i++) { - len = (i == blocks && last) ? last : AES_BLOCK_LEN; - /* Authentication */ - xor_block(b, pos, len); - ieee80211_ccmp_aes_encrypt(key->tfm, b, b); - /* Encryption, with counter */ - b0[14] = (i >> 8) & 0xff; - b0[15] = i & 0xff; - ieee80211_ccmp_aes_encrypt(key->tfm, b0, e); - xor_block(pos, e, len); - pos += len; - } - - for (i = 0; i < CCMP_MIC_LEN; i++) - mic[i] = b[i] ^ s0[i]; - - return 0; -} - -/* - * deal with seq counter wrapping correctly. - * refer to timer_after() for jiffies wrapping handling - */ -static inline int ccmp_replay_check(u8 *pn_n, u8 *pn_o) -{ - u32 iv32_n, iv16_n; - u32 iv32_o, iv16_o; - - iv32_n = (pn_n[0] << 24) | (pn_n[1] << 16) | (pn_n[2] << 8) | pn_n[3]; - iv16_n = (pn_n[4] << 8) | pn_n[5]; - - iv32_o = (pn_o[0] << 24) | (pn_o[1] << 16) | (pn_o[2] << 8) | pn_o[3]; - iv16_o = (pn_o[4] << 8) | pn_o[5]; - - if ((s32)iv32_n - (s32)iv32_o < 0 || - (iv32_n == iv32_o && iv16_n <= iv16_o)) - return 1; - return 0; -} - -static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) -{ - struct ieee80211_ccmp_data *key = priv; - u8 keyidx, *pos; - struct ieee80211_hdr_4addr *hdr; - u8 *b0 = key->rx_b0; - u8 *b = key->rx_b; - u8 *a = key->rx_a; - u8 pn[6]; - int i, blocks, last, len; - size_t data_len = skb->len - hdr_len - CCMP_HDR_LEN - CCMP_MIC_LEN; - u8 *mic = skb->data + skb->len - CCMP_MIC_LEN; - - if (skb->len < hdr_len + CCMP_HDR_LEN + CCMP_MIC_LEN) { - key->dot11RSNAStatsCCMPFormatErrors++; - return -1; - } - - hdr = (struct ieee80211_hdr_4addr *)skb->data; - pos = skb->data + hdr_len; - keyidx = pos[3]; - if (!(keyidx & (1 << 5))) { - if (net_ratelimit()) { - printk(KERN_DEBUG "CCMP: received packet without ExtIV" - " flag from %pM\n", hdr->addr2); - } - key->dot11RSNAStatsCCMPFormatErrors++; - return -2; - } - keyidx >>= 6; - if (key->key_idx != keyidx) { - printk(KERN_DEBUG "CCMP: RX tkey->key_idx=%d frame " - "keyidx=%d priv=%p\n", key->key_idx, keyidx, priv); - return -6; - } - if (!key->key_set) { - if (net_ratelimit()) { - printk(KERN_DEBUG "CCMP: received packet from %pM" - " with keyid=%d that does not have a configured" - " key\n", hdr->addr2, keyidx); - } - return -3; - } - - pn[0] = pos[7]; - pn[1] = pos[6]; - pn[2] = pos[5]; - pn[3] = pos[4]; - pn[4] = pos[1]; - pn[5] = pos[0]; - pos += 8; - - if (ccmp_replay_check(pn, key->rx_pn)) { - if (ieee80211_ratelimit_debug(IEEE80211_DL_DROP)) { - IEEE80211_DEBUG_DROP("CCMP: replay detected: STA=%pM " - "previous PN %02x%02x%02x%02x%02x%02x " - "received PN %02x%02x%02x%02x%02x%02x\n", - hdr->addr2, - key->rx_pn[0], key->rx_pn[1], key->rx_pn[2], - key->rx_pn[3], key->rx_pn[4], key->rx_pn[5], - pn[0], pn[1], pn[2], pn[3], pn[4], pn[5]); - } - key->dot11RSNAStatsCCMPReplays++; - return -4; - } - - ccmp_init_blocks(key->tfm, hdr, pn, data_len, b0, a, b); - xor_block(mic, b, CCMP_MIC_LEN); - - blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); - last = data_len % AES_BLOCK_LEN; - - for (i = 1; i <= blocks; i++) { - len = (i == blocks && last) ? last : AES_BLOCK_LEN; - /* Decrypt, with counter */ - b0[14] = (i >> 8) & 0xff; - b0[15] = i & 0xff; - ieee80211_ccmp_aes_encrypt(key->tfm, b0, b); - xor_block(pos, b, len); - /* Authentication */ - xor_block(a, pos, len); - ieee80211_ccmp_aes_encrypt(key->tfm, a, a); - pos += len; - } - - if (memcmp(mic, a, CCMP_MIC_LEN) != 0) { - if (net_ratelimit()) { - printk(KERN_DEBUG "CCMP: decrypt failed: STA=" - "%pM\n", hdr->addr2); - } - key->dot11RSNAStatsCCMPDecryptErrors++; - return -5; - } - - memcpy(key->rx_pn, pn, CCMP_PN_LEN); - - /* Remove hdr and MIC */ - memmove(skb->data + CCMP_HDR_LEN, skb->data, hdr_len); - skb_pull(skb, CCMP_HDR_LEN); - skb_trim(skb, skb->len - CCMP_MIC_LEN); - - return keyidx; -} - -static int ieee80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv) -{ - struct ieee80211_ccmp_data *data = priv; - int keyidx; - struct crypto_cipher *tfm = data->tfm; - - keyidx = data->key_idx; - memset(data, 0, sizeof(*data)); - data->key_idx = keyidx; - data->tfm = tfm; - if (len == CCMP_TK_LEN) { - memcpy(data->key, key, CCMP_TK_LEN); - data->key_set = 1; - if (seq) { - data->rx_pn[0] = seq[5]; - data->rx_pn[1] = seq[4]; - data->rx_pn[2] = seq[3]; - data->rx_pn[3] = seq[2]; - data->rx_pn[4] = seq[1]; - data->rx_pn[5] = seq[0]; - } - crypto_cipher_setkey(data->tfm, data->key, CCMP_TK_LEN); - } else if (len == 0) - data->key_set = 0; - else - return -1; - - return 0; -} - -static int ieee80211_ccmp_get_key(void *key, int len, u8 * seq, void *priv) -{ - struct ieee80211_ccmp_data *data = priv; - - if (len < CCMP_TK_LEN) - return -1; - - if (!data->key_set) - return 0; - memcpy(key, data->key, CCMP_TK_LEN); - - if (seq) { - seq[0] = data->tx_pn[5]; - seq[1] = data->tx_pn[4]; - seq[2] = data->tx_pn[3]; - seq[3] = data->tx_pn[2]; - seq[4] = data->tx_pn[1]; - seq[5] = data->tx_pn[0]; - } - - return CCMP_TK_LEN; -} - -static char *ieee80211_ccmp_print_stats(char *p, void *priv) -{ - struct ieee80211_ccmp_data *ccmp = priv; - - p += sprintf(p, "key[%d] alg=CCMP key_set=%d " - "tx_pn=%02x%02x%02x%02x%02x%02x " - "rx_pn=%02x%02x%02x%02x%02x%02x " - "format_errors=%d replays=%d decrypt_errors=%d\n", - ccmp->key_idx, ccmp->key_set, - ccmp->tx_pn[0], ccmp->tx_pn[1], ccmp->tx_pn[2], - ccmp->tx_pn[3], ccmp->tx_pn[4], ccmp->tx_pn[5], - ccmp->rx_pn[0], ccmp->rx_pn[1], ccmp->rx_pn[2], - ccmp->rx_pn[3], ccmp->rx_pn[4], ccmp->rx_pn[5], - ccmp->dot11RSNAStatsCCMPFormatErrors, - ccmp->dot11RSNAStatsCCMPReplays, - ccmp->dot11RSNAStatsCCMPDecryptErrors); - - return p; -} - -static struct ieee80211_crypto_ops ieee80211_crypt_ccmp = { - .name = "CCMP", - .init = ieee80211_ccmp_init, - .deinit = ieee80211_ccmp_deinit, - .build_iv = ieee80211_ccmp_hdr, - .encrypt_mpdu = ieee80211_ccmp_encrypt, - .decrypt_mpdu = ieee80211_ccmp_decrypt, - .encrypt_msdu = NULL, - .decrypt_msdu = NULL, - .set_key = ieee80211_ccmp_set_key, - .get_key = ieee80211_ccmp_get_key, - .print_stats = ieee80211_ccmp_print_stats, - .extra_mpdu_prefix_len = CCMP_HDR_LEN, - .extra_mpdu_postfix_len = CCMP_MIC_LEN, - .owner = THIS_MODULE, -}; - -static int __init ieee80211_crypto_ccmp_init(void) -{ - return ieee80211_register_crypto_ops(&ieee80211_crypt_ccmp); -} - -static void __exit ieee80211_crypto_ccmp_exit(void) -{ - ieee80211_unregister_crypto_ops(&ieee80211_crypt_ccmp); -} - -module_init(ieee80211_crypto_ccmp_init); -module_exit(ieee80211_crypto_ccmp_exit); diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c deleted file mode 100644 index d12da1da6328..000000000000 --- a/net/ieee80211/ieee80211_crypt_tkip.c +++ /dev/null @@ -1,782 +0,0 @@ -/* - * Host AP crypt: host-based TKIP encryption implementation for Host AP driver - * - * Copyright (c) 2003-2004, Jouni Malinen - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. See README and COPYING for - * more details. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -MODULE_AUTHOR("Jouni Malinen"); -MODULE_DESCRIPTION("Host AP crypt: TKIP"); -MODULE_LICENSE("GPL"); - -struct ieee80211_tkip_data { -#define TKIP_KEY_LEN 32 - u8 key[TKIP_KEY_LEN]; - int key_set; - - u32 tx_iv32; - u16 tx_iv16; - u16 tx_ttak[5]; - int tx_phase1_done; - - u32 rx_iv32; - u16 rx_iv16; - u16 rx_ttak[5]; - int rx_phase1_done; - u32 rx_iv32_new; - u16 rx_iv16_new; - - u32 dot11RSNAStatsTKIPReplays; - u32 dot11RSNAStatsTKIPICVErrors; - u32 dot11RSNAStatsTKIPLocalMICFailures; - - int key_idx; - - struct crypto_blkcipher *rx_tfm_arc4; - struct crypto_hash *rx_tfm_michael; - struct crypto_blkcipher *tx_tfm_arc4; - struct crypto_hash *tx_tfm_michael; - - /* scratch buffers for virt_to_page() (crypto API) */ - u8 rx_hdr[16], tx_hdr[16]; - - unsigned long flags; -}; - -static unsigned long ieee80211_tkip_set_flags(unsigned long flags, void *priv) -{ - struct ieee80211_tkip_data *_priv = priv; - unsigned long old_flags = _priv->flags; - _priv->flags = flags; - return old_flags; -} - -static unsigned long ieee80211_tkip_get_flags(void *priv) -{ - struct ieee80211_tkip_data *_priv = priv; - return _priv->flags; -} - -static void *ieee80211_tkip_init(int key_idx) -{ - struct ieee80211_tkip_data *priv; - - priv = kzalloc(sizeof(*priv), GFP_ATOMIC); - if (priv == NULL) - goto fail; - - priv->key_idx = key_idx; - - priv->tx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, - CRYPTO_ALG_ASYNC); - if (IS_ERR(priv->tx_tfm_arc4)) { - printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate " - "crypto API arc4\n"); - priv->tx_tfm_arc4 = NULL; - goto fail; - } - - priv->tx_tfm_michael = crypto_alloc_hash("michael_mic", 0, - CRYPTO_ALG_ASYNC); - if (IS_ERR(priv->tx_tfm_michael)) { - printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate " - "crypto API michael_mic\n"); - priv->tx_tfm_michael = NULL; - goto fail; - } - - priv->rx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, - CRYPTO_ALG_ASYNC); - if (IS_ERR(priv->rx_tfm_arc4)) { - printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate " - "crypto API arc4\n"); - priv->rx_tfm_arc4 = NULL; - goto fail; - } - - priv->rx_tfm_michael = crypto_alloc_hash("michael_mic", 0, - CRYPTO_ALG_ASYNC); - if (IS_ERR(priv->rx_tfm_michael)) { - printk(KERN_DEBUG "ieee80211_crypt_tkip: could not allocate " - "crypto API michael_mic\n"); - priv->rx_tfm_michael = NULL; - goto fail; - } - - return priv; - - fail: - if (priv) { - if (priv->tx_tfm_michael) - crypto_free_hash(priv->tx_tfm_michael); - if (priv->tx_tfm_arc4) - crypto_free_blkcipher(priv->tx_tfm_arc4); - if (priv->rx_tfm_michael) - crypto_free_hash(priv->rx_tfm_michael); - if (priv->rx_tfm_arc4) - crypto_free_blkcipher(priv->rx_tfm_arc4); - kfree(priv); - } - - return NULL; -} - -static void ieee80211_tkip_deinit(void *priv) -{ - struct ieee80211_tkip_data *_priv = priv; - if (_priv) { - if (_priv->tx_tfm_michael) - crypto_free_hash(_priv->tx_tfm_michael); - if (_priv->tx_tfm_arc4) - crypto_free_blkcipher(_priv->tx_tfm_arc4); - if (_priv->rx_tfm_michael) - crypto_free_hash(_priv->rx_tfm_michael); - if (_priv->rx_tfm_arc4) - crypto_free_blkcipher(_priv->rx_tfm_arc4); - } - kfree(priv); -} - -static inline u16 RotR1(u16 val) -{ - return (val >> 1) | (val << 15); -} - -static inline u8 Lo8(u16 val) -{ - return val & 0xff; -} - -static inline u8 Hi8(u16 val) -{ - return val >> 8; -} - -static inline u16 Lo16(u32 val) -{ - return val & 0xffff; -} - -static inline u16 Hi16(u32 val) -{ - return val >> 16; -} - -static inline u16 Mk16(u8 hi, u8 lo) -{ - return lo | (((u16) hi) << 8); -} - -static inline u16 Mk16_le(__le16 * v) -{ - return le16_to_cpu(*v); -} - -static const u16 Sbox[256] = { - 0xC6A5, 0xF884, 0xEE99, 0xF68D, 0xFF0D, 0xD6BD, 0xDEB1, 0x9154, - 0x6050, 0x0203, 0xCEA9, 0x567D, 0xE719, 0xB562, 0x4DE6, 0xEC9A, - 0x8F45, 0x1F9D, 0x8940, 0xFA87, 0xEF15, 0xB2EB, 0x8EC9, 0xFB0B, - 0x41EC, 0xB367, 0x5FFD, 0x45EA, 0x23BF, 0x53F7, 0xE496, 0x9B5B, - 0x75C2, 0xE11C, 0x3DAE, 0x4C6A, 0x6C5A, 0x7E41, 0xF502, 0x834F, - 0x685C, 0x51F4, 0xD134, 0xF908, 0xE293, 0xAB73, 0x6253, 0x2A3F, - 0x080C, 0x9552, 0x4665, 0x9D5E, 0x3028, 0x37A1, 0x0A0F, 0x2FB5, - 0x0E09, 0x2436, 0x1B9B, 0xDF3D, 0xCD26, 0x4E69, 0x7FCD, 0xEA9F, - 0x121B, 0x1D9E, 0x5874, 0x342E, 0x362D, 0xDCB2, 0xB4EE, 0x5BFB, - 0xA4F6, 0x764D, 0xB761, 0x7DCE, 0x527B, 0xDD3E, 0x5E71, 0x1397, - 0xA6F5, 0xB968, 0x0000, 0xC12C, 0x4060, 0xE31F, 0x79C8, 0xB6ED, - 0xD4BE, 0x8D46, 0x67D9, 0x724B, 0x94DE, 0x98D4, 0xB0E8, 0x854A, - 0xBB6B, 0xC52A, 0x4FE5, 0xED16, 0x86C5, 0x9AD7, 0x6655, 0x1194, - 0x8ACF, 0xE910, 0x0406, 0xFE81, 0xA0F0, 0x7844, 0x25BA, 0x4BE3, - 0xA2F3, 0x5DFE, 0x80C0, 0x058A, 0x3FAD, 0x21BC, 0x7048, 0xF104, - 0x63DF, 0x77C1, 0xAF75, 0x4263, 0x2030, 0xE51A, 0xFD0E, 0xBF6D, - 0x814C, 0x1814, 0x2635, 0xC32F, 0xBEE1, 0x35A2, 0x88CC, 0x2E39, - 0x9357, 0x55F2, 0xFC82, 0x7A47, 0xC8AC, 0xBAE7, 0x322B, 0xE695, - 0xC0A0, 0x1998, 0x9ED1, 0xA37F, 0x4466, 0x547E, 0x3BAB, 0x0B83, - 0x8CCA, 0xC729, 0x6BD3, 0x283C, 0xA779, 0xBCE2, 0x161D, 0xAD76, - 0xDB3B, 0x6456, 0x744E, 0x141E, 0x92DB, 0x0C0A, 0x486C, 0xB8E4, - 0x9F5D, 0xBD6E, 0x43EF, 0xC4A6, 0x39A8, 0x31A4, 0xD337, 0xF28B, - 0xD532, 0x8B43, 0x6E59, 0xDAB7, 0x018C, 0xB164, 0x9CD2, 0x49E0, - 0xD8B4, 0xACFA, 0xF307, 0xCF25, 0xCAAF, 0xF48E, 0x47E9, 0x1018, - 0x6FD5, 0xF088, 0x4A6F, 0x5C72, 0x3824, 0x57F1, 0x73C7, 0x9751, - 0xCB23, 0xA17C, 0xE89C, 0x3E21, 0x96DD, 0x61DC, 0x0D86, 0x0F85, - 0xE090, 0x7C42, 0x71C4, 0xCCAA, 0x90D8, 0x0605, 0xF701, 0x1C12, - 0xC2A3, 0x6A5F, 0xAEF9, 0x69D0, 0x1791, 0x9958, 0x3A27, 0x27B9, - 0xD938, 0xEB13, 0x2BB3, 0x2233, 0xD2BB, 0xA970, 0x0789, 0x33A7, - 0x2DB6, 0x3C22, 0x1592, 0xC920, 0x8749, 0xAAFF, 0x5078, 0xA57A, - 0x038F, 0x59F8, 0x0980, 0x1A17, 0x65DA, 0xD731, 0x84C6, 0xD0B8, - 0x82C3, 0x29B0, 0x5A77, 0x1E11, 0x7BCB, 0xA8FC, 0x6DD6, 0x2C3A, -}; - -static inline u16 _S_(u16 v) -{ - u16 t = Sbox[Hi8(v)]; - return Sbox[Lo8(v)] ^ ((t << 8) | (t >> 8)); -} - -#define PHASE1_LOOP_COUNT 8 - -static void tkip_mixing_phase1(u16 * TTAK, const u8 * TK, const u8 * TA, - u32 IV32) -{ - int i, j; - - /* Initialize the 80-bit TTAK from TSC (IV32) and TA[0..5] */ - TTAK[0] = Lo16(IV32); - TTAK[1] = Hi16(IV32); - TTAK[2] = Mk16(TA[1], TA[0]); - TTAK[3] = Mk16(TA[3], TA[2]); - TTAK[4] = Mk16(TA[5], TA[4]); - - for (i = 0; i < PHASE1_LOOP_COUNT; i++) { - j = 2 * (i & 1); - TTAK[0] += _S_(TTAK[4] ^ Mk16(TK[1 + j], TK[0 + j])); - TTAK[1] += _S_(TTAK[0] ^ Mk16(TK[5 + j], TK[4 + j])); - TTAK[2] += _S_(TTAK[1] ^ Mk16(TK[9 + j], TK[8 + j])); - TTAK[3] += _S_(TTAK[2] ^ Mk16(TK[13 + j], TK[12 + j])); - TTAK[4] += _S_(TTAK[3] ^ Mk16(TK[1 + j], TK[0 + j])) + i; - } -} - -static void tkip_mixing_phase2(u8 * WEPSeed, const u8 * TK, const u16 * TTAK, - u16 IV16) -{ - /* Make temporary area overlap WEP seed so that the final copy can be - * avoided on little endian hosts. */ - u16 *PPK = (u16 *) & WEPSeed[4]; - - /* Step 1 - make copy of TTAK and bring in TSC */ - PPK[0] = TTAK[0]; - PPK[1] = TTAK[1]; - PPK[2] = TTAK[2]; - PPK[3] = TTAK[3]; - PPK[4] = TTAK[4]; - PPK[5] = TTAK[4] + IV16; - - /* Step 2 - 96-bit bijective mixing using S-box */ - PPK[0] += _S_(PPK[5] ^ Mk16_le((__le16 *) & TK[0])); - PPK[1] += _S_(PPK[0] ^ Mk16_le((__le16 *) & TK[2])); - PPK[2] += _S_(PPK[1] ^ Mk16_le((__le16 *) & TK[4])); - PPK[3] += _S_(PPK[2] ^ Mk16_le((__le16 *) & TK[6])); - PPK[4] += _S_(PPK[3] ^ Mk16_le((__le16 *) & TK[8])); - PPK[5] += _S_(PPK[4] ^ Mk16_le((__le16 *) & TK[10])); - - PPK[0] += RotR1(PPK[5] ^ Mk16_le((__le16 *) & TK[12])); - PPK[1] += RotR1(PPK[0] ^ Mk16_le((__le16 *) & TK[14])); - PPK[2] += RotR1(PPK[1]); - PPK[3] += RotR1(PPK[2]); - PPK[4] += RotR1(PPK[3]); - PPK[5] += RotR1(PPK[4]); - - /* Step 3 - bring in last of TK bits, assign 24-bit WEP IV value - * WEPSeed[0..2] is transmitted as WEP IV */ - WEPSeed[0] = Hi8(IV16); - WEPSeed[1] = (Hi8(IV16) | 0x20) & 0x7F; - WEPSeed[2] = Lo8(IV16); - WEPSeed[3] = Lo8((PPK[5] ^ Mk16_le((__le16 *) & TK[0])) >> 1); - -#ifdef __BIG_ENDIAN - { - int i; - for (i = 0; i < 6; i++) - PPK[i] = (PPK[i] << 8) | (PPK[i] >> 8); - } -#endif -} - -static int ieee80211_tkip_hdr(struct sk_buff *skb, int hdr_len, - u8 * rc4key, int keylen, void *priv) -{ - struct ieee80211_tkip_data *tkey = priv; - int len; - u8 *pos; - struct ieee80211_hdr_4addr *hdr; - - hdr = (struct ieee80211_hdr_4addr *)skb->data; - - if (skb_headroom(skb) < 8 || skb->len < hdr_len) - return -1; - - if (rc4key == NULL || keylen < 16) - return -1; - - if (!tkey->tx_phase1_done) { - tkip_mixing_phase1(tkey->tx_ttak, tkey->key, hdr->addr2, - tkey->tx_iv32); - tkey->tx_phase1_done = 1; - } - tkip_mixing_phase2(rc4key, tkey->key, tkey->tx_ttak, tkey->tx_iv16); - - len = skb->len - hdr_len; - pos = skb_push(skb, 8); - memmove(pos, pos + 8, hdr_len); - pos += hdr_len; - - *pos++ = *rc4key; - *pos++ = *(rc4key + 1); - *pos++ = *(rc4key + 2); - *pos++ = (tkey->key_idx << 6) | (1 << 5) /* Ext IV included */ ; - *pos++ = tkey->tx_iv32 & 0xff; - *pos++ = (tkey->tx_iv32 >> 8) & 0xff; - *pos++ = (tkey->tx_iv32 >> 16) & 0xff; - *pos++ = (tkey->tx_iv32 >> 24) & 0xff; - - tkey->tx_iv16++; - if (tkey->tx_iv16 == 0) { - tkey->tx_phase1_done = 0; - tkey->tx_iv32++; - } - - return 8; -} - -static int ieee80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) -{ - struct ieee80211_tkip_data *tkey = priv; - struct blkcipher_desc desc = { .tfm = tkey->tx_tfm_arc4 }; - int len; - u8 rc4key[16], *pos, *icv; - u32 crc; - struct scatterlist sg; - - if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) { - if (net_ratelimit()) { - struct ieee80211_hdr_4addr *hdr = - (struct ieee80211_hdr_4addr *)skb->data; - printk(KERN_DEBUG ": TKIP countermeasures: dropped " - "TX packet to %pM\n", hdr->addr1); - } - return -1; - } - - if (skb_tailroom(skb) < 4 || skb->len < hdr_len) - return -1; - - len = skb->len - hdr_len; - pos = skb->data + hdr_len; - - if ((ieee80211_tkip_hdr(skb, hdr_len, rc4key, 16, priv)) < 0) - return -1; - - icv = skb_put(skb, 4); - - crc = ~crc32_le(~0, pos, len); - icv[0] = crc; - icv[1] = crc >> 8; - icv[2] = crc >> 16; - icv[3] = crc >> 24; - - crypto_blkcipher_setkey(tkey->tx_tfm_arc4, rc4key, 16); - sg_init_one(&sg, pos, len + 4); - return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4); -} - -/* - * deal with seq counter wrapping correctly. - * refer to timer_after() for jiffies wrapping handling - */ -static inline int tkip_replay_check(u32 iv32_n, u16 iv16_n, - u32 iv32_o, u16 iv16_o) -{ - if ((s32)iv32_n - (s32)iv32_o < 0 || - (iv32_n == iv32_o && iv16_n <= iv16_o)) - return 1; - return 0; -} - -static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) -{ - struct ieee80211_tkip_data *tkey = priv; - struct blkcipher_desc desc = { .tfm = tkey->rx_tfm_arc4 }; - u8 rc4key[16]; - u8 keyidx, *pos; - u32 iv32; - u16 iv16; - struct ieee80211_hdr_4addr *hdr; - u8 icv[4]; - u32 crc; - struct scatterlist sg; - int plen; - - hdr = (struct ieee80211_hdr_4addr *)skb->data; - - if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) { - if (net_ratelimit()) { - printk(KERN_DEBUG ": TKIP countermeasures: dropped " - "received packet from %pM\n", hdr->addr2); - } - return -1; - } - - if (skb->len < hdr_len + 8 + 4) - return -1; - - pos = skb->data + hdr_len; - keyidx = pos[3]; - if (!(keyidx & (1 << 5))) { - if (net_ratelimit()) { - printk(KERN_DEBUG "TKIP: received packet without ExtIV" - " flag from %pM\n", hdr->addr2); - } - return -2; - } - keyidx >>= 6; - if (tkey->key_idx != keyidx) { - printk(KERN_DEBUG "TKIP: RX tkey->key_idx=%d frame " - "keyidx=%d priv=%p\n", tkey->key_idx, keyidx, priv); - return -6; - } - if (!tkey->key_set) { - if (net_ratelimit()) { - printk(KERN_DEBUG "TKIP: received packet from %pM" - " with keyid=%d that does not have a configured" - " key\n", hdr->addr2, keyidx); - } - return -3; - } - iv16 = (pos[0] << 8) | pos[2]; - iv32 = pos[4] | (pos[5] << 8) | (pos[6] << 16) | (pos[7] << 24); - pos += 8; - - if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) { - if (ieee80211_ratelimit_debug(IEEE80211_DL_DROP)) { - IEEE80211_DEBUG_DROP("TKIP: replay detected: STA=%pM" - " previous TSC %08x%04x received TSC " - "%08x%04x\n", hdr->addr2, - tkey->rx_iv32, tkey->rx_iv16, iv32, iv16); - } - tkey->dot11RSNAStatsTKIPReplays++; - return -4; - } - - if (iv32 != tkey->rx_iv32 || !tkey->rx_phase1_done) { - tkip_mixing_phase1(tkey->rx_ttak, tkey->key, hdr->addr2, iv32); - tkey->rx_phase1_done = 1; - } - tkip_mixing_phase2(rc4key, tkey->key, tkey->rx_ttak, iv16); - - plen = skb->len - hdr_len - 12; - - crypto_blkcipher_setkey(tkey->rx_tfm_arc4, rc4key, 16); - sg_init_one(&sg, pos, plen + 4); - if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) { - if (net_ratelimit()) { - printk(KERN_DEBUG ": TKIP: failed to decrypt " - "received packet from %pM\n", - hdr->addr2); - } - return -7; - } - - crc = ~crc32_le(~0, pos, plen); - icv[0] = crc; - icv[1] = crc >> 8; - icv[2] = crc >> 16; - icv[3] = crc >> 24; - if (memcmp(icv, pos + plen, 4) != 0) { - if (iv32 != tkey->rx_iv32) { - /* Previously cached Phase1 result was already lost, so - * it needs to be recalculated for the next packet. */ - tkey->rx_phase1_done = 0; - } - if (ieee80211_ratelimit_debug(IEEE80211_DL_DROP)) { - IEEE80211_DEBUG_DROP("TKIP: ICV error detected: STA=" - "%pM\n", hdr->addr2); - } - tkey->dot11RSNAStatsTKIPICVErrors++; - return -5; - } - - /* Update real counters only after Michael MIC verification has - * completed */ - tkey->rx_iv32_new = iv32; - tkey->rx_iv16_new = iv16; - - /* Remove IV and ICV */ - memmove(skb->data + 8, skb->data, hdr_len); - skb_pull(skb, 8); - skb_trim(skb, skb->len - 4); - - return keyidx; -} - -static int michael_mic(struct crypto_hash *tfm_michael, u8 * key, u8 * hdr, - u8 * data, size_t data_len, u8 * mic) -{ - struct hash_desc desc; - struct scatterlist sg[2]; - - if (tfm_michael == NULL) { - printk(KERN_WARNING "michael_mic: tfm_michael == NULL\n"); - return -1; - } - sg_init_table(sg, 2); - sg_set_buf(&sg[0], hdr, 16); - sg_set_buf(&sg[1], data, data_len); - - if (crypto_hash_setkey(tfm_michael, key, 8)) - return -1; - - desc.tfm = tfm_michael; - desc.flags = 0; - return crypto_hash_digest(&desc, sg, data_len + 16, mic); -} - -static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr) -{ - struct ieee80211_hdr_4addr *hdr11; - u16 stype; - - hdr11 = (struct ieee80211_hdr_4addr *)skb->data; - stype = WLAN_FC_GET_STYPE(le16_to_cpu(hdr11->frame_ctl)); - - switch (le16_to_cpu(hdr11->frame_ctl) & - (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) { - case IEEE80211_FCTL_TODS: - memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */ - memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */ - break; - case IEEE80211_FCTL_FROMDS: - memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */ - memcpy(hdr + ETH_ALEN, hdr11->addr3, ETH_ALEN); /* SA */ - break; - case IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS: - memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */ - memcpy(hdr + ETH_ALEN, hdr11->addr4, ETH_ALEN); /* SA */ - break; - case 0: - memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */ - memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */ - break; - } - - if (stype & IEEE80211_STYPE_QOS_DATA) { - const struct ieee80211_hdr_3addrqos *qoshdr = - (struct ieee80211_hdr_3addrqos *)skb->data; - hdr[12] = le16_to_cpu(qoshdr->qos_ctl) & IEEE80211_QCTL_TID; - } else - hdr[12] = 0; /* priority */ - - hdr[13] = hdr[14] = hdr[15] = 0; /* reserved */ -} - -static int ieee80211_michael_mic_add(struct sk_buff *skb, int hdr_len, - void *priv) -{ - struct ieee80211_tkip_data *tkey = priv; - u8 *pos; - - if (skb_tailroom(skb) < 8 || skb->len < hdr_len) { - printk(KERN_DEBUG "Invalid packet for Michael MIC add " - "(tailroom=%d hdr_len=%d skb->len=%d)\n", - skb_tailroom(skb), hdr_len, skb->len); - return -1; - } - - michael_mic_hdr(skb, tkey->tx_hdr); - pos = skb_put(skb, 8); - if (michael_mic(tkey->tx_tfm_michael, &tkey->key[16], tkey->tx_hdr, - skb->data + hdr_len, skb->len - 8 - hdr_len, pos)) - return -1; - - return 0; -} - -static void ieee80211_michael_mic_failure(struct net_device *dev, - struct ieee80211_hdr_4addr *hdr, - int keyidx) -{ - union iwreq_data wrqu; - struct iw_michaelmicfailure ev; - - /* TODO: needed parameters: count, keyid, key type, TSC */ - memset(&ev, 0, sizeof(ev)); - ev.flags = keyidx & IW_MICFAILURE_KEY_ID; - if (hdr->addr1[0] & 0x01) - ev.flags |= IW_MICFAILURE_GROUP; - else - ev.flags |= IW_MICFAILURE_PAIRWISE; - ev.src_addr.sa_family = ARPHRD_ETHER; - memcpy(ev.src_addr.sa_data, hdr->addr2, ETH_ALEN); - memset(&wrqu, 0, sizeof(wrqu)); - wrqu.data.length = sizeof(ev); - wireless_send_event(dev, IWEVMICHAELMICFAILURE, &wrqu, (char *)&ev); -} - -static int ieee80211_michael_mic_verify(struct sk_buff *skb, int keyidx, - int hdr_len, void *priv) -{ - struct ieee80211_tkip_data *tkey = priv; - u8 mic[8]; - - if (!tkey->key_set) - return -1; - - michael_mic_hdr(skb, tkey->rx_hdr); - if (michael_mic(tkey->rx_tfm_michael, &tkey->key[24], tkey->rx_hdr, - skb->data + hdr_len, skb->len - 8 - hdr_len, mic)) - return -1; - if (memcmp(mic, skb->data + skb->len - 8, 8) != 0) { - struct ieee80211_hdr_4addr *hdr; - hdr = (struct ieee80211_hdr_4addr *)skb->data; - printk(KERN_DEBUG "%s: Michael MIC verification failed for " - "MSDU from %pM keyidx=%d\n", - skb->dev ? skb->dev->name : "N/A", hdr->addr2, - keyidx); - if (skb->dev) - ieee80211_michael_mic_failure(skb->dev, hdr, keyidx); - tkey->dot11RSNAStatsTKIPLocalMICFailures++; - return -1; - } - - /* Update TSC counters for RX now that the packet verification has - * completed. */ - tkey->rx_iv32 = tkey->rx_iv32_new; - tkey->rx_iv16 = tkey->rx_iv16_new; - - skb_trim(skb, skb->len - 8); - - return 0; -} - -static int ieee80211_tkip_set_key(void *key, int len, u8 * seq, void *priv) -{ - struct ieee80211_tkip_data *tkey = priv; - int keyidx; - struct crypto_hash *tfm = tkey->tx_tfm_michael; - struct crypto_blkcipher *tfm2 = tkey->tx_tfm_arc4; - struct crypto_hash *tfm3 = tkey->rx_tfm_michael; - struct crypto_blkcipher *tfm4 = tkey->rx_tfm_arc4; - - keyidx = tkey->key_idx; - memset(tkey, 0, sizeof(*tkey)); - tkey->key_idx = keyidx; - tkey->tx_tfm_michael = tfm; - tkey->tx_tfm_arc4 = tfm2; - tkey->rx_tfm_michael = tfm3; - tkey->rx_tfm_arc4 = tfm4; - if (len == TKIP_KEY_LEN) { - memcpy(tkey->key, key, TKIP_KEY_LEN); - tkey->key_set = 1; - tkey->tx_iv16 = 1; /* TSC is initialized to 1 */ - if (seq) { - tkey->rx_iv32 = (seq[5] << 24) | (seq[4] << 16) | - (seq[3] << 8) | seq[2]; - tkey->rx_iv16 = (seq[1] << 8) | seq[0]; - } - } else if (len == 0) - tkey->key_set = 0; - else - return -1; - - return 0; -} - -static int ieee80211_tkip_get_key(void *key, int len, u8 * seq, void *priv) -{ - struct ieee80211_tkip_data *tkey = priv; - - if (len < TKIP_KEY_LEN) - return -1; - - if (!tkey->key_set) - return 0; - memcpy(key, tkey->key, TKIP_KEY_LEN); - - if (seq) { - /* Return the sequence number of the last transmitted frame. */ - u16 iv16 = tkey->tx_iv16; - u32 iv32 = tkey->tx_iv32; - if (iv16 == 0) - iv32--; - iv16--; - seq[0] = tkey->tx_iv16; - seq[1] = tkey->tx_iv16 >> 8; - seq[2] = tkey->tx_iv32; - seq[3] = tkey->tx_iv32 >> 8; - seq[4] = tkey->tx_iv32 >> 16; - seq[5] = tkey->tx_iv32 >> 24; - } - - return TKIP_KEY_LEN; -} - -static char *ieee80211_tkip_print_stats(char *p, void *priv) -{ - struct ieee80211_tkip_data *tkip = priv; - p += sprintf(p, "key[%d] alg=TKIP key_set=%d " - "tx_pn=%02x%02x%02x%02x%02x%02x " - "rx_pn=%02x%02x%02x%02x%02x%02x " - "replays=%d icv_errors=%d local_mic_failures=%d\n", - tkip->key_idx, tkip->key_set, - (tkip->tx_iv32 >> 24) & 0xff, - (tkip->tx_iv32 >> 16) & 0xff, - (tkip->tx_iv32 >> 8) & 0xff, - tkip->tx_iv32 & 0xff, - (tkip->tx_iv16 >> 8) & 0xff, - tkip->tx_iv16 & 0xff, - (tkip->rx_iv32 >> 24) & 0xff, - (tkip->rx_iv32 >> 16) & 0xff, - (tkip->rx_iv32 >> 8) & 0xff, - tkip->rx_iv32 & 0xff, - (tkip->rx_iv16 >> 8) & 0xff, - tkip->rx_iv16 & 0xff, - tkip->dot11RSNAStatsTKIPReplays, - tkip->dot11RSNAStatsTKIPICVErrors, - tkip->dot11RSNAStatsTKIPLocalMICFailures); - return p; -} - -static struct ieee80211_crypto_ops ieee80211_crypt_tkip = { - .name = "TKIP", - .init = ieee80211_tkip_init, - .deinit = ieee80211_tkip_deinit, - .build_iv = ieee80211_tkip_hdr, - .encrypt_mpdu = ieee80211_tkip_encrypt, - .decrypt_mpdu = ieee80211_tkip_decrypt, - .encrypt_msdu = ieee80211_michael_mic_add, - .decrypt_msdu = ieee80211_michael_mic_verify, - .set_key = ieee80211_tkip_set_key, - .get_key = ieee80211_tkip_get_key, - .print_stats = ieee80211_tkip_print_stats, - .extra_mpdu_prefix_len = 4 + 4, /* IV + ExtIV */ - .extra_mpdu_postfix_len = 4, /* ICV */ - .extra_msdu_postfix_len = 8, /* MIC */ - .get_flags = ieee80211_tkip_get_flags, - .set_flags = ieee80211_tkip_set_flags, - .owner = THIS_MODULE, -}; - -static int __init ieee80211_crypto_tkip_init(void) -{ - return ieee80211_register_crypto_ops(&ieee80211_crypt_tkip); -} - -static void __exit ieee80211_crypto_tkip_exit(void) -{ - ieee80211_unregister_crypto_ops(&ieee80211_crypt_tkip); -} - -module_init(ieee80211_crypto_tkip_init); -module_exit(ieee80211_crypto_tkip_exit); diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c deleted file mode 100644 index 3fa30c40779f..000000000000 --- a/net/ieee80211/ieee80211_crypt_wep.c +++ /dev/null @@ -1,295 +0,0 @@ -/* - * Host AP crypt: host-based WEP encryption implementation for Host AP driver - * - * Copyright (c) 2002-2004, Jouni Malinen - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. See README and COPYING for - * more details. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -MODULE_AUTHOR("Jouni Malinen"); -MODULE_DESCRIPTION("Host AP crypt: WEP"); -MODULE_LICENSE("GPL"); - -struct prism2_wep_data { - u32 iv; -#define WEP_KEY_LEN 13 - u8 key[WEP_KEY_LEN + 1]; - u8 key_len; - u8 key_idx; - struct crypto_blkcipher *tx_tfm; - struct crypto_blkcipher *rx_tfm; -}; - -static void *prism2_wep_init(int keyidx) -{ - struct prism2_wep_data *priv; - - priv = kzalloc(sizeof(*priv), GFP_ATOMIC); - if (priv == NULL) - goto fail; - priv->key_idx = keyidx; - - priv->tx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(priv->tx_tfm)) { - printk(KERN_DEBUG "ieee80211_crypt_wep: could not allocate " - "crypto API arc4\n"); - priv->tx_tfm = NULL; - goto fail; - } - - priv->rx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(priv->rx_tfm)) { - printk(KERN_DEBUG "ieee80211_crypt_wep: could not allocate " - "crypto API arc4\n"); - priv->rx_tfm = NULL; - goto fail; - } - /* start WEP IV from a random value */ - get_random_bytes(&priv->iv, 4); - - return priv; - - fail: - if (priv) { - if (priv->tx_tfm) - crypto_free_blkcipher(priv->tx_tfm); - if (priv->rx_tfm) - crypto_free_blkcipher(priv->rx_tfm); - kfree(priv); - } - return NULL; -} - -static void prism2_wep_deinit(void *priv) -{ - struct prism2_wep_data *_priv = priv; - if (_priv) { - if (_priv->tx_tfm) - crypto_free_blkcipher(_priv->tx_tfm); - if (_priv->rx_tfm) - crypto_free_blkcipher(_priv->rx_tfm); - } - kfree(priv); -} - -/* Add WEP IV/key info to a frame that has at least 4 bytes of headroom */ -static int prism2_wep_build_iv(struct sk_buff *skb, int hdr_len, - u8 *key, int keylen, void *priv) -{ - struct prism2_wep_data *wep = priv; - u32 klen, len; - u8 *pos; - - if (skb_headroom(skb) < 4 || skb->len < hdr_len) - return -1; - - len = skb->len - hdr_len; - pos = skb_push(skb, 4); - memmove(pos, pos + 4, hdr_len); - pos += hdr_len; - - klen = 3 + wep->key_len; - - wep->iv++; - - /* Fluhrer, Mantin, and Shamir have reported weaknesses in the key - * scheduling algorithm of RC4. At least IVs (KeyByte + 3, 0xff, N) - * can be used to speedup attacks, so avoid using them. */ - if ((wep->iv & 0xff00) == 0xff00) { - u8 B = (wep->iv >> 16) & 0xff; - if (B >= 3 && B < klen) - wep->iv += 0x0100; - } - - /* Prepend 24-bit IV to RC4 key and TX frame */ - *pos++ = (wep->iv >> 16) & 0xff; - *pos++ = (wep->iv >> 8) & 0xff; - *pos++ = wep->iv & 0xff; - *pos++ = wep->key_idx << 6; - - return 0; -} - -/* Perform WEP encryption on given skb that has at least 4 bytes of headroom - * for IV and 4 bytes of tailroom for ICV. Both IV and ICV will be transmitted, - * so the payload length increases with 8 bytes. - * - * WEP frame payload: IV + TX key idx, RC4(data), ICV = RC4(CRC32(data)) - */ -static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) -{ - struct prism2_wep_data *wep = priv; - struct blkcipher_desc desc = { .tfm = wep->tx_tfm }; - u32 crc, klen, len; - u8 *pos, *icv; - struct scatterlist sg; - u8 key[WEP_KEY_LEN + 3]; - - /* other checks are in prism2_wep_build_iv */ - if (skb_tailroom(skb) < 4) - return -1; - - /* add the IV to the frame */ - if (prism2_wep_build_iv(skb, hdr_len, NULL, 0, priv)) - return -1; - - /* Copy the IV into the first 3 bytes of the key */ - skb_copy_from_linear_data_offset(skb, hdr_len, key, 3); - - /* Copy rest of the WEP key (the secret part) */ - memcpy(key + 3, wep->key, wep->key_len); - - len = skb->len - hdr_len - 4; - pos = skb->data + hdr_len + 4; - klen = 3 + wep->key_len; - - /* Append little-endian CRC32 over only the data and encrypt it to produce ICV */ - crc = ~crc32_le(~0, pos, len); - icv = skb_put(skb, 4); - icv[0] = crc; - icv[1] = crc >> 8; - icv[2] = crc >> 16; - icv[3] = crc >> 24; - - crypto_blkcipher_setkey(wep->tx_tfm, key, klen); - sg_init_one(&sg, pos, len + 4); - return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4); -} - -/* Perform WEP decryption on given buffer. Buffer includes whole WEP part of - * the frame: IV (4 bytes), encrypted payload (including SNAP header), - * ICV (4 bytes). len includes both IV and ICV. - * - * Returns 0 if frame was decrypted successfully and ICV was correct and -1 on - * failure. If frame is OK, IV and ICV will be removed. - */ -static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv) -{ - struct prism2_wep_data *wep = priv; - struct blkcipher_desc desc = { .tfm = wep->rx_tfm }; - u32 crc, klen, plen; - u8 key[WEP_KEY_LEN + 3]; - u8 keyidx, *pos, icv[4]; - struct scatterlist sg; - - if (skb->len < hdr_len + 8) - return -1; - - pos = skb->data + hdr_len; - key[0] = *pos++; - key[1] = *pos++; - key[2] = *pos++; - keyidx = *pos++ >> 6; - if (keyidx != wep->key_idx) - return -1; - - klen = 3 + wep->key_len; - - /* Copy rest of the WEP key (the secret part) */ - memcpy(key + 3, wep->key, wep->key_len); - - /* Apply RC4 to data and compute CRC32 over decrypted data */ - plen = skb->len - hdr_len - 8; - - crypto_blkcipher_setkey(wep->rx_tfm, key, klen); - sg_init_one(&sg, pos, plen + 4); - if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) - return -7; - - crc = ~crc32_le(~0, pos, plen); - icv[0] = crc; - icv[1] = crc >> 8; - icv[2] = crc >> 16; - icv[3] = crc >> 24; - if (memcmp(icv, pos + plen, 4) != 0) { - /* ICV mismatch - drop frame */ - return -2; - } - - /* Remove IV and ICV */ - memmove(skb->data + 4, skb->data, hdr_len); - skb_pull(skb, 4); - skb_trim(skb, skb->len - 4); - - return 0; -} - -static int prism2_wep_set_key(void *key, int len, u8 * seq, void *priv) -{ - struct prism2_wep_data *wep = priv; - - if (len < 0 || len > WEP_KEY_LEN) - return -1; - - memcpy(wep->key, key, len); - wep->key_len = len; - - return 0; -} - -static int prism2_wep_get_key(void *key, int len, u8 * seq, void *priv) -{ - struct prism2_wep_data *wep = priv; - - if (len < wep->key_len) - return -1; - - memcpy(key, wep->key, wep->key_len); - - return wep->key_len; -} - -static char *prism2_wep_print_stats(char *p, void *priv) -{ - struct prism2_wep_data *wep = priv; - p += sprintf(p, "key[%d] alg=WEP len=%d\n", wep->key_idx, wep->key_len); - return p; -} - -static struct ieee80211_crypto_ops ieee80211_crypt_wep = { - .name = "WEP", - .init = prism2_wep_init, - .deinit = prism2_wep_deinit, - .build_iv = prism2_wep_build_iv, - .encrypt_mpdu = prism2_wep_encrypt, - .decrypt_mpdu = prism2_wep_decrypt, - .encrypt_msdu = NULL, - .decrypt_msdu = NULL, - .set_key = prism2_wep_set_key, - .get_key = prism2_wep_get_key, - .print_stats = prism2_wep_print_stats, - .extra_mpdu_prefix_len = 4, /* IV */ - .extra_mpdu_postfix_len = 4, /* ICV */ - .owner = THIS_MODULE, -}; - -static int __init ieee80211_crypto_wep_init(void) -{ - return ieee80211_register_crypto_ops(&ieee80211_crypt_wep); -} - -static void __exit ieee80211_crypto_wep_exit(void) -{ - ieee80211_unregister_crypto_ops(&ieee80211_crypt_wep); -} - -module_init(ieee80211_crypto_wep_init); -module_exit(ieee80211_crypto_wep_exit); diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c index d34d4e79b6f7..cf21f0bd8569 100644 --- a/net/ieee80211/ieee80211_module.c +++ b/net/ieee80211/ieee80211_module.c @@ -180,13 +180,16 @@ struct net_device *alloc_ieee80211(int sizeof_priv) ieee->host_open_frag = 1; ieee->ieee802_1x = 1; /* Default to supporting 802.1x */ - INIT_LIST_HEAD(&ieee->crypt_deinit_list); - setup_timer(&ieee->crypt_deinit_timer, ieee80211_crypt_deinit_handler, - (unsigned long)ieee); - ieee->crypt_quiesced = 0; - spin_lock_init(&ieee->lock); + ieee->crypt_info.name = dev->name; + ieee->crypt_info.lock = &ieee->lock; + INIT_LIST_HEAD(&ieee->crypt_info.crypt_deinit_list); + setup_timer(&ieee->crypt_info.crypt_deinit_timer, + lib80211_crypt_deinit_handler, + (unsigned long)&ieee->crypt_info); + ieee->crypt_info.crypt_quiesced = 0; + ieee->wpa_enabled = 0; ieee->drop_unencrypted = 0; ieee->privacy_invoked = 0; @@ -205,19 +208,19 @@ void free_ieee80211(struct net_device *dev) int i; - ieee80211_crypt_quiescing(ieee); - del_timer_sync(&ieee->crypt_deinit_timer); - ieee80211_crypt_deinit_entries(ieee, 1); + lib80211_crypt_quiescing(&ieee->crypt_info); + del_timer_sync(&ieee->crypt_info.crypt_deinit_timer); + lib80211_crypt_deinit_entries(&ieee->crypt_info, 1); for (i = 0; i < WEP_KEYS; i++) { - struct ieee80211_crypt_data *crypt = ieee->crypt[i]; + struct lib80211_crypt_data *crypt = ieee->crypt_info.crypt[i]; if (crypt) { if (crypt->ops) { crypt->ops->deinit(crypt->priv); module_put(crypt->ops->owner); } kfree(crypt); - ieee->crypt[i] = NULL; + ieee->crypt_info.crypt[i] = NULL; } } diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c index 3dd58b594f6a..9c67dfae4320 100644 --- a/net/ieee80211/ieee80211_rx.c +++ b/net/ieee80211/ieee80211_rx.c @@ -268,7 +268,7 @@ static int ieee80211_is_eapol_frame(struct ieee80211_device *ieee, /* Called only as a tasklet (software IRQ), by ieee80211_rx */ static int ieee80211_rx_frame_decrypt(struct ieee80211_device *ieee, struct sk_buff *skb, - struct ieee80211_crypt_data *crypt) + struct lib80211_crypt_data *crypt) { struct ieee80211_hdr_3addr *hdr; int res, hdrlen; @@ -300,7 +300,7 @@ ieee80211_rx_frame_decrypt(struct ieee80211_device *ieee, struct sk_buff *skb, static int ieee80211_rx_frame_decrypt_msdu(struct ieee80211_device *ieee, struct sk_buff *skb, int keyidx, - struct ieee80211_crypt_data *crypt) + struct lib80211_crypt_data *crypt) { struct ieee80211_hdr_3addr *hdr; int res, hdrlen; @@ -348,7 +348,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, #endif u8 dst[ETH_ALEN]; u8 src[ETH_ALEN]; - struct ieee80211_crypt_data *crypt = NULL; + struct lib80211_crypt_data *crypt = NULL; int keyidx = 0; int can_be_decrypted = 0; @@ -431,7 +431,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, * is only allowed 2-bits of storage, no value of keyidx can * be provided via above code that would result in keyidx * being out of range */ - crypt = ieee->crypt[keyidx]; + crypt = ieee->crypt_info.crypt[keyidx]; #ifdef NOT_YET sta = NULL; diff --git a/net/ieee80211/ieee80211_tx.c b/net/ieee80211/ieee80211_tx.c index d996547f7a62..f78f57e8844a 100644 --- a/net/ieee80211/ieee80211_tx.c +++ b/net/ieee80211/ieee80211_tx.c @@ -152,7 +152,8 @@ static int ieee80211_copy_snap(u8 * data, __be16 h_proto) static int ieee80211_encrypt_fragment(struct ieee80211_device *ieee, struct sk_buff *frag, int hdr_len) { - struct ieee80211_crypt_data *crypt = ieee->crypt[ieee->tx_keyidx]; + struct lib80211_crypt_data *crypt = + ieee->crypt_info.crypt[ieee->crypt_info.tx_keyidx]; int res; if (crypt == NULL) @@ -270,7 +271,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) .qos_ctl = 0 }; u8 dest[ETH_ALEN], src[ETH_ALEN]; - struct ieee80211_crypt_data *crypt; + struct lib80211_crypt_data *crypt; int priority = skb->priority; int snapped = 0; @@ -294,7 +295,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) ether_type = ((struct ethhdr *)skb->data)->h_proto; - crypt = ieee->crypt[ieee->tx_keyidx]; + crypt = ieee->crypt_info.crypt[ieee->crypt_info.tx_keyidx]; encrypt = !(ether_type == htons(ETH_P_PAE) && ieee->ieee802_1x) && ieee->sec.encrypt; diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c index 7cc4e5ee3660..31ea3abfc327 100644 --- a/net/ieee80211/ieee80211_wx.c +++ b/net/ieee80211/ieee80211_wx.c @@ -307,7 +307,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, .flags = 0 }; int i, key, key_provided, len; - struct ieee80211_crypt_data **crypt; + struct lib80211_crypt_data **crypt; int host_crypto = ieee->host_encrypt || ieee->host_decrypt || ieee->host_build_iv; DECLARE_SSID_BUF(ssid); @@ -321,30 +321,30 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, key_provided = 1; } else { key_provided = 0; - key = ieee->tx_keyidx; + key = ieee->crypt_info.tx_keyidx; } IEEE80211_DEBUG_WX("Key: %d [%s]\n", key, key_provided ? "provided" : "default"); - crypt = &ieee->crypt[key]; + crypt = &ieee->crypt_info.crypt[key]; if (erq->flags & IW_ENCODE_DISABLED) { if (key_provided && *crypt) { IEEE80211_DEBUG_WX("Disabling encryption on key %d.\n", key); - ieee80211_crypt_delayed_deinit(ieee, crypt); + lib80211_crypt_delayed_deinit(&ieee->crypt_info, crypt); } else IEEE80211_DEBUG_WX("Disabling encryption.\n"); /* Check all the keys to see if any are still configured, * and if no key index was provided, de-init them all */ for (i = 0; i < WEP_KEYS; i++) { - if (ieee->crypt[i] != NULL) { + if (ieee->crypt_info.crypt[i] != NULL) { if (key_provided) break; - ieee80211_crypt_delayed_deinit(ieee, - &ieee->crypt[i]); + lib80211_crypt_delayed_deinit(&ieee->crypt_info, + &ieee->crypt_info.crypt[i]); } } @@ -366,21 +366,21 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, strcmp((*crypt)->ops->name, "WEP") != 0) { /* changing to use WEP; deinit previously used algorithm * on this key */ - ieee80211_crypt_delayed_deinit(ieee, crypt); + lib80211_crypt_delayed_deinit(&ieee->crypt_info, crypt); } if (*crypt == NULL && host_crypto) { - struct ieee80211_crypt_data *new_crypt; + struct lib80211_crypt_data *new_crypt; /* take WEP into use */ - new_crypt = kzalloc(sizeof(struct ieee80211_crypt_data), + new_crypt = kzalloc(sizeof(struct lib80211_crypt_data), GFP_KERNEL); if (new_crypt == NULL) return -ENOMEM; - new_crypt->ops = ieee80211_get_crypto_ops("WEP"); + new_crypt->ops = lib80211_get_crypto_ops("WEP"); if (!new_crypt->ops) { - request_module("ieee80211_crypt_wep"); - new_crypt->ops = ieee80211_get_crypto_ops("WEP"); + request_module("lib80211_crypt_wep"); + new_crypt->ops = lib80211_get_crypto_ops("WEP"); } if (new_crypt->ops && try_module_get(new_crypt->ops->owner)) @@ -391,7 +391,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, new_crypt = NULL; printk(KERN_WARNING "%s: could not initialize WEP: " - "load module ieee80211_crypt_wep\n", dev->name); + "load module lib80211_crypt_wep\n", dev->name); return -EOPNOTSUPP; } *crypt = new_crypt; @@ -440,7 +440,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, if (key_provided) { IEEE80211_DEBUG_WX("Setting key %d to default Tx " "key.\n", key); - ieee->tx_keyidx = key; + ieee->crypt_info.tx_keyidx = key; sec.active_key = key; sec.flags |= SEC_ACTIVE_KEY; } @@ -485,7 +485,7 @@ int ieee80211_wx_get_encode(struct ieee80211_device *ieee, { struct iw_point *erq = &(wrqu->encoding); int len, key; - struct ieee80211_crypt_data *crypt; + struct lib80211_crypt_data *crypt; struct ieee80211_security *sec = &ieee->sec; IEEE80211_DEBUG_WX("GET_ENCODE\n"); @@ -496,9 +496,9 @@ int ieee80211_wx_get_encode(struct ieee80211_device *ieee, return -EINVAL; key--; } else - key = ieee->tx_keyidx; + key = ieee->crypt_info.tx_keyidx; - crypt = ieee->crypt[key]; + crypt = ieee->crypt_info.crypt[key]; erq->flags = key + 1; if (!sec->enabled) { @@ -531,8 +531,8 @@ int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee, int i, idx, ret = 0; int group_key = 0; const char *alg, *module; - struct ieee80211_crypto_ops *ops; - struct ieee80211_crypt_data **crypt; + struct lib80211_crypto_ops *ops; + struct lib80211_crypt_data **crypt; struct ieee80211_security sec = { .flags = 0, @@ -544,17 +544,17 @@ int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee, return -EINVAL; idx--; } else - idx = ieee->tx_keyidx; + idx = ieee->crypt_info.tx_keyidx; if (ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY) { - crypt = &ieee->crypt[idx]; + crypt = &ieee->crypt_info.crypt[idx]; group_key = 1; } else { /* some Cisco APs use idx>0 for unicast in dynamic WEP */ if (idx != 0 && ext->alg != IW_ENCODE_ALG_WEP) return -EINVAL; if (ieee->iw_mode == IW_MODE_INFRA) - crypt = &ieee->crypt[idx]; + crypt = &ieee->crypt_info.crypt[idx]; else return -EINVAL; } @@ -563,10 +563,10 @@ int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee, if ((encoding->flags & IW_ENCODE_DISABLED) || ext->alg == IW_ENCODE_ALG_NONE) { if (*crypt) - ieee80211_crypt_delayed_deinit(ieee, crypt); + lib80211_crypt_delayed_deinit(&ieee->crypt_info, crypt); for (i = 0; i < WEP_KEYS; i++) - if (ieee->crypt[i] != NULL) + if (ieee->crypt_info.crypt[i] != NULL) break; if (i == WEP_KEYS) { @@ -589,15 +589,15 @@ int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee, switch (ext->alg) { case IW_ENCODE_ALG_WEP: alg = "WEP"; - module = "ieee80211_crypt_wep"; + module = "lib80211_crypt_wep"; break; case IW_ENCODE_ALG_TKIP: alg = "TKIP"; - module = "ieee80211_crypt_tkip"; + module = "lib80211_crypt_tkip"; break; case IW_ENCODE_ALG_CCMP: alg = "CCMP"; - module = "ieee80211_crypt_ccmp"; + module = "lib80211_crypt_ccmp"; break; default: IEEE80211_DEBUG_WX("%s: unknown crypto alg %d\n", @@ -606,10 +606,10 @@ int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee, goto done; } - ops = ieee80211_get_crypto_ops(alg); + ops = lib80211_get_crypto_ops(alg); if (ops == NULL) { request_module(module); - ops = ieee80211_get_crypto_ops(alg); + ops = lib80211_get_crypto_ops(alg); } if (ops == NULL) { IEEE80211_DEBUG_WX("%s: unknown crypto alg %d\n", @@ -619,9 +619,9 @@ int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee, } if (*crypt == NULL || (*crypt)->ops != ops) { - struct ieee80211_crypt_data *new_crypt; + struct lib80211_crypt_data *new_crypt; - ieee80211_crypt_delayed_deinit(ieee, crypt); + lib80211_crypt_delayed_deinit(&ieee->crypt_info, crypt); new_crypt = kzalloc(sizeof(*new_crypt), GFP_KERNEL); if (new_crypt == NULL) { @@ -649,7 +649,7 @@ int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee, skip_host_crypt: if (ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY) { - ieee->tx_keyidx = idx; + ieee->crypt_info.tx_keyidx = idx; sec.active_key = idx; sec.flags |= SEC_ACTIVE_KEY; } @@ -715,7 +715,7 @@ int ieee80211_wx_get_encodeext(struct ieee80211_device *ieee, return -EINVAL; idx--; } else - idx = ieee->tx_keyidx; + idx = ieee->crypt_info.tx_keyidx; if (!(ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY) && ext->alg != IW_ENCODE_ALG_WEP) diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index ae7f2262dfb5..f7c64dbe86cc 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -82,3 +82,12 @@ config LIB80211 Drivers should select this themselves if needed. Say Y if you want this built into your kernel. + +config LIB80211_CRYPT_WEP + tristate + +config LIB80211_CRYPT_CCMP + tristate + +config LIB80211_CRYPT_TKIP + tristate diff --git a/net/wireless/Makefile b/net/wireless/Makefile index d2d848d445f2..cc547edb111f 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -1,6 +1,9 @@ obj-$(CONFIG_WIRELESS_EXT) += wext.o obj-$(CONFIG_CFG80211) += cfg80211.o obj-$(CONFIG_LIB80211) += lib80211.o +obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o +obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o +obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o cfg80211-$(CONFIG_NL80211) += nl80211.o diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c index e71f7d085621..d681721f4acb 100644 --- a/net/wireless/lib80211.c +++ b/net/wireless/lib80211.c @@ -3,11 +3,23 @@ * * Copyright(c) 2008 John W. Linville * + * Portions copied from old ieee80211 component, w/ original copyright + * notices below: + * + * Host AP crypto routines + * + * Copyright (c) 2002-2003, Jouni Malinen + * Portions Copyright (C) 2004, Intel Corporation + * */ #include #include #include +#include +#include +#include +#include #include @@ -19,6 +31,14 @@ MODULE_DESCRIPTION(DRV_DESCRIPTION); MODULE_AUTHOR("John W. Linville "); MODULE_LICENSE("GPL"); +struct lib80211_crypto_alg { + struct list_head list; + struct lib80211_crypto_ops *ops; +}; + +static LIST_HEAD(lib80211_crypto_algs); +static DEFINE_SPINLOCK(lib80211_crypto_lock); + const char *print_ssid(char *buf, const char *ssid, u8 ssid_len) { const char *s = ssid; @@ -51,15 +71,176 @@ const char *print_ssid(char *buf, const char *ssid, u8 ssid_len) } EXPORT_SYMBOL(print_ssid); -static int __init ieee80211_init(void) +void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *info, int force) { - printk(KERN_INFO DRV_NAME ": " DRV_DESCRIPTION "\n"); + struct lib80211_crypt_data *entry, *next; + unsigned long flags; + + spin_lock_irqsave(info->lock, flags); + list_for_each_entry_safe(entry, next, &info->crypt_deinit_list, list) { + if (atomic_read(&entry->refcnt) != 0 && !force) + continue; + + list_del(&entry->list); + + if (entry->ops) { + entry->ops->deinit(entry->priv); + module_put(entry->ops->owner); + } + kfree(entry); + } + spin_unlock_irqrestore(info->lock, flags); +} +EXPORT_SYMBOL(lib80211_crypt_deinit_entries); + +/* After this, crypt_deinit_list won't accept new members */ +void lib80211_crypt_quiescing(struct lib80211_crypt_info *info) +{ + unsigned long flags; + + spin_lock_irqsave(info->lock, flags); + info->crypt_quiesced = 1; + spin_unlock_irqrestore(info->lock, flags); +} +EXPORT_SYMBOL(lib80211_crypt_quiescing); + +void lib80211_crypt_deinit_handler(unsigned long data) +{ + struct lib80211_crypt_info *info = (struct lib80211_crypt_info *)data; + unsigned long flags; + + lib80211_crypt_deinit_entries(info, 0); + + spin_lock_irqsave(info->lock, flags); + if (!list_empty(&info->crypt_deinit_list) && !info->crypt_quiesced) { + printk(KERN_DEBUG "%s: entries remaining in delayed crypt " + "deletion list\n", info->name); + info->crypt_deinit_timer.expires = jiffies + HZ; + add_timer(&info->crypt_deinit_timer); + } + spin_unlock_irqrestore(info->lock, flags); +} +EXPORT_SYMBOL(lib80211_crypt_deinit_handler); + +void lib80211_crypt_delayed_deinit(struct lib80211_crypt_info *info, + struct lib80211_crypt_data **crypt) +{ + struct lib80211_crypt_data *tmp; + unsigned long flags; + + if (*crypt == NULL) + return; + + tmp = *crypt; + *crypt = NULL; + + /* must not run ops->deinit() while there may be pending encrypt or + * decrypt operations. Use a list of delayed deinits to avoid needing + * locking. */ + + spin_lock_irqsave(info->lock, flags); + if (!info->crypt_quiesced) { + list_add(&tmp->list, &info->crypt_deinit_list); + if (!timer_pending(&info->crypt_deinit_timer)) { + info->crypt_deinit_timer.expires = jiffies + HZ; + add_timer(&info->crypt_deinit_timer); + } + } + spin_unlock_irqrestore(info->lock, flags); +} +EXPORT_SYMBOL(lib80211_crypt_delayed_deinit); + +int lib80211_register_crypto_ops(struct lib80211_crypto_ops *ops) +{ + unsigned long flags; + struct lib80211_crypto_alg *alg; + + alg = kzalloc(sizeof(*alg), GFP_KERNEL); + if (alg == NULL) + return -ENOMEM; + + alg->ops = ops; + + spin_lock_irqsave(&lib80211_crypto_lock, flags); + list_add(&alg->list, &lib80211_crypto_algs); + spin_unlock_irqrestore(&lib80211_crypto_lock, flags); + + printk(KERN_DEBUG "lib80211_crypt: registered algorithm '%s'\n", + ops->name); + + return 0; +} +EXPORT_SYMBOL(lib80211_register_crypto_ops); + +int lib80211_unregister_crypto_ops(struct lib80211_crypto_ops *ops) +{ + struct lib80211_crypto_alg *alg; + unsigned long flags; + + spin_lock_irqsave(&lib80211_crypto_lock, flags); + list_for_each_entry(alg, &lib80211_crypto_algs, list) { + if (alg->ops == ops) + goto found; + } + spin_unlock_irqrestore(&lib80211_crypto_lock, flags); + return -EINVAL; + + found: + printk(KERN_DEBUG "lib80211_crypt: unregistered algorithm " + "'%s'\n", ops->name); + list_del(&alg->list); + spin_unlock_irqrestore(&lib80211_crypto_lock, flags); + kfree(alg); return 0; } +EXPORT_SYMBOL(lib80211_unregister_crypto_ops); + +struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name) +{ + struct lib80211_crypto_alg *alg; + unsigned long flags; + + spin_lock_irqsave(&lib80211_crypto_lock, flags); + list_for_each_entry(alg, &lib80211_crypto_algs, list) { + if (strcmp(alg->ops->name, name) == 0) + goto found; + } + spin_unlock_irqrestore(&lib80211_crypto_lock, flags); + return NULL; + + found: + spin_unlock_irqrestore(&lib80211_crypto_lock, flags); + return alg->ops; +} +EXPORT_SYMBOL(lib80211_get_crypto_ops); + +static void *lib80211_crypt_null_init(int keyidx) +{ + return (void *)1; +} + +static void lib80211_crypt_null_deinit(void *priv) +{ +} + +static struct lib80211_crypto_ops lib80211_crypt_null = { + .name = "NULL", + .init = lib80211_crypt_null_init, + .deinit = lib80211_crypt_null_deinit, + .owner = THIS_MODULE, +}; + +static int __init lib80211_init(void) +{ + printk(KERN_INFO DRV_NAME ": " DRV_DESCRIPTION "\n"); + return lib80211_register_crypto_ops(&lib80211_crypt_null); +} -static void __exit ieee80211_exit(void) +static void __exit lib80211_exit(void) { + lib80211_unregister_crypto_ops(&lib80211_crypt_null); + BUG_ON(!list_empty(&lib80211_crypto_algs)); } -module_init(ieee80211_init); -module_exit(ieee80211_exit); +module_init(lib80211_init); +module_exit(lib80211_exit); diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c new file mode 100644 index 000000000000..db428194c16a --- /dev/null +++ b/net/wireless/lib80211_crypt_ccmp.c @@ -0,0 +1,492 @@ +/* + * lib80211 crypt: host-based CCMP encryption implementation for lib80211 + * + * Copyright (c) 2003-2004, Jouni Malinen + * Copyright (c) 2008, John W. Linville + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. See README and COPYING for + * more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include + +MODULE_AUTHOR("Jouni Malinen"); +MODULE_DESCRIPTION("Host AP crypt: CCMP"); +MODULE_LICENSE("GPL"); + +#define AES_BLOCK_LEN 16 +#define CCMP_HDR_LEN 8 +#define CCMP_MIC_LEN 8 +#define CCMP_TK_LEN 16 +#define CCMP_PN_LEN 6 + +struct lib80211_ccmp_data { + u8 key[CCMP_TK_LEN]; + int key_set; + + u8 tx_pn[CCMP_PN_LEN]; + u8 rx_pn[CCMP_PN_LEN]; + + u32 dot11RSNAStatsCCMPFormatErrors; + u32 dot11RSNAStatsCCMPReplays; + u32 dot11RSNAStatsCCMPDecryptErrors; + + int key_idx; + + struct crypto_cipher *tfm; + + /* scratch buffers for virt_to_page() (crypto API) */ + u8 tx_b0[AES_BLOCK_LEN], tx_b[AES_BLOCK_LEN], + tx_e[AES_BLOCK_LEN], tx_s0[AES_BLOCK_LEN]; + u8 rx_b0[AES_BLOCK_LEN], rx_b[AES_BLOCK_LEN], rx_a[AES_BLOCK_LEN]; +}; + +static inline void lib80211_ccmp_aes_encrypt(struct crypto_cipher *tfm, + const u8 pt[16], u8 ct[16]) +{ + crypto_cipher_encrypt_one(tfm, ct, pt); +} + +static void *lib80211_ccmp_init(int key_idx) +{ + struct lib80211_ccmp_data *priv; + + priv = kzalloc(sizeof(*priv), GFP_ATOMIC); + if (priv == NULL) + goto fail; + priv->key_idx = key_idx; + + priv->tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(priv->tfm)) { + printk(KERN_DEBUG "lib80211_crypt_ccmp: could not allocate " + "crypto API aes\n"); + priv->tfm = NULL; + goto fail; + } + + return priv; + + fail: + if (priv) { + if (priv->tfm) + crypto_free_cipher(priv->tfm); + kfree(priv); + } + + return NULL; +} + +static void lib80211_ccmp_deinit(void *priv) +{ + struct lib80211_ccmp_data *_priv = priv; + if (_priv && _priv->tfm) + crypto_free_cipher(_priv->tfm); + kfree(priv); +} + +static inline void xor_block(u8 * b, u8 * a, size_t len) +{ + int i; + for (i = 0; i < len; i++) + b[i] ^= a[i]; +} + +static void ccmp_init_blocks(struct crypto_cipher *tfm, + struct ieee80211_hdr *hdr, + u8 * pn, size_t dlen, u8 * b0, u8 * auth, u8 * s0) +{ + u8 *pos, qc = 0; + size_t aad_len; + int a4_included, qc_included; + u8 aad[2 * AES_BLOCK_LEN]; + + a4_included = ieee80211_has_a4(hdr->frame_control); + qc_included = ieee80211_is_data_qos(hdr->frame_control); + + aad_len = 22; + if (a4_included) + aad_len += 6; + if (qc_included) { + pos = (u8 *) & hdr->addr4; + if (a4_included) + pos += 6; + qc = *pos & 0x0f; + aad_len += 2; + } + + /* CCM Initial Block: + * Flag (Include authentication header, M=3 (8-octet MIC), + * L=1 (2-octet Dlen)) + * Nonce: 0x00 | A2 | PN + * Dlen */ + b0[0] = 0x59; + b0[1] = qc; + memcpy(b0 + 2, hdr->addr2, ETH_ALEN); + memcpy(b0 + 8, pn, CCMP_PN_LEN); + b0[14] = (dlen >> 8) & 0xff; + b0[15] = dlen & 0xff; + + /* AAD: + * FC with bits 4..6 and 11..13 masked to zero; 14 is always one + * A1 | A2 | A3 + * SC with bits 4..15 (seq#) masked to zero + * A4 (if present) + * QC (if present) + */ + pos = (u8 *) hdr; + aad[0] = 0; /* aad_len >> 8 */ + aad[1] = aad_len & 0xff; + aad[2] = pos[0] & 0x8f; + aad[3] = pos[1] & 0xc7; + memcpy(aad + 4, hdr->addr1, 3 * ETH_ALEN); + pos = (u8 *) & hdr->seq_ctrl; + aad[22] = pos[0] & 0x0f; + aad[23] = 0; /* all bits masked */ + memset(aad + 24, 0, 8); + if (a4_included) + memcpy(aad + 24, hdr->addr4, ETH_ALEN); + if (qc_included) { + aad[a4_included ? 30 : 24] = qc; + /* rest of QC masked */ + } + + /* Start with the first block and AAD */ + lib80211_ccmp_aes_encrypt(tfm, b0, auth); + xor_block(auth, aad, AES_BLOCK_LEN); + lib80211_ccmp_aes_encrypt(tfm, auth, auth); + xor_block(auth, &aad[AES_BLOCK_LEN], AES_BLOCK_LEN); + lib80211_ccmp_aes_encrypt(tfm, auth, auth); + b0[0] &= 0x07; + b0[14] = b0[15] = 0; + lib80211_ccmp_aes_encrypt(tfm, b0, s0); +} + +static int lib80211_ccmp_hdr(struct sk_buff *skb, int hdr_len, + u8 *aeskey, int keylen, void *priv) +{ + struct lib80211_ccmp_data *key = priv; + int i; + u8 *pos; + + if (skb_headroom(skb) < CCMP_HDR_LEN || skb->len < hdr_len) + return -1; + + if (aeskey != NULL && keylen >= CCMP_TK_LEN) + memcpy(aeskey, key->key, CCMP_TK_LEN); + + pos = skb_push(skb, CCMP_HDR_LEN); + memmove(pos, pos + CCMP_HDR_LEN, hdr_len); + pos += hdr_len; + + i = CCMP_PN_LEN - 1; + while (i >= 0) { + key->tx_pn[i]++; + if (key->tx_pn[i] != 0) + break; + i--; + } + + *pos++ = key->tx_pn[5]; + *pos++ = key->tx_pn[4]; + *pos++ = 0; + *pos++ = (key->key_idx << 6) | (1 << 5) /* Ext IV included */ ; + *pos++ = key->tx_pn[3]; + *pos++ = key->tx_pn[2]; + *pos++ = key->tx_pn[1]; + *pos++ = key->tx_pn[0]; + + return CCMP_HDR_LEN; +} + +static int lib80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) +{ + struct lib80211_ccmp_data *key = priv; + int data_len, i, blocks, last, len; + u8 *pos, *mic; + struct ieee80211_hdr *hdr; + u8 *b0 = key->tx_b0; + u8 *b = key->tx_b; + u8 *e = key->tx_e; + u8 *s0 = key->tx_s0; + + if (skb_tailroom(skb) < CCMP_MIC_LEN || skb->len < hdr_len) + return -1; + + data_len = skb->len - hdr_len; + len = lib80211_ccmp_hdr(skb, hdr_len, NULL, 0, priv); + if (len < 0) + return -1; + + pos = skb->data + hdr_len + CCMP_HDR_LEN; + mic = skb_put(skb, CCMP_MIC_LEN); + hdr = (struct ieee80211_hdr *)skb->data; + ccmp_init_blocks(key->tfm, hdr, key->tx_pn, data_len, b0, b, s0); + + blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); + last = data_len % AES_BLOCK_LEN; + + for (i = 1; i <= blocks; i++) { + len = (i == blocks && last) ? last : AES_BLOCK_LEN; + /* Authentication */ + xor_block(b, pos, len); + lib80211_ccmp_aes_encrypt(key->tfm, b, b); + /* Encryption, with counter */ + b0[14] = (i >> 8) & 0xff; + b0[15] = i & 0xff; + lib80211_ccmp_aes_encrypt(key->tfm, b0, e); + xor_block(pos, e, len); + pos += len; + } + + for (i = 0; i < CCMP_MIC_LEN; i++) + mic[i] = b[i] ^ s0[i]; + + return 0; +} + +/* + * deal with seq counter wrapping correctly. + * refer to timer_after() for jiffies wrapping handling + */ +static inline int ccmp_replay_check(u8 *pn_n, u8 *pn_o) +{ + u32 iv32_n, iv16_n; + u32 iv32_o, iv16_o; + + iv32_n = (pn_n[0] << 24) | (pn_n[1] << 16) | (pn_n[2] << 8) | pn_n[3]; + iv16_n = (pn_n[4] << 8) | pn_n[5]; + + iv32_o = (pn_o[0] << 24) | (pn_o[1] << 16) | (pn_o[2] << 8) | pn_o[3]; + iv16_o = (pn_o[4] << 8) | pn_o[5]; + + if ((s32)iv32_n - (s32)iv32_o < 0 || + (iv32_n == iv32_o && iv16_n <= iv16_o)) + return 1; + return 0; +} + +static int lib80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) +{ + struct lib80211_ccmp_data *key = priv; + u8 keyidx, *pos; + struct ieee80211_hdr *hdr; + u8 *b0 = key->rx_b0; + u8 *b = key->rx_b; + u8 *a = key->rx_a; + u8 pn[6]; + int i, blocks, last, len; + size_t data_len = skb->len - hdr_len - CCMP_HDR_LEN - CCMP_MIC_LEN; + u8 *mic = skb->data + skb->len - CCMP_MIC_LEN; + + if (skb->len < hdr_len + CCMP_HDR_LEN + CCMP_MIC_LEN) { + key->dot11RSNAStatsCCMPFormatErrors++; + return -1; + } + + hdr = (struct ieee80211_hdr *)skb->data; + pos = skb->data + hdr_len; + keyidx = pos[3]; + if (!(keyidx & (1 << 5))) { + if (net_ratelimit()) { + printk(KERN_DEBUG "CCMP: received packet without ExtIV" + " flag from %pM\n", hdr->addr2); + } + key->dot11RSNAStatsCCMPFormatErrors++; + return -2; + } + keyidx >>= 6; + if (key->key_idx != keyidx) { + printk(KERN_DEBUG "CCMP: RX tkey->key_idx=%d frame " + "keyidx=%d priv=%p\n", key->key_idx, keyidx, priv); + return -6; + } + if (!key->key_set) { + if (net_ratelimit()) { + printk(KERN_DEBUG "CCMP: received packet from %pM" + " with keyid=%d that does not have a configured" + " key\n", hdr->addr2, keyidx); + } + return -3; + } + + pn[0] = pos[7]; + pn[1] = pos[6]; + pn[2] = pos[5]; + pn[3] = pos[4]; + pn[4] = pos[1]; + pn[5] = pos[0]; + pos += 8; + + if (ccmp_replay_check(pn, key->rx_pn)) { + if (net_ratelimit()) { + printk(KERN_DEBUG "CCMP: replay detected: STA=%pM " + "previous PN %02x%02x%02x%02x%02x%02x " + "received PN %02x%02x%02x%02x%02x%02x\n", + hdr->addr2, + key->rx_pn[0], key->rx_pn[1], key->rx_pn[2], + key->rx_pn[3], key->rx_pn[4], key->rx_pn[5], + pn[0], pn[1], pn[2], pn[3], pn[4], pn[5]); + } + key->dot11RSNAStatsCCMPReplays++; + return -4; + } + + ccmp_init_blocks(key->tfm, hdr, pn, data_len, b0, a, b); + xor_block(mic, b, CCMP_MIC_LEN); + + blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); + last = data_len % AES_BLOCK_LEN; + + for (i = 1; i <= blocks; i++) { + len = (i == blocks && last) ? last : AES_BLOCK_LEN; + /* Decrypt, with counter */ + b0[14] = (i >> 8) & 0xff; + b0[15] = i & 0xff; + lib80211_ccmp_aes_encrypt(key->tfm, b0, b); + xor_block(pos, b, len); + /* Authentication */ + xor_block(a, pos, len); + lib80211_ccmp_aes_encrypt(key->tfm, a, a); + pos += len; + } + + if (memcmp(mic, a, CCMP_MIC_LEN) != 0) { + if (net_ratelimit()) { + printk(KERN_DEBUG "CCMP: decrypt failed: STA=" + "%pM\n", hdr->addr2); + } + key->dot11RSNAStatsCCMPDecryptErrors++; + return -5; + } + + memcpy(key->rx_pn, pn, CCMP_PN_LEN); + + /* Remove hdr and MIC */ + memmove(skb->data + CCMP_HDR_LEN, skb->data, hdr_len); + skb_pull(skb, CCMP_HDR_LEN); + skb_trim(skb, skb->len - CCMP_MIC_LEN); + + return keyidx; +} + +static int lib80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv) +{ + struct lib80211_ccmp_data *data = priv; + int keyidx; + struct crypto_cipher *tfm = data->tfm; + + keyidx = data->key_idx; + memset(data, 0, sizeof(*data)); + data->key_idx = keyidx; + data->tfm = tfm; + if (len == CCMP_TK_LEN) { + memcpy(data->key, key, CCMP_TK_LEN); + data->key_set = 1; + if (seq) { + data->rx_pn[0] = seq[5]; + data->rx_pn[1] = seq[4]; + data->rx_pn[2] = seq[3]; + data->rx_pn[3] = seq[2]; + data->rx_pn[4] = seq[1]; + data->rx_pn[5] = seq[0]; + } + crypto_cipher_setkey(data->tfm, data->key, CCMP_TK_LEN); + } else if (len == 0) + data->key_set = 0; + else + return -1; + + return 0; +} + +static int lib80211_ccmp_get_key(void *key, int len, u8 * seq, void *priv) +{ + struct lib80211_ccmp_data *data = priv; + + if (len < CCMP_TK_LEN) + return -1; + + if (!data->key_set) + return 0; + memcpy(key, data->key, CCMP_TK_LEN); + + if (seq) { + seq[0] = data->tx_pn[5]; + seq[1] = data->tx_pn[4]; + seq[2] = data->tx_pn[3]; + seq[3] = data->tx_pn[2]; + seq[4] = data->tx_pn[1]; + seq[5] = data->tx_pn[0]; + } + + return CCMP_TK_LEN; +} + +static char *lib80211_ccmp_print_stats(char *p, void *priv) +{ + struct lib80211_ccmp_data *ccmp = priv; + + p += sprintf(p, "key[%d] alg=CCMP key_set=%d " + "tx_pn=%02x%02x%02x%02x%02x%02x " + "rx_pn=%02x%02x%02x%02x%02x%02x " + "format_errors=%d replays=%d decrypt_errors=%d\n", + ccmp->key_idx, ccmp->key_set, + ccmp->tx_pn[0], ccmp->tx_pn[1], ccmp->tx_pn[2], + ccmp->tx_pn[3], ccmp->tx_pn[4], ccmp->tx_pn[5], + ccmp->rx_pn[0], ccmp->rx_pn[1], ccmp->rx_pn[2], + ccmp->rx_pn[3], ccmp->rx_pn[4], ccmp->rx_pn[5], + ccmp->dot11RSNAStatsCCMPFormatErrors, + ccmp->dot11RSNAStatsCCMPReplays, + ccmp->dot11RSNAStatsCCMPDecryptErrors); + + return p; +} + +static struct lib80211_crypto_ops lib80211_crypt_ccmp = { + .name = "CCMP", + .init = lib80211_ccmp_init, + .deinit = lib80211_ccmp_deinit, + .build_iv = lib80211_ccmp_hdr, + .encrypt_mpdu = lib80211_ccmp_encrypt, + .decrypt_mpdu = lib80211_ccmp_decrypt, + .encrypt_msdu = NULL, + .decrypt_msdu = NULL, + .set_key = lib80211_ccmp_set_key, + .get_key = lib80211_ccmp_get_key, + .print_stats = lib80211_ccmp_print_stats, + .extra_mpdu_prefix_len = CCMP_HDR_LEN, + .extra_mpdu_postfix_len = CCMP_MIC_LEN, + .owner = THIS_MODULE, +}; + +static int __init lib80211_crypto_ccmp_init(void) +{ + return lib80211_register_crypto_ops(&lib80211_crypt_ccmp); +} + +static void __exit lib80211_crypto_ccmp_exit(void) +{ + lib80211_unregister_crypto_ops(&lib80211_crypt_ccmp); +} + +module_init(lib80211_crypto_ccmp_init); +module_exit(lib80211_crypto_ccmp_exit); diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c new file mode 100644 index 000000000000..7e8e22bfed90 --- /dev/null +++ b/net/wireless/lib80211_crypt_tkip.c @@ -0,0 +1,784 @@ +/* + * lib80211 crypt: host-based TKIP encryption implementation for lib80211 + * + * Copyright (c) 2003-2004, Jouni Malinen + * Copyright (c) 2008, John W. Linville + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. See README and COPYING for + * more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include + +MODULE_AUTHOR("Jouni Malinen"); +MODULE_DESCRIPTION("lib80211 crypt: TKIP"); +MODULE_LICENSE("GPL"); + +struct lib80211_tkip_data { +#define TKIP_KEY_LEN 32 + u8 key[TKIP_KEY_LEN]; + int key_set; + + u32 tx_iv32; + u16 tx_iv16; + u16 tx_ttak[5]; + int tx_phase1_done; + + u32 rx_iv32; + u16 rx_iv16; + u16 rx_ttak[5]; + int rx_phase1_done; + u32 rx_iv32_new; + u16 rx_iv16_new; + + u32 dot11RSNAStatsTKIPReplays; + u32 dot11RSNAStatsTKIPICVErrors; + u32 dot11RSNAStatsTKIPLocalMICFailures; + + int key_idx; + + struct crypto_blkcipher *rx_tfm_arc4; + struct crypto_hash *rx_tfm_michael; + struct crypto_blkcipher *tx_tfm_arc4; + struct crypto_hash *tx_tfm_michael; + + /* scratch buffers for virt_to_page() (crypto API) */ + u8 rx_hdr[16], tx_hdr[16]; + + unsigned long flags; +}; + +static unsigned long lib80211_tkip_set_flags(unsigned long flags, void *priv) +{ + struct lib80211_tkip_data *_priv = priv; + unsigned long old_flags = _priv->flags; + _priv->flags = flags; + return old_flags; +} + +static unsigned long lib80211_tkip_get_flags(void *priv) +{ + struct lib80211_tkip_data *_priv = priv; + return _priv->flags; +} + +static void *lib80211_tkip_init(int key_idx) +{ + struct lib80211_tkip_data *priv; + + priv = kzalloc(sizeof(*priv), GFP_ATOMIC); + if (priv == NULL) + goto fail; + + priv->key_idx = key_idx; + + priv->tx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(priv->tx_tfm_arc4)) { + printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " + "crypto API arc4\n"); + priv->tx_tfm_arc4 = NULL; + goto fail; + } + + priv->tx_tfm_michael = crypto_alloc_hash("michael_mic", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(priv->tx_tfm_michael)) { + printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " + "crypto API michael_mic\n"); + priv->tx_tfm_michael = NULL; + goto fail; + } + + priv->rx_tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(priv->rx_tfm_arc4)) { + printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " + "crypto API arc4\n"); + priv->rx_tfm_arc4 = NULL; + goto fail; + } + + priv->rx_tfm_michael = crypto_alloc_hash("michael_mic", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(priv->rx_tfm_michael)) { + printk(KERN_DEBUG "lib80211_crypt_tkip: could not allocate " + "crypto API michael_mic\n"); + priv->rx_tfm_michael = NULL; + goto fail; + } + + return priv; + + fail: + if (priv) { + if (priv->tx_tfm_michael) + crypto_free_hash(priv->tx_tfm_michael); + if (priv->tx_tfm_arc4) + crypto_free_blkcipher(priv->tx_tfm_arc4); + if (priv->rx_tfm_michael) + crypto_free_hash(priv->rx_tfm_michael); + if (priv->rx_tfm_arc4) + crypto_free_blkcipher(priv->rx_tfm_arc4); + kfree(priv); + } + + return NULL; +} + +static void lib80211_tkip_deinit(void *priv) +{ + struct lib80211_tkip_data *_priv = priv; + if (_priv) { + if (_priv->tx_tfm_michael) + crypto_free_hash(_priv->tx_tfm_michael); + if (_priv->tx_tfm_arc4) + crypto_free_blkcipher(_priv->tx_tfm_arc4); + if (_priv->rx_tfm_michael) + crypto_free_hash(_priv->rx_tfm_michael); + if (_priv->rx_tfm_arc4) + crypto_free_blkcipher(_priv->rx_tfm_arc4); + } + kfree(priv); +} + +static inline u16 RotR1(u16 val) +{ + return (val >> 1) | (val << 15); +} + +static inline u8 Lo8(u16 val) +{ + return val & 0xff; +} + +static inline u8 Hi8(u16 val) +{ + return val >> 8; +} + +static inline u16 Lo16(u32 val) +{ + return val & 0xffff; +} + +static inline u16 Hi16(u32 val) +{ + return val >> 16; +} + +static inline u16 Mk16(u8 hi, u8 lo) +{ + return lo | (((u16) hi) << 8); +} + +static inline u16 Mk16_le(__le16 * v) +{ + return le16_to_cpu(*v); +} + +static const u16 Sbox[256] = { + 0xC6A5, 0xF884, 0xEE99, 0xF68D, 0xFF0D, 0xD6BD, 0xDEB1, 0x9154, + 0x6050, 0x0203, 0xCEA9, 0x567D, 0xE719, 0xB562, 0x4DE6, 0xEC9A, + 0x8F45, 0x1F9D, 0x8940, 0xFA87, 0xEF15, 0xB2EB, 0x8EC9, 0xFB0B, + 0x41EC, 0xB367, 0x5FFD, 0x45EA, 0x23BF, 0x53F7, 0xE496, 0x9B5B, + 0x75C2, 0xE11C, 0x3DAE, 0x4C6A, 0x6C5A, 0x7E41, 0xF502, 0x834F, + 0x685C, 0x51F4, 0xD134, 0xF908, 0xE293, 0xAB73, 0x6253, 0x2A3F, + 0x080C, 0x9552, 0x4665, 0x9D5E, 0x3028, 0x37A1, 0x0A0F, 0x2FB5, + 0x0E09, 0x2436, 0x1B9B, 0xDF3D, 0xCD26, 0x4E69, 0x7FCD, 0xEA9F, + 0x121B, 0x1D9E, 0x5874, 0x342E, 0x362D, 0xDCB2, 0xB4EE, 0x5BFB, + 0xA4F6, 0x764D, 0xB761, 0x7DCE, 0x527B, 0xDD3E, 0x5E71, 0x1397, + 0xA6F5, 0xB968, 0x0000, 0xC12C, 0x4060, 0xE31F, 0x79C8, 0xB6ED, + 0xD4BE, 0x8D46, 0x67D9, 0x724B, 0x94DE, 0x98D4, 0xB0E8, 0x854A, + 0xBB6B, 0xC52A, 0x4FE5, 0xED16, 0x86C5, 0x9AD7, 0x6655, 0x1194, + 0x8ACF, 0xE910, 0x0406, 0xFE81, 0xA0F0, 0x7844, 0x25BA, 0x4BE3, + 0xA2F3, 0x5DFE, 0x80C0, 0x058A, 0x3FAD, 0x21BC, 0x7048, 0xF104, + 0x63DF, 0x77C1, 0xAF75, 0x4263, 0x2030, 0xE51A, 0xFD0E, 0xBF6D, + 0x814C, 0x1814, 0x2635, 0xC32F, 0xBEE1, 0x35A2, 0x88CC, 0x2E39, + 0x9357, 0x55F2, 0xFC82, 0x7A47, 0xC8AC, 0xBAE7, 0x322B, 0xE695, + 0xC0A0, 0x1998, 0x9ED1, 0xA37F, 0x4466, 0x547E, 0x3BAB, 0x0B83, + 0x8CCA, 0xC729, 0x6BD3, 0x283C, 0xA779, 0xBCE2, 0x161D, 0xAD76, + 0xDB3B, 0x6456, 0x744E, 0x141E, 0x92DB, 0x0C0A, 0x486C, 0xB8E4, + 0x9F5D, 0xBD6E, 0x43EF, 0xC4A6, 0x39A8, 0x31A4, 0xD337, 0xF28B, + 0xD532, 0x8B43, 0x6E59, 0xDAB7, 0x018C, 0xB164, 0x9CD2, 0x49E0, + 0xD8B4, 0xACFA, 0xF307, 0xCF25, 0xCAAF, 0xF48E, 0x47E9, 0x1018, + 0x6FD5, 0xF088, 0x4A6F, 0x5C72, 0x3824, 0x57F1, 0x73C7, 0x9751, + 0xCB23, 0xA17C, 0xE89C, 0x3E21, 0x96DD, 0x61DC, 0x0D86, 0x0F85, + 0xE090, 0x7C42, 0x71C4, 0xCCAA, 0x90D8, 0x0605, 0xF701, 0x1C12, + 0xC2A3, 0x6A5F, 0xAEF9, 0x69D0, 0x1791, 0x9958, 0x3A27, 0x27B9, + 0xD938, 0xEB13, 0x2BB3, 0x2233, 0xD2BB, 0xA970, 0x0789, 0x33A7, + 0x2DB6, 0x3C22, 0x1592, 0xC920, 0x8749, 0xAAFF, 0x5078, 0xA57A, + 0x038F, 0x59F8, 0x0980, 0x1A17, 0x65DA, 0xD731, 0x84C6, 0xD0B8, + 0x82C3, 0x29B0, 0x5A77, 0x1E11, 0x7BCB, 0xA8FC, 0x6DD6, 0x2C3A, +}; + +static inline u16 _S_(u16 v) +{ + u16 t = Sbox[Hi8(v)]; + return Sbox[Lo8(v)] ^ ((t << 8) | (t >> 8)); +} + +#define PHASE1_LOOP_COUNT 8 + +static void tkip_mixing_phase1(u16 * TTAK, const u8 * TK, const u8 * TA, + u32 IV32) +{ + int i, j; + + /* Initialize the 80-bit TTAK from TSC (IV32) and TA[0..5] */ + TTAK[0] = Lo16(IV32); + TTAK[1] = Hi16(IV32); + TTAK[2] = Mk16(TA[1], TA[0]); + TTAK[3] = Mk16(TA[3], TA[2]); + TTAK[4] = Mk16(TA[5], TA[4]); + + for (i = 0; i < PHASE1_LOOP_COUNT; i++) { + j = 2 * (i & 1); + TTAK[0] += _S_(TTAK[4] ^ Mk16(TK[1 + j], TK[0 + j])); + TTAK[1] += _S_(TTAK[0] ^ Mk16(TK[5 + j], TK[4 + j])); + TTAK[2] += _S_(TTAK[1] ^ Mk16(TK[9 + j], TK[8 + j])); + TTAK[3] += _S_(TTAK[2] ^ Mk16(TK[13 + j], TK[12 + j])); + TTAK[4] += _S_(TTAK[3] ^ Mk16(TK[1 + j], TK[0 + j])) + i; + } +} + +static void tkip_mixing_phase2(u8 * WEPSeed, const u8 * TK, const u16 * TTAK, + u16 IV16) +{ + /* Make temporary area overlap WEP seed so that the final copy can be + * avoided on little endian hosts. */ + u16 *PPK = (u16 *) & WEPSeed[4]; + + /* Step 1 - make copy of TTAK and bring in TSC */ + PPK[0] = TTAK[0]; + PPK[1] = TTAK[1]; + PPK[2] = TTAK[2]; + PPK[3] = TTAK[3]; + PPK[4] = TTAK[4]; + PPK[5] = TTAK[4] + IV16; + + /* Step 2 - 96-bit bijective mixing using S-box */ + PPK[0] += _S_(PPK[5] ^ Mk16_le((__le16 *) & TK[0])); + PPK[1] += _S_(PPK[0] ^ Mk16_le((__le16 *) & TK[2])); + PPK[2] += _S_(PPK[1] ^ Mk16_le((__le16 *) & TK[4])); + PPK[3] += _S_(PPK[2] ^ Mk16_le((__le16 *) & TK[6])); + PPK[4] += _S_(PPK[3] ^ Mk16_le((__le16 *) & TK[8])); + PPK[5] += _S_(PPK[4] ^ Mk16_le((__le16 *) & TK[10])); + + PPK[0] += RotR1(PPK[5] ^ Mk16_le((__le16 *) & TK[12])); + PPK[1] += RotR1(PPK[0] ^ Mk16_le((__le16 *) & TK[14])); + PPK[2] += RotR1(PPK[1]); + PPK[3] += RotR1(PPK[2]); + PPK[4] += RotR1(PPK[3]); + PPK[5] += RotR1(PPK[4]); + + /* Step 3 - bring in last of TK bits, assign 24-bit WEP IV value + * WEPSeed[0..2] is transmitted as WEP IV */ + WEPSeed[0] = Hi8(IV16); + WEPSeed[1] = (Hi8(IV16) | 0x20) & 0x7F; + WEPSeed[2] = Lo8(IV16); + WEPSeed[3] = Lo8((PPK[5] ^ Mk16_le((__le16 *) & TK[0])) >> 1); + +#ifdef __BIG_ENDIAN + { + int i; + for (i = 0; i < 6; i++) + PPK[i] = (PPK[i] << 8) | (PPK[i] >> 8); + } +#endif +} + +static int lib80211_tkip_hdr(struct sk_buff *skb, int hdr_len, + u8 * rc4key, int keylen, void *priv) +{ + struct lib80211_tkip_data *tkey = priv; + int len; + u8 *pos; + struct ieee80211_hdr *hdr; + + hdr = (struct ieee80211_hdr *)skb->data; + + if (skb_headroom(skb) < 8 || skb->len < hdr_len) + return -1; + + if (rc4key == NULL || keylen < 16) + return -1; + + if (!tkey->tx_phase1_done) { + tkip_mixing_phase1(tkey->tx_ttak, tkey->key, hdr->addr2, + tkey->tx_iv32); + tkey->tx_phase1_done = 1; + } + tkip_mixing_phase2(rc4key, tkey->key, tkey->tx_ttak, tkey->tx_iv16); + + len = skb->len - hdr_len; + pos = skb_push(skb, 8); + memmove(pos, pos + 8, hdr_len); + pos += hdr_len; + + *pos++ = *rc4key; + *pos++ = *(rc4key + 1); + *pos++ = *(rc4key + 2); + *pos++ = (tkey->key_idx << 6) | (1 << 5) /* Ext IV included */ ; + *pos++ = tkey->tx_iv32 & 0xff; + *pos++ = (tkey->tx_iv32 >> 8) & 0xff; + *pos++ = (tkey->tx_iv32 >> 16) & 0xff; + *pos++ = (tkey->tx_iv32 >> 24) & 0xff; + + tkey->tx_iv16++; + if (tkey->tx_iv16 == 0) { + tkey->tx_phase1_done = 0; + tkey->tx_iv32++; + } + + return 8; +} + +static int lib80211_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv) +{ + struct lib80211_tkip_data *tkey = priv; + struct blkcipher_desc desc = { .tfm = tkey->tx_tfm_arc4 }; + int len; + u8 rc4key[16], *pos, *icv; + u32 crc; + struct scatterlist sg; + + if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) { + if (net_ratelimit()) { + struct ieee80211_hdr *hdr = + (struct ieee80211_hdr *)skb->data; + printk(KERN_DEBUG ": TKIP countermeasures: dropped " + "TX packet to %pM\n", hdr->addr1); + } + return -1; + } + + if (skb_tailroom(skb) < 4 || skb->len < hdr_len) + return -1; + + len = skb->len - hdr_len; + pos = skb->data + hdr_len; + + if ((lib80211_tkip_hdr(skb, hdr_len, rc4key, 16, priv)) < 0) + return -1; + + icv = skb_put(skb, 4); + + crc = ~crc32_le(~0, pos, len); + icv[0] = crc; + icv[1] = crc >> 8; + icv[2] = crc >> 16; + icv[3] = crc >> 24; + + crypto_blkcipher_setkey(tkey->tx_tfm_arc4, rc4key, 16); + sg_init_one(&sg, pos, len + 4); + return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4); +} + +/* + * deal with seq counter wrapping correctly. + * refer to timer_after() for jiffies wrapping handling + */ +static inline int tkip_replay_check(u32 iv32_n, u16 iv16_n, + u32 iv32_o, u16 iv16_o) +{ + if ((s32)iv32_n - (s32)iv32_o < 0 || + (iv32_n == iv32_o && iv16_n <= iv16_o)) + return 1; + return 0; +} + +static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) +{ + struct lib80211_tkip_data *tkey = priv; + struct blkcipher_desc desc = { .tfm = tkey->rx_tfm_arc4 }; + u8 rc4key[16]; + u8 keyidx, *pos; + u32 iv32; + u16 iv16; + struct ieee80211_hdr *hdr; + u8 icv[4]; + u32 crc; + struct scatterlist sg; + int plen; + + hdr = (struct ieee80211_hdr *)skb->data; + + if (tkey->flags & IEEE80211_CRYPTO_TKIP_COUNTERMEASURES) { + if (net_ratelimit()) { + printk(KERN_DEBUG ": TKIP countermeasures: dropped " + "received packet from %pM\n", hdr->addr2); + } + return -1; + } + + if (skb->len < hdr_len + 8 + 4) + return -1; + + pos = skb->data + hdr_len; + keyidx = pos[3]; + if (!(keyidx & (1 << 5))) { + if (net_ratelimit()) { + printk(KERN_DEBUG "TKIP: received packet without ExtIV" + " flag from %pM\n", hdr->addr2); + } + return -2; + } + keyidx >>= 6; + if (tkey->key_idx != keyidx) { + printk(KERN_DEBUG "TKIP: RX tkey->key_idx=%d frame " + "keyidx=%d priv=%p\n", tkey->key_idx, keyidx, priv); + return -6; + } + if (!tkey->key_set) { + if (net_ratelimit()) { + printk(KERN_DEBUG "TKIP: received packet from %pM" + " with keyid=%d that does not have a configured" + " key\n", hdr->addr2, keyidx); + } + return -3; + } + iv16 = (pos[0] << 8) | pos[2]; + iv32 = pos[4] | (pos[5] << 8) | (pos[6] << 16) | (pos[7] << 24); + pos += 8; + + if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) { + if (net_ratelimit()) { + printk(KERN_DEBUG "TKIP: replay detected: STA=%pM" + " previous TSC %08x%04x received TSC " + "%08x%04x\n", hdr->addr2, + tkey->rx_iv32, tkey->rx_iv16, iv32, iv16); + } + tkey->dot11RSNAStatsTKIPReplays++; + return -4; + } + + if (iv32 != tkey->rx_iv32 || !tkey->rx_phase1_done) { + tkip_mixing_phase1(tkey->rx_ttak, tkey->key, hdr->addr2, iv32); + tkey->rx_phase1_done = 1; + } + tkip_mixing_phase2(rc4key, tkey->key, tkey->rx_ttak, iv16); + + plen = skb->len - hdr_len - 12; + + crypto_blkcipher_setkey(tkey->rx_tfm_arc4, rc4key, 16); + sg_init_one(&sg, pos, plen + 4); + if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) { + if (net_ratelimit()) { + printk(KERN_DEBUG ": TKIP: failed to decrypt " + "received packet from %pM\n", + hdr->addr2); + } + return -7; + } + + crc = ~crc32_le(~0, pos, plen); + icv[0] = crc; + icv[1] = crc >> 8; + icv[2] = crc >> 16; + icv[3] = crc >> 24; + if (memcmp(icv, pos + plen, 4) != 0) { + if (iv32 != tkey->rx_iv32) { + /* Previously cached Phase1 result was already lost, so + * it needs to be recalculated for the next packet. */ + tkey->rx_phase1_done = 0; + } + if (net_ratelimit()) { + printk(KERN_DEBUG "TKIP: ICV error detected: STA=" + "%pM\n", hdr->addr2); + } + tkey->dot11RSNAStatsTKIPICVErrors++; + return -5; + } + + /* Update real counters only after Michael MIC verification has + * completed */ + tkey->rx_iv32_new = iv32; + tkey->rx_iv16_new = iv16; + + /* Remove IV and ICV */ + memmove(skb->data + 8, skb->data, hdr_len); + skb_pull(skb, 8); + skb_trim(skb, skb->len - 4); + + return keyidx; +} + +static int michael_mic(struct crypto_hash *tfm_michael, u8 * key, u8 * hdr, + u8 * data, size_t data_len, u8 * mic) +{ + struct hash_desc desc; + struct scatterlist sg[2]; + + if (tfm_michael == NULL) { + printk(KERN_WARNING "michael_mic: tfm_michael == NULL\n"); + return -1; + } + sg_init_table(sg, 2); + sg_set_buf(&sg[0], hdr, 16); + sg_set_buf(&sg[1], data, data_len); + + if (crypto_hash_setkey(tfm_michael, key, 8)) + return -1; + + desc.tfm = tfm_michael; + desc.flags = 0; + return crypto_hash_digest(&desc, sg, data_len + 16, mic); +} + +static void michael_mic_hdr(struct sk_buff *skb, u8 * hdr) +{ + struct ieee80211_hdr *hdr11; + + hdr11 = (struct ieee80211_hdr *)skb->data; + + switch (le16_to_cpu(hdr11->frame_control) & + (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) { + case IEEE80211_FCTL_TODS: + memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */ + memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */ + break; + case IEEE80211_FCTL_FROMDS: + memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */ + memcpy(hdr + ETH_ALEN, hdr11->addr3, ETH_ALEN); /* SA */ + break; + case IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS: + memcpy(hdr, hdr11->addr3, ETH_ALEN); /* DA */ + memcpy(hdr + ETH_ALEN, hdr11->addr4, ETH_ALEN); /* SA */ + break; + case 0: + memcpy(hdr, hdr11->addr1, ETH_ALEN); /* DA */ + memcpy(hdr + ETH_ALEN, hdr11->addr2, ETH_ALEN); /* SA */ + break; + } + + if (ieee80211_is_data_qos(hdr11->frame_control)) { + hdr[12] = le16_to_cpu(*ieee80211_get_qos_ctl(hdr11)) + & IEEE80211_QOS_CTL_TID_MASK; + } else + hdr[12] = 0; /* priority */ + + hdr[13] = hdr[14] = hdr[15] = 0; /* reserved */ +} + +static int lib80211_michael_mic_add(struct sk_buff *skb, int hdr_len, + void *priv) +{ + struct lib80211_tkip_data *tkey = priv; + u8 *pos; + + if (skb_tailroom(skb) < 8 || skb->len < hdr_len) { + printk(KERN_DEBUG "Invalid packet for Michael MIC add " + "(tailroom=%d hdr_len=%d skb->len=%d)\n", + skb_tailroom(skb), hdr_len, skb->len); + return -1; + } + + michael_mic_hdr(skb, tkey->tx_hdr); + pos = skb_put(skb, 8); + if (michael_mic(tkey->tx_tfm_michael, &tkey->key[16], tkey->tx_hdr, + skb->data + hdr_len, skb->len - 8 - hdr_len, pos)) + return -1; + + return 0; +} + +static void lib80211_michael_mic_failure(struct net_device *dev, + struct ieee80211_hdr *hdr, + int keyidx) +{ + union iwreq_data wrqu; + struct iw_michaelmicfailure ev; + + /* TODO: needed parameters: count, keyid, key type, TSC */ + memset(&ev, 0, sizeof(ev)); + ev.flags = keyidx & IW_MICFAILURE_KEY_ID; + if (hdr->addr1[0] & 0x01) + ev.flags |= IW_MICFAILURE_GROUP; + else + ev.flags |= IW_MICFAILURE_PAIRWISE; + ev.src_addr.sa_family = ARPHRD_ETHER; + memcpy(ev.src_addr.sa_data, hdr->addr2, ETH_ALEN); + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.data.length = sizeof(ev); + wireless_send_event(dev, IWEVMICHAELMICFAILURE, &wrqu, (char *)&ev); +} + +static int lib80211_michael_mic_verify(struct sk_buff *skb, int keyidx, + int hdr_len, void *priv) +{ + struct lib80211_tkip_data *tkey = priv; + u8 mic[8]; + + if (!tkey->key_set) + return -1; + + michael_mic_hdr(skb, tkey->rx_hdr); + if (michael_mic(tkey->rx_tfm_michael, &tkey->key[24], tkey->rx_hdr, + skb->data + hdr_len, skb->len - 8 - hdr_len, mic)) + return -1; + if (memcmp(mic, skb->data + skb->len - 8, 8) != 0) { + struct ieee80211_hdr *hdr; + hdr = (struct ieee80211_hdr *)skb->data; + printk(KERN_DEBUG "%s: Michael MIC verification failed for " + "MSDU from %pM keyidx=%d\n", + skb->dev ? skb->dev->name : "N/A", hdr->addr2, + keyidx); + if (skb->dev) + lib80211_michael_mic_failure(skb->dev, hdr, keyidx); + tkey->dot11RSNAStatsTKIPLocalMICFailures++; + return -1; + } + + /* Update TSC counters for RX now that the packet verification has + * completed. */ + tkey->rx_iv32 = tkey->rx_iv32_new; + tkey->rx_iv16 = tkey->rx_iv16_new; + + skb_trim(skb, skb->len - 8); + + return 0; +} + +static int lib80211_tkip_set_key(void *key, int len, u8 * seq, void *priv) +{ + struct lib80211_tkip_data *tkey = priv; + int keyidx; + struct crypto_hash *tfm = tkey->tx_tfm_michael; + struct crypto_blkcipher *tfm2 = tkey->tx_tfm_arc4; + struct crypto_hash *tfm3 = tkey->rx_tfm_michael; + struct crypto_blkcipher *tfm4 = tkey->rx_tfm_arc4; + + keyidx = tkey->key_idx; + memset(tkey, 0, sizeof(*tkey)); + tkey->key_idx = keyidx; + tkey->tx_tfm_michael = tfm; + tkey->tx_tfm_arc4 = tfm2; + tkey->rx_tfm_michael = tfm3; + tkey->rx_tfm_arc4 = tfm4; + if (len == TKIP_KEY_LEN) { + memcpy(tkey->key, key, TKIP_KEY_LEN); + tkey->key_set = 1; + tkey->tx_iv16 = 1; /* TSC is initialized to 1 */ + if (seq) { + tkey->rx_iv32 = (seq[5] << 24) | (seq[4] << 16) | + (seq[3] << 8) | seq[2]; + tkey->rx_iv16 = (seq[1] << 8) | seq[0]; + } + } else if (len == 0) + tkey->key_set = 0; + else + return -1; + + return 0; +} + +static int lib80211_tkip_get_key(void *key, int len, u8 * seq, void *priv) +{ + struct lib80211_tkip_data *tkey = priv; + + if (len < TKIP_KEY_LEN) + return -1; + + if (!tkey->key_set) + return 0; + memcpy(key, tkey->key, TKIP_KEY_LEN); + + if (seq) { + /* Return the sequence number of the last transmitted frame. */ + u16 iv16 = tkey->tx_iv16; + u32 iv32 = tkey->tx_iv32; + if (iv16 == 0) + iv32--; + iv16--; + seq[0] = tkey->tx_iv16; + seq[1] = tkey->tx_iv16 >> 8; + seq[2] = tkey->tx_iv32; + seq[3] = tkey->tx_iv32 >> 8; + seq[4] = tkey->tx_iv32 >> 16; + seq[5] = tkey->tx_iv32 >> 24; + } + + return TKIP_KEY_LEN; +} + +static char *lib80211_tkip_print_stats(char *p, void *priv) +{ + struct lib80211_tkip_data *tkip = priv; + p += sprintf(p, "key[%d] alg=TKIP key_set=%d " + "tx_pn=%02x%02x%02x%02x%02x%02x " + "rx_pn=%02x%02x%02x%02x%02x%02x " + "replays=%d icv_errors=%d local_mic_failures=%d\n", + tkip->key_idx, tkip->key_set, + (tkip->tx_iv32 >> 24) & 0xff, + (tkip->tx_iv32 >> 16) & 0xff, + (tkip->tx_iv32 >> 8) & 0xff, + tkip->tx_iv32 & 0xff, + (tkip->tx_iv16 >> 8) & 0xff, + tkip->tx_iv16 & 0xff, + (tkip->rx_iv32 >> 24) & 0xff, + (tkip->rx_iv32 >> 16) & 0xff, + (tkip->rx_iv32 >> 8) & 0xff, + tkip->rx_iv32 & 0xff, + (tkip->rx_iv16 >> 8) & 0xff, + tkip->rx_iv16 & 0xff, + tkip->dot11RSNAStatsTKIPReplays, + tkip->dot11RSNAStatsTKIPICVErrors, + tkip->dot11RSNAStatsTKIPLocalMICFailures); + return p; +} + +static struct lib80211_crypto_ops lib80211_crypt_tkip = { + .name = "TKIP", + .init = lib80211_tkip_init, + .deinit = lib80211_tkip_deinit, + .build_iv = lib80211_tkip_hdr, + .encrypt_mpdu = lib80211_tkip_encrypt, + .decrypt_mpdu = lib80211_tkip_decrypt, + .encrypt_msdu = lib80211_michael_mic_add, + .decrypt_msdu = lib80211_michael_mic_verify, + .set_key = lib80211_tkip_set_key, + .get_key = lib80211_tkip_get_key, + .print_stats = lib80211_tkip_print_stats, + .extra_mpdu_prefix_len = 4 + 4, /* IV + ExtIV */ + .extra_mpdu_postfix_len = 4, /* ICV */ + .extra_msdu_postfix_len = 8, /* MIC */ + .get_flags = lib80211_tkip_get_flags, + .set_flags = lib80211_tkip_set_flags, + .owner = THIS_MODULE, +}; + +static int __init lib80211_crypto_tkip_init(void) +{ + return lib80211_register_crypto_ops(&lib80211_crypt_tkip); +} + +static void __exit lib80211_crypto_tkip_exit(void) +{ + lib80211_unregister_crypto_ops(&lib80211_crypt_tkip); +} + +module_init(lib80211_crypto_tkip_init); +module_exit(lib80211_crypto_tkip_exit); diff --git a/net/wireless/lib80211_crypt_wep.c b/net/wireless/lib80211_crypt_wep.c new file mode 100644 index 000000000000..6d41e05ca33b --- /dev/null +++ b/net/wireless/lib80211_crypt_wep.c @@ -0,0 +1,296 @@ +/* + * lib80211 crypt: host-based WEP encryption implementation for lib80211 + * + * Copyright (c) 2002-2004, Jouni Malinen + * Copyright (c) 2008, John W. Linville + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. See README and COPYING for + * more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +MODULE_AUTHOR("Jouni Malinen"); +MODULE_DESCRIPTION("lib80211 crypt: WEP"); +MODULE_LICENSE("GPL"); + +struct lib80211_wep_data { + u32 iv; +#define WEP_KEY_LEN 13 + u8 key[WEP_KEY_LEN + 1]; + u8 key_len; + u8 key_idx; + struct crypto_blkcipher *tx_tfm; + struct crypto_blkcipher *rx_tfm; +}; + +static void *lib80211_wep_init(int keyidx) +{ + struct lib80211_wep_data *priv; + + priv = kzalloc(sizeof(*priv), GFP_ATOMIC); + if (priv == NULL) + goto fail; + priv->key_idx = keyidx; + + priv->tx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(priv->tx_tfm)) { + printk(KERN_DEBUG "lib80211_crypt_wep: could not allocate " + "crypto API arc4\n"); + priv->tx_tfm = NULL; + goto fail; + } + + priv->rx_tfm = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(priv->rx_tfm)) { + printk(KERN_DEBUG "lib80211_crypt_wep: could not allocate " + "crypto API arc4\n"); + priv->rx_tfm = NULL; + goto fail; + } + /* start WEP IV from a random value */ + get_random_bytes(&priv->iv, 4); + + return priv; + + fail: + if (priv) { + if (priv->tx_tfm) + crypto_free_blkcipher(priv->tx_tfm); + if (priv->rx_tfm) + crypto_free_blkcipher(priv->rx_tfm); + kfree(priv); + } + return NULL; +} + +static void lib80211_wep_deinit(void *priv) +{ + struct lib80211_wep_data *_priv = priv; + if (_priv) { + if (_priv->tx_tfm) + crypto_free_blkcipher(_priv->tx_tfm); + if (_priv->rx_tfm) + crypto_free_blkcipher(_priv->rx_tfm); + } + kfree(priv); +} + +/* Add WEP IV/key info to a frame that has at least 4 bytes of headroom */ +static int lib80211_wep_build_iv(struct sk_buff *skb, int hdr_len, + u8 *key, int keylen, void *priv) +{ + struct lib80211_wep_data *wep = priv; + u32 klen, len; + u8 *pos; + + if (skb_headroom(skb) < 4 || skb->len < hdr_len) + return -1; + + len = skb->len - hdr_len; + pos = skb_push(skb, 4); + memmove(pos, pos + 4, hdr_len); + pos += hdr_len; + + klen = 3 + wep->key_len; + + wep->iv++; + + /* Fluhrer, Mantin, and Shamir have reported weaknesses in the key + * scheduling algorithm of RC4. At least IVs (KeyByte + 3, 0xff, N) + * can be used to speedup attacks, so avoid using them. */ + if ((wep->iv & 0xff00) == 0xff00) { + u8 B = (wep->iv >> 16) & 0xff; + if (B >= 3 && B < klen) + wep->iv += 0x0100; + } + + /* Prepend 24-bit IV to RC4 key and TX frame */ + *pos++ = (wep->iv >> 16) & 0xff; + *pos++ = (wep->iv >> 8) & 0xff; + *pos++ = wep->iv & 0xff; + *pos++ = wep->key_idx << 6; + + return 0; +} + +/* Perform WEP encryption on given skb that has at least 4 bytes of headroom + * for IV and 4 bytes of tailroom for ICV. Both IV and ICV will be transmitted, + * so the payload length increases with 8 bytes. + * + * WEP frame payload: IV + TX key idx, RC4(data), ICV = RC4(CRC32(data)) + */ +static int lib80211_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv) +{ + struct lib80211_wep_data *wep = priv; + struct blkcipher_desc desc = { .tfm = wep->tx_tfm }; + u32 crc, klen, len; + u8 *pos, *icv; + struct scatterlist sg; + u8 key[WEP_KEY_LEN + 3]; + + /* other checks are in lib80211_wep_build_iv */ + if (skb_tailroom(skb) < 4) + return -1; + + /* add the IV to the frame */ + if (lib80211_wep_build_iv(skb, hdr_len, NULL, 0, priv)) + return -1; + + /* Copy the IV into the first 3 bytes of the key */ + skb_copy_from_linear_data_offset(skb, hdr_len, key, 3); + + /* Copy rest of the WEP key (the secret part) */ + memcpy(key + 3, wep->key, wep->key_len); + + len = skb->len - hdr_len - 4; + pos = skb->data + hdr_len + 4; + klen = 3 + wep->key_len; + + /* Append little-endian CRC32 over only the data and encrypt it to produce ICV */ + crc = ~crc32_le(~0, pos, len); + icv = skb_put(skb, 4); + icv[0] = crc; + icv[1] = crc >> 8; + icv[2] = crc >> 16; + icv[3] = crc >> 24; + + crypto_blkcipher_setkey(wep->tx_tfm, key, klen); + sg_init_one(&sg, pos, len + 4); + return crypto_blkcipher_encrypt(&desc, &sg, &sg, len + 4); +} + +/* Perform WEP decryption on given buffer. Buffer includes whole WEP part of + * the frame: IV (4 bytes), encrypted payload (including SNAP header), + * ICV (4 bytes). len includes both IV and ICV. + * + * Returns 0 if frame was decrypted successfully and ICV was correct and -1 on + * failure. If frame is OK, IV and ICV will be removed. + */ +static int lib80211_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv) +{ + struct lib80211_wep_data *wep = priv; + struct blkcipher_desc desc = { .tfm = wep->rx_tfm }; + u32 crc, klen, plen; + u8 key[WEP_KEY_LEN + 3]; + u8 keyidx, *pos, icv[4]; + struct scatterlist sg; + + if (skb->len < hdr_len + 8) + return -1; + + pos = skb->data + hdr_len; + key[0] = *pos++; + key[1] = *pos++; + key[2] = *pos++; + keyidx = *pos++ >> 6; + if (keyidx != wep->key_idx) + return -1; + + klen = 3 + wep->key_len; + + /* Copy rest of the WEP key (the secret part) */ + memcpy(key + 3, wep->key, wep->key_len); + + /* Apply RC4 to data and compute CRC32 over decrypted data */ + plen = skb->len - hdr_len - 8; + + crypto_blkcipher_setkey(wep->rx_tfm, key, klen); + sg_init_one(&sg, pos, plen + 4); + if (crypto_blkcipher_decrypt(&desc, &sg, &sg, plen + 4)) + return -7; + + crc = ~crc32_le(~0, pos, plen); + icv[0] = crc; + icv[1] = crc >> 8; + icv[2] = crc >> 16; + icv[3] = crc >> 24; + if (memcmp(icv, pos + plen, 4) != 0) { + /* ICV mismatch - drop frame */ + return -2; + } + + /* Remove IV and ICV */ + memmove(skb->data + 4, skb->data, hdr_len); + skb_pull(skb, 4); + skb_trim(skb, skb->len - 4); + + return 0; +} + +static int lib80211_wep_set_key(void *key, int len, u8 * seq, void *priv) +{ + struct lib80211_wep_data *wep = priv; + + if (len < 0 || len > WEP_KEY_LEN) + return -1; + + memcpy(wep->key, key, len); + wep->key_len = len; + + return 0; +} + +static int lib80211_wep_get_key(void *key, int len, u8 * seq, void *priv) +{ + struct lib80211_wep_data *wep = priv; + + if (len < wep->key_len) + return -1; + + memcpy(key, wep->key, wep->key_len); + + return wep->key_len; +} + +static char *lib80211_wep_print_stats(char *p, void *priv) +{ + struct lib80211_wep_data *wep = priv; + p += sprintf(p, "key[%d] alg=WEP len=%d\n", wep->key_idx, wep->key_len); + return p; +} + +static struct lib80211_crypto_ops lib80211_crypt_wep = { + .name = "WEP", + .init = lib80211_wep_init, + .deinit = lib80211_wep_deinit, + .build_iv = lib80211_wep_build_iv, + .encrypt_mpdu = lib80211_wep_encrypt, + .decrypt_mpdu = lib80211_wep_decrypt, + .encrypt_msdu = NULL, + .decrypt_msdu = NULL, + .set_key = lib80211_wep_set_key, + .get_key = lib80211_wep_get_key, + .print_stats = lib80211_wep_print_stats, + .extra_mpdu_prefix_len = 4, /* IV */ + .extra_mpdu_postfix_len = 4, /* ICV */ + .owner = THIS_MODULE, +}; + +static int __init lib80211_crypto_wep_init(void) +{ + return lib80211_register_crypto_ops(&lib80211_crypt_wep); +} + +static void __exit lib80211_crypto_wep_exit(void) +{ + lib80211_unregister_crypto_ops(&lib80211_crypt_wep); +} + +module_init(lib80211_crypto_wep_init); +module_exit(lib80211_crypto_wep_exit); -- cgit v1.2.3 From 2ba4b32ecf748d5f45f298fc9677fa46d1dd9aff Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Tue, 11 Nov 2008 16:00:06 -0500 Subject: lib80211: consolidate crypt init routines Signed-off-by: John W. Linville --- drivers/net/wireless/hostap/hostap_hw.c | 61 +----------------------------- drivers/net/wireless/hostap/hostap_ioctl.c | 43 +++++---------------- include/net/lib80211.h | 3 ++ net/ieee80211/ieee80211_module.c | 26 +------------ net/wireless/lib80211.c | 38 +++++++++++++++++++ 5 files changed, 54 insertions(+), 117 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c index 066299fc9259..0f27059bbe85 100644 --- a/drivers/net/wireless/hostap/hostap_hw.c +++ b/drivers/net/wireless/hostap/hostap_hw.c @@ -2788,46 +2788,6 @@ static void prism2_check_sta_fw_version(local_info_t *local) } -static void prism2_crypt_deinit_entries(local_info_t *local, int force) -{ - struct list_head *ptr, *n; - struct lib80211_crypt_data *entry; - - for (ptr = local->crypt_info.crypt_deinit_list.next, n = ptr->next; - ptr != &local->crypt_info.crypt_deinit_list; - ptr = n, n = ptr->next) { - entry = list_entry(ptr, struct lib80211_crypt_data, list); - - if (atomic_read(&entry->refcnt) != 0 && !force) - continue; - - list_del(ptr); - - if (entry->ops) - entry->ops->deinit(entry->priv); - kfree(entry); - } -} - - -static void prism2_crypt_deinit_handler(unsigned long data) -{ - local_info_t *local = (local_info_t *) data; - unsigned long flags; - - spin_lock_irqsave(&local->lock, flags); - prism2_crypt_deinit_entries(local, 0); - if (!list_empty(&local->crypt_info.crypt_deinit_list)) { - printk(KERN_DEBUG "%s: entries remaining in delayed crypt " - "deletion list\n", local->dev->name); - local->crypt_info.crypt_deinit_timer.expires = jiffies + HZ; - add_timer(&local->crypt_info.crypt_deinit_timer); - } - spin_unlock_irqrestore(&local->lock, flags); - -} - - static void hostap_passive_scan(unsigned long data) { local_info_t *local = (local_info_t *) data; @@ -3252,12 +3212,7 @@ while (0) INIT_LIST_HEAD(&local->cmd_queue); init_waitqueue_head(&local->hostscan_wq); - local->crypt_info.name = dev->name; - local->crypt_info.lock = &local->lock; - INIT_LIST_HEAD(&local->crypt_info.crypt_deinit_list); - init_timer(&local->crypt_info.crypt_deinit_timer); - local->crypt_info.crypt_deinit_timer.data = (unsigned long) local; - local->crypt_info.crypt_deinit_timer.function = prism2_crypt_deinit_handler; + lib80211_crypt_info_init(&local->crypt_info, dev->name, &local->lock); init_timer(&local->passive_scan_timer); local->passive_scan_timer.data = (unsigned long) local; @@ -3358,9 +3313,7 @@ static void prism2_free_local_data(struct net_device *dev) flush_scheduled_work(); - if (timer_pending(&local->crypt_info.crypt_deinit_timer)) - del_timer(&local->crypt_info.crypt_deinit_timer); - prism2_crypt_deinit_entries(local, 1); + lib80211_crypt_info_free(&local->crypt_info); if (timer_pending(&local->passive_scan_timer)) del_timer(&local->passive_scan_timer); @@ -3377,16 +3330,6 @@ static void prism2_free_local_data(struct net_device *dev) if (local->dev_enabled) prism2_callback(local, PRISM2_CALLBACK_DISABLE); - for (i = 0; i < WEP_KEYS; i++) { - struct lib80211_crypt_data *crypt = local->crypt_info.crypt[i]; - if (crypt) { - if (crypt->ops) - crypt->ops->deinit(crypt->priv); - kfree(crypt); - local->crypt_info.crypt[i] = NULL; - } - } - if (local->ap != NULL) hostap_free_data(local->ap); diff --git a/drivers/net/wireless/hostap/hostap_ioctl.c b/drivers/net/wireless/hostap/hostap_ioctl.c index 29aebb679099..c40fdf4c79de 100644 --- a/drivers/net/wireless/hostap/hostap_ioctl.c +++ b/drivers/net/wireless/hostap/hostap_ioctl.c @@ -116,32 +116,6 @@ static int prism2_get_name(struct net_device *dev, } -static void prism2_crypt_delayed_deinit(local_info_t *local, - struct lib80211_crypt_data **crypt) -{ - struct lib80211_crypt_data *tmp; - unsigned long flags; - - tmp = *crypt; - *crypt = NULL; - - if (tmp == NULL) - return; - - /* must not run ops->deinit() while there may be pending encrypt or - * decrypt operations. Use a list of delayed deinits to avoid needing - * locking. */ - - spin_lock_irqsave(&local->lock, flags); - list_add(&tmp->list, &local->crypt_info.crypt_deinit_list); - if (!timer_pending(&local->crypt_info.crypt_deinit_timer)) { - local->crypt_info.crypt_deinit_timer.expires = jiffies + HZ; - add_timer(&local->crypt_info.crypt_deinit_timer); - } - spin_unlock_irqrestore(&local->lock, flags); -} - - static int prism2_ioctl_siwencode(struct net_device *dev, struct iw_request_info *info, struct iw_point *erq, char *keybuf) @@ -166,14 +140,14 @@ static int prism2_ioctl_siwencode(struct net_device *dev, if (erq->flags & IW_ENCODE_DISABLED) { if (*crypt) - prism2_crypt_delayed_deinit(local, crypt); + lib80211_crypt_delayed_deinit(&local->crypt_info, crypt); goto done; } if (*crypt != NULL && (*crypt)->ops != NULL && strcmp((*crypt)->ops->name, "WEP") != 0) { /* changing to use WEP; deinit previously used algorithm */ - prism2_crypt_delayed_deinit(local, crypt); + lib80211_crypt_delayed_deinit(&local->crypt_info, crypt); } if (*crypt == NULL) { @@ -189,7 +163,7 @@ static int prism2_ioctl_siwencode(struct net_device *dev, request_module("lib80211_crypt_wep"); new_crypt->ops = lib80211_get_crypto_ops("WEP"); } - if (new_crypt->ops) + if (new_crypt->ops && try_module_get(new_crypt->ops->owner)) new_crypt->priv = new_crypt->ops->init(i); if (!new_crypt->ops || !new_crypt->priv) { kfree(new_crypt); @@ -3269,7 +3243,7 @@ static int prism2_ioctl_siwencodeext(struct net_device *dev, if ((erq->flags & IW_ENCODE_DISABLED) || ext->alg == IW_ENCODE_ALG_NONE) { if (*crypt) - prism2_crypt_delayed_deinit(local, crypt); + lib80211_crypt_delayed_deinit(&local->crypt_info, crypt); goto done; } @@ -3317,7 +3291,7 @@ static int prism2_ioctl_siwencodeext(struct net_device *dev, if (*crypt == NULL || (*crypt)->ops != ops) { struct lib80211_crypt_data *new_crypt; - prism2_crypt_delayed_deinit(local, crypt); + lib80211_crypt_delayed_deinit(&local->crypt_info, crypt); new_crypt = kzalloc(sizeof(struct lib80211_crypt_data), GFP_KERNEL); @@ -3326,7 +3300,8 @@ static int prism2_ioctl_siwencodeext(struct net_device *dev, goto done; } new_crypt->ops = ops; - new_crypt->priv = new_crypt->ops->init(i); + if (new_crypt->ops && try_module_get(new_crypt->ops->owner)) + new_crypt->priv = new_crypt->ops->init(i); if (new_crypt->priv == NULL) { kfree(new_crypt); ret = -EINVAL; @@ -3503,7 +3478,7 @@ static int prism2_ioctl_set_encryption(local_info_t *local, if (strcmp(param->u.crypt.alg, "none") == 0) { if (crypt) - prism2_crypt_delayed_deinit(local, crypt); + lib80211_crypt_delayed_deinit(&local->crypt_info, crypt); goto done; } @@ -3533,7 +3508,7 @@ static int prism2_ioctl_set_encryption(local_info_t *local, if (*crypt == NULL || (*crypt)->ops != ops) { struct lib80211_crypt_data *new_crypt; - prism2_crypt_delayed_deinit(local, crypt); + lib80211_crypt_delayed_deinit(&local->crypt_info, crypt); new_crypt = kzalloc(sizeof(struct lib80211_crypt_data), GFP_KERNEL); diff --git a/include/net/lib80211.h b/include/net/lib80211.h index dd1079f98da4..a269b23d1125 100644 --- a/include/net/lib80211.h +++ b/include/net/lib80211.h @@ -114,6 +114,9 @@ struct lib80211_crypt_info { int crypt_quiesced; }; +int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name, + spinlock_t *lock); +void lib80211_crypt_info_free(struct lib80211_crypt_info *info); int lib80211_register_crypto_ops(struct lib80211_crypto_ops *ops); int lib80211_unregister_crypto_ops(struct lib80211_crypto_ops *ops); struct lib80211_crypto_ops *lib80211_get_crypto_ops(const char *name); diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c index cf21f0bd8569..a2f5616d5b09 100644 --- a/net/ieee80211/ieee80211_module.c +++ b/net/ieee80211/ieee80211_module.c @@ -182,13 +182,7 @@ struct net_device *alloc_ieee80211(int sizeof_priv) spin_lock_init(&ieee->lock); - ieee->crypt_info.name = dev->name; - ieee->crypt_info.lock = &ieee->lock; - INIT_LIST_HEAD(&ieee->crypt_info.crypt_deinit_list); - setup_timer(&ieee->crypt_info.crypt_deinit_timer, - lib80211_crypt_deinit_handler, - (unsigned long)&ieee->crypt_info); - ieee->crypt_info.crypt_quiesced = 0; + lib80211_crypt_info_init(&ieee->crypt_info, dev->name, &ieee->lock); ieee->wpa_enabled = 0; ieee->drop_unencrypted = 0; @@ -206,23 +200,7 @@ void free_ieee80211(struct net_device *dev) { struct ieee80211_device *ieee = netdev_priv(dev); - int i; - - lib80211_crypt_quiescing(&ieee->crypt_info); - del_timer_sync(&ieee->crypt_info.crypt_deinit_timer); - lib80211_crypt_deinit_entries(&ieee->crypt_info, 1); - - for (i = 0; i < WEP_KEYS; i++) { - struct lib80211_crypt_data *crypt = ieee->crypt_info.crypt[i]; - if (crypt) { - if (crypt->ops) { - crypt->ops->deinit(crypt->priv); - module_put(crypt->ops->owner); - } - kfree(crypt); - ieee->crypt_info.crypt[i] = NULL; - } - } + lib80211_crypt_info_free(&ieee->crypt_info); ieee80211_networks_free(ieee); free_netdev(dev); diff --git a/net/wireless/lib80211.c b/net/wireless/lib80211.c index d681721f4acb..97d411f74507 100644 --- a/net/wireless/lib80211.c +++ b/net/wireless/lib80211.c @@ -71,6 +71,44 @@ const char *print_ssid(char *buf, const char *ssid, u8 ssid_len) } EXPORT_SYMBOL(print_ssid); +int lib80211_crypt_info_init(struct lib80211_crypt_info *info, char *name, + spinlock_t *lock) +{ + memset(info, 0, sizeof(*info)); + + info->name = name; + info->lock = lock; + + INIT_LIST_HEAD(&info->crypt_deinit_list); + setup_timer(&info->crypt_deinit_timer, lib80211_crypt_deinit_handler, + (unsigned long)info); + + return 0; +} +EXPORT_SYMBOL(lib80211_crypt_info_init); + +void lib80211_crypt_info_free(struct lib80211_crypt_info *info) +{ + int i; + + lib80211_crypt_quiescing(info); + del_timer_sync(&info->crypt_deinit_timer); + lib80211_crypt_deinit_entries(info, 1); + + for (i = 0; i < NUM_WEP_KEYS; i++) { + struct lib80211_crypt_data *crypt = info->crypt[i]; + if (crypt) { + if (crypt->ops) { + crypt->ops->deinit(crypt->priv); + module_put(crypt->ops->owner); + } + kfree(crypt); + info->crypt[i] = NULL; + } + } +} +EXPORT_SYMBOL(lib80211_crypt_info_free); + void lib80211_crypt_deinit_entries(struct lib80211_crypt_info *info, int force) { struct lib80211_crypt_data *entry, *next; -- cgit v1.2.3 From 627271018df75c8861b9e75b39d5995842e6ec95 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Wed, 12 Nov 2008 10:01:41 -0500 Subject: mac80211: add explicit padding in struct ieee80211_tx_info Otherwise, the BUILD_BUG_ON calls in ieee80211_tx_info_clear_status can fail on some architectures. Signed-off-by: John W. Linville --- include/net/mac80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0dbbfc7b0509..6a1d4ea18186 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -341,6 +341,7 @@ struct ieee80211_tx_info { u8 antenna_sel_tx; /* 2 byte hole */ + u8 pad[2]; union { struct { -- cgit v1.2.3 From a1eb5fe319beb9e181aa52c8adf75ad9aab56a89 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Wed, 19 Nov 2008 09:37:43 +0200 Subject: wireless: missing include in lib80211.h This patch adds #include in lib80211.h to avoid these compilation erros. > In file included from /work/src/wireless-testing/net/wireless/lib80211.c:24: > /work/src/wireless-testing/include/net/lib80211.h:113: error: field > 'crypt_deinit_timer' has incomplete type > /work/src/wireless-testing/net/wireless/lib80211.c: In function > 'lib80211_crypt_info_init': > /work/src/wireless-testing/net/wireless/lib80211.c:83: error: implicit > declaration of function 'setup_timer' > /work/src/wireless-testing/net/wireless/lib80211.c: In function > 'lib80211_crypt_info_free': > /work/src/wireless-testing/net/wireless/lib80211.c:95: error: implicit > declaration of function 'del_timer_sync' > /work/src/wireless-testing/net/wireless/lib80211.c: In function > 'lib80211_crypt_deinit_handler': > /work/src/wireless-testing/net/wireless/lib80211.c:157: error: > implicit declaration of function 'add_timer' > /work/src/wireless-testing/net/wireless/lib80211.c: In function > 'lib80211_crypt_delayed_deinit': > /work/src/wireless-testing/net/wireless/lib80211.c:182: error: > implicit declaration of function 'timer_pending' > make[3]: *** [net/wireless/lib80211.o] Error 1 > make[2]: *** [net/wireless] Error 2 > make[1]: *** [net] Error 2 > make: *** [sub-make] Error 2 Signed-off-by: Rami Rosen Signed-off-by: John W. Linville --- include/net/lib80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/lib80211.h b/include/net/lib80211.h index a269b23d1125..fb4e2784857d 100644 --- a/include/net/lib80211.h +++ b/include/net/lib80211.h @@ -30,7 +30,7 @@ #include #include #include - +#include /* print_ssid() is intended to be used in debug (and possibly error) * messages. It should never be used for passing ssid to user space. */ const char *print_ssid(char *buf, const char *ssid, u8 ssid_len); -- cgit v1.2.3 From f757fec4b0d45dfcb52f9a914a12225a6a0a3e05 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Nov 2008 15:49:19 -0800 Subject: net: use net_eq() in INET_MATCH and INET_TW_MATCH We can avoid some useless instructions if !CONFIG_NET_NS Because of RCU, we use INET_MATCH or INET_TW_MATCH twice for the found socket, so thats six instructions less per incoming TCP packet. Yet another tbench speedup :) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 28b3ee3e8d6d..ec7ee2e46d8c 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -291,25 +291,25 @@ typedef __u64 __bitwise __addrpair; ((__force __u64)(__be32)(__saddr))); #endif /* __BIG_ENDIAN */ #define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ - (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ + (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ ((*((__addrpair *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #define INET_TW_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ - (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ + (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ ((*((__addrpair *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #else /* 32-bit arch */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) #define INET_MATCH(__sk, __net, __hash, __cookie, __saddr, __daddr, __ports, __dif) \ - (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ + (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ (inet_sk(__sk)->daddr == (__saddr)) && \ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #define INET_TW_MATCH(__sk, __net, __hash,__cookie, __saddr, __daddr, __ports, __dif) \ - (((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net) && \ + (((__sk)->sk_hash == (__hash)) && net_eq(sock_net(__sk), (__net)) && \ (inet_twsk(__sk)->tw_daddr == (__saddr)) && \ (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ -- cgit v1.2.3 From 2baf8a2daab65cdd3f20bfeb4676a2f6aff7c3bf Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Fri, 21 Nov 2008 16:34:18 -0800 Subject: netdevice hdlc: Convert directly reference of netdev->priv For killing directly reference of netdev->priv, use netdev->ml_priv to replace it. Because the private pvc data comes from add_pvc() and can't be allocated in alloc_netdev(). Signed-off-by: Wang Chen Acked-by: Krzysztof Halasa Signed-off-by: David S. Miller --- drivers/net/wan/hdlc_fr.c | 10 +++++----- include/linux/hdlc.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index d3d5055741ad..f1ddd7c3459c 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -342,7 +342,7 @@ static int fr_hard_header(struct sk_buff **skb_p, u16 dlci) static int pvc_open(struct net_device *dev) { - pvc_device *pvc = dev->priv; + pvc_device *pvc = dev->ml_priv; if ((pvc->frad->flags & IFF_UP) == 0) return -EIO; /* Frad must be UP in order to activate PVC */ @@ -362,7 +362,7 @@ static int pvc_open(struct net_device *dev) static int pvc_close(struct net_device *dev) { - pvc_device *pvc = dev->priv; + pvc_device *pvc = dev->ml_priv; if (--pvc->open_count == 0) { hdlc_device *hdlc = dev_to_hdlc(pvc->frad); @@ -381,7 +381,7 @@ static int pvc_close(struct net_device *dev) static int pvc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { - pvc_device *pvc = dev->priv; + pvc_device *pvc = dev->ml_priv; fr_proto_pvc_info info; if (ifr->ifr_settings.type == IF_GET_PROTO) { @@ -409,7 +409,7 @@ static int pvc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) static int pvc_xmit(struct sk_buff *skb, struct net_device *dev) { - pvc_device *pvc = dev->priv; + pvc_device *pvc = dev->ml_priv; if (pvc->state.active) { if (dev->type == ARPHRD_ETHER) { @@ -1111,7 +1111,7 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type) dev->change_mtu = pvc_change_mtu; dev->mtu = HDLC_MAX_MTU; dev->tx_queue_len = 0; - dev->priv = pvc; + dev->ml_priv = pvc; result = dev_alloc_name(dev, dev->name); if (result < 0) { diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h index c59769693bee..e960faac609d 100644 --- a/include/linux/hdlc.h +++ b/include/linux/hdlc.h @@ -80,7 +80,7 @@ struct net_device *alloc_hdlcdev(void *priv); static inline struct hdlc_device* dev_to_hdlc(struct net_device *dev) { - return dev->priv; + return netdev_priv(dev); } static __inline__ void debug_frame(const struct sk_buff *skb) -- cgit v1.2.3 From 72364706c3b7c09a658e356218a918c5f92dcad0 Mon Sep 17 00:00:00 2001 From: Krzysztof HaÅ‚asa Date: Thu, 14 Aug 2008 19:18:17 +0200 Subject: WAN: syncppp.c is no longer used by any kernel code. Remove it. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Krzysztof HaÅ‚asa --- Documentation/DocBook/Makefile | 2 +- Documentation/DocBook/networking.tmpl | 3 - Documentation/DocBook/wanbook.tmpl | 99 --- drivers/net/wan/syncppp.c | 1476 --------------------------------- include/net/syncppp.h | 102 --- 5 files changed, 1 insertion(+), 1681 deletions(-) delete mode 100644 Documentation/DocBook/wanbook.tmpl delete mode 100644 drivers/net/wan/syncppp.c delete mode 100644 include/net/syncppp.h (limited to 'include') diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 9b1f6ca100d1..0a08126d3094 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -6,7 +6,7 @@ # To add a new book the only step required is to add the book to the # list of DOCBOOKS. -DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml \ +DOCBOOKS := z8530book.xml mcabook.xml \ kernel-hacking.xml kernel-locking.xml deviceiobook.xml \ procfs-guide.xml writing_usb_driver.xml networking.xml \ kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \ diff --git a/Documentation/DocBook/networking.tmpl b/Documentation/DocBook/networking.tmpl index f24f9e85e4ae..627707a3cb9d 100644 --- a/Documentation/DocBook/networking.tmpl +++ b/Documentation/DocBook/networking.tmpl @@ -98,9 +98,6 @@ X!Enet/core/wireless.c --> - Synchronous PPP -!Edrivers/net/wan/syncppp.c - diff --git a/Documentation/DocBook/wanbook.tmpl b/Documentation/DocBook/wanbook.tmpl deleted file mode 100644 index 8c93db122f04..000000000000 --- a/Documentation/DocBook/wanbook.tmpl +++ /dev/null @@ -1,99 +0,0 @@ - - - - - - Synchronous PPP and Cisco HDLC Programming Guide - - - - Alan - Cox - -
- alan@lxorguk.ukuu.org.uk -
-
-
-
- - - 2000 - Alan Cox - - - - - This documentation is free software; you can redistribute - it and/or modify it under the terms of the GNU General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later - version. - - - - This program is distributed in the hope that it will be - useful, but WITHOUT ANY WARRANTY; without even the implied - warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - - - You should have received a copy of the GNU General Public - License along with this program; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, - MA 02111-1307 USA - - - - For more details see the file COPYING in the source - distribution of Linux. - - -
- - - - - Introduction - - The syncppp drivers in Linux provide a fairly complete - implementation of Cisco HDLC and a minimal implementation of - PPP. The longer term goal is to switch the PPP layer to the - generic PPP interface that is new in Linux 2.3.x. The API should - remain unchanged when this is done, but support will then be - available for IPX, compression and other PPP features - - - - Known Bugs And Assumptions - - - PPP is minimal - - - The current PPP implementation is very basic, although sufficient - for most wan usages. - - - - Cisco HDLC Quirks - - - Currently we do not end all packets with the correct Cisco multicast - or unicast flags. Nothing appears to mind too much but this should - be corrected. - - - - - - - - - Public Functions Provided -!Edrivers/net/wan/syncppp.c - - -
diff --git a/drivers/net/wan/syncppp.c b/drivers/net/wan/syncppp.c deleted file mode 100644 index 58ae8a2223af..000000000000 --- a/drivers/net/wan/syncppp.c +++ /dev/null @@ -1,1476 +0,0 @@ -/* - * NET3: A (fairly minimal) implementation of synchronous PPP for Linux - * as well as a CISCO HDLC implementation. See the copyright - * message below for the original source. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the license, or (at your option) any later version. - * - * Note however. This code is also used in a different form by FreeBSD. - * Therefore when making any non OS specific change please consider - * contributing it back to the original author under the terms - * below in addition. - * -- Alan - * - * Port for Linux-2.1 by Jan "Yenya" Kasprzak - */ - -/* - * Synchronous PPP/Cisco link level subroutines. - * Keepalive protocol implemented in both Cisco and PPP modes. - * - * Copyright (C) 1994 Cronyx Ltd. - * Author: Serge Vakulenko, - * - * This software is distributed with NO WARRANTIES, not even the implied - * warranties for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * Authors grant any other persons or organisations permission to use - * or modify this software as long as this message is kept with the software, - * all derivative works or modified versions. - * - * Version 1.9, Wed Oct 4 18:58:15 MSK 1995 - * - * $Id: syncppp.c,v 1.18 2000/04/11 05:25:31 asj Exp $ - */ -#undef DEBUG - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include - -#define MAXALIVECNT 6 /* max. alive packets */ - -#define PPP_ALLSTATIONS 0xff /* All-Stations broadcast address */ -#define PPP_UI 0x03 /* Unnumbered Information */ -#define PPP_IP 0x0021 /* Internet Protocol */ -#define PPP_ISO 0x0023 /* ISO OSI Protocol */ -#define PPP_XNS 0x0025 /* Xerox NS Protocol */ -#define PPP_IPX 0x002b /* Novell IPX Protocol */ -#define PPP_LCP 0xc021 /* Link Control Protocol */ -#define PPP_IPCP 0x8021 /* Internet Protocol Control Protocol */ - -#define LCP_CONF_REQ 1 /* PPP LCP configure request */ -#define LCP_CONF_ACK 2 /* PPP LCP configure acknowledge */ -#define LCP_CONF_NAK 3 /* PPP LCP configure negative ack */ -#define LCP_CONF_REJ 4 /* PPP LCP configure reject */ -#define LCP_TERM_REQ 5 /* PPP LCP terminate request */ -#define LCP_TERM_ACK 6 /* PPP LCP terminate acknowledge */ -#define LCP_CODE_REJ 7 /* PPP LCP code reject */ -#define LCP_PROTO_REJ 8 /* PPP LCP protocol reject */ -#define LCP_ECHO_REQ 9 /* PPP LCP echo request */ -#define LCP_ECHO_REPLY 10 /* PPP LCP echo reply */ -#define LCP_DISC_REQ 11 /* PPP LCP discard request */ - -#define LCP_OPT_MRU 1 /* maximum receive unit */ -#define LCP_OPT_ASYNC_MAP 2 /* async control character map */ -#define LCP_OPT_AUTH_PROTO 3 /* authentication protocol */ -#define LCP_OPT_QUAL_PROTO 4 /* quality protocol */ -#define LCP_OPT_MAGIC 5 /* magic number */ -#define LCP_OPT_RESERVED 6 /* reserved */ -#define LCP_OPT_PROTO_COMP 7 /* protocol field compression */ -#define LCP_OPT_ADDR_COMP 8 /* address/control field compression */ - -#define IPCP_CONF_REQ LCP_CONF_REQ /* PPP IPCP configure request */ -#define IPCP_CONF_ACK LCP_CONF_ACK /* PPP IPCP configure acknowledge */ -#define IPCP_CONF_NAK LCP_CONF_NAK /* PPP IPCP configure negative ack */ -#define IPCP_CONF_REJ LCP_CONF_REJ /* PPP IPCP configure reject */ -#define IPCP_TERM_REQ LCP_TERM_REQ /* PPP IPCP terminate request */ -#define IPCP_TERM_ACK LCP_TERM_ACK /* PPP IPCP terminate acknowledge */ -#define IPCP_CODE_REJ LCP_CODE_REJ /* PPP IPCP code reject */ - -#define CISCO_MULTICAST 0x8f /* Cisco multicast address */ -#define CISCO_UNICAST 0x0f /* Cisco unicast address */ -#define CISCO_KEEPALIVE 0x8035 /* Cisco keepalive protocol */ -#define CISCO_ADDR_REQ 0 /* Cisco address request */ -#define CISCO_ADDR_REPLY 1 /* Cisco address reply */ -#define CISCO_KEEPALIVE_REQ 2 /* Cisco keepalive request */ - -struct ppp_header { - u8 address; - u8 control; - __be16 protocol; -}; -#define PPP_HEADER_LEN sizeof (struct ppp_header) - -struct lcp_header { - u8 type; - u8 ident; - __be16 len; -}; -#define LCP_HEADER_LEN sizeof (struct lcp_header) - -struct cisco_packet { - __be32 type; - __be32 par1; - __be32 par2; - __be16 rel; - __be16 time0; - __be16 time1; -}; -#define CISCO_PACKET_LEN 18 -#define CISCO_BIG_PACKET_LEN 20 - -static struct sppp *spppq; -static struct timer_list sppp_keepalive_timer; -static DEFINE_SPINLOCK(spppq_lock); - -/* global xmit queue for sending packets while spinlock is held */ -static struct sk_buff_head tx_queue; - -static void sppp_keepalive (unsigned long dummy); -static void sppp_cp_send (struct sppp *sp, u16 proto, u8 type, - u8 ident, u16 len, void *data); -static void sppp_cisco_send (struct sppp *sp, int type, u32 par1, u32 par2); -static void sppp_lcp_input (struct sppp *sp, struct sk_buff *m); -static void sppp_cisco_input (struct sppp *sp, struct sk_buff *m); -static void sppp_ipcp_input (struct sppp *sp, struct sk_buff *m); -static void sppp_lcp_open (struct sppp *sp); -static void sppp_ipcp_open (struct sppp *sp); -static int sppp_lcp_conf_parse_options (struct sppp *sp, struct lcp_header *h, - int len, u32 *magic); -static void sppp_cp_timeout (unsigned long arg); -static char *sppp_lcp_type_name (u8 type); -static char *sppp_ipcp_type_name (u8 type); -static void sppp_print_bytes (u8 *p, u16 len); - -static int debug; - -/* Flush global outgoing packet queue to dev_queue_xmit(). - * - * dev_queue_xmit() must be called with interrupts enabled - * which means it can't be called with spinlocks held. - * If a packet needs to be sent while a spinlock is held, - * then put the packet into tx_queue, and call sppp_flush_xmit() - * after spinlock is released. - */ -static void sppp_flush_xmit(void) -{ - struct sk_buff *skb; - while ((skb = skb_dequeue(&tx_queue)) != NULL) - dev_queue_xmit(skb); -} - -/* - * Interface down stub - */ - -static void if_down(struct net_device *dev) -{ - struct sppp *sp = (struct sppp *)sppp_of(dev); - - sp->pp_link_state=SPPP_LINK_DOWN; -} - -/* - * Timeout routine activations. - */ - -static void sppp_set_timeout(struct sppp *p,int s) -{ - if (! (p->pp_flags & PP_TIMO)) - { - init_timer(&p->pp_timer); - p->pp_timer.function=sppp_cp_timeout; - p->pp_timer.expires=jiffies+s*HZ; - p->pp_timer.data=(unsigned long)p; - p->pp_flags |= PP_TIMO; - add_timer(&p->pp_timer); - } -} - -static void sppp_clear_timeout(struct sppp *p) -{ - if (p->pp_flags & PP_TIMO) - { - del_timer(&p->pp_timer); - p->pp_flags &= ~PP_TIMO; - } -} - -/** - * sppp_input - receive and process a WAN PPP frame - * @skb: The buffer to process - * @dev: The device it arrived on - * - * This can be called directly by cards that do not have - * timing constraints but is normally called from the network layer - * after interrupt servicing to process frames queued via netif_rx(). - * - * We process the options in the card. If the frame is destined for - * the protocol stacks then it requeues the frame for the upper level - * protocol. If it is a control from it is processed and discarded - * here. - */ - -static void sppp_input (struct net_device *dev, struct sk_buff *skb) -{ - struct ppp_header *h; - struct sppp *sp = (struct sppp *)sppp_of(dev); - unsigned long flags; - - skb->dev=dev; - skb_reset_mac_header(skb); - - if (!pskb_may_pull(skb, PPP_HEADER_LEN)) { - /* Too small packet, drop it. */ - if (sp->pp_flags & PP_DEBUG) - printk (KERN_DEBUG "%s: input packet is too small, %d bytes\n", - dev->name, skb->len); - kfree_skb(skb); - return; - } - - /* Get PPP header. */ - h = (struct ppp_header *)skb->data; - skb_pull(skb,sizeof(struct ppp_header)); - - spin_lock_irqsave(&sp->lock, flags); - - switch (h->address) { - default: /* Invalid PPP packet. */ - goto invalid; - case PPP_ALLSTATIONS: - if (h->control != PPP_UI) - goto invalid; - if (sp->pp_flags & PP_CISCO) { - if (sp->pp_flags & PP_DEBUG) - printk (KERN_WARNING "%s: PPP packet in Cisco mode <0x%x 0x%x 0x%x>\n", - dev->name, - h->address, h->control, ntohs (h->protocol)); - goto drop; - } - switch (ntohs (h->protocol)) { - default: - if (sp->lcp.state == LCP_STATE_OPENED) - sppp_cp_send (sp, PPP_LCP, LCP_PROTO_REJ, - ++sp->pp_seq, skb->len + 2, - &h->protocol); - if (sp->pp_flags & PP_DEBUG) - printk (KERN_WARNING "%s: invalid input protocol <0x%x 0x%x 0x%x>\n", - dev->name, - h->address, h->control, ntohs (h->protocol)); - goto drop; - case PPP_LCP: - sppp_lcp_input (sp, skb); - goto drop; - case PPP_IPCP: - if (sp->lcp.state == LCP_STATE_OPENED) - sppp_ipcp_input (sp, skb); - else - printk(KERN_DEBUG "IPCP when still waiting LCP finish.\n"); - goto drop; - case PPP_IP: - if (sp->ipcp.state == IPCP_STATE_OPENED) { - if(sp->pp_flags&PP_DEBUG) - printk(KERN_DEBUG "Yow an IP frame.\n"); - skb->protocol=htons(ETH_P_IP); - netif_rx(skb); - goto done; - } - break; -#ifdef IPX - case PPP_IPX: - /* IPX IPXCP not implemented yet */ - if (sp->lcp.state == LCP_STATE_OPENED) { - skb->protocol=htons(ETH_P_IPX); - netif_rx(skb); - goto done; - } - break; -#endif - } - break; - case CISCO_MULTICAST: - case CISCO_UNICAST: - /* Don't check the control field here (RFC 1547). */ - if (! (sp->pp_flags & PP_CISCO)) { - if (sp->pp_flags & PP_DEBUG) - printk (KERN_WARNING "%s: Cisco packet in PPP mode <0x%x 0x%x 0x%x>\n", - dev->name, - h->address, h->control, ntohs (h->protocol)); - goto drop; - } - switch (ntohs (h->protocol)) { - default: - goto invalid; - case CISCO_KEEPALIVE: - sppp_cisco_input (sp, skb); - goto drop; -#ifdef CONFIG_INET - case ETH_P_IP: - skb->protocol=htons(ETH_P_IP); - netif_rx(skb); - goto done; -#endif -#ifdef CONFIG_IPX - case ETH_P_IPX: - skb->protocol=htons(ETH_P_IPX); - netif_rx(skb); - goto done; -#endif - } - break; - } - goto drop; - -invalid: - if (sp->pp_flags & PP_DEBUG) - printk (KERN_WARNING "%s: invalid input packet <0x%x 0x%x 0x%x>\n", - dev->name, h->address, h->control, ntohs (h->protocol)); -drop: - kfree_skb(skb); -done: - spin_unlock_irqrestore(&sp->lock, flags); - sppp_flush_xmit(); - return; -} - -/* - * Handle transmit packets. - */ - -static int sppp_hard_header(struct sk_buff *skb, - struct net_device *dev, __u16 type, - const void *daddr, const void *saddr, - unsigned int len) -{ - struct sppp *sp = (struct sppp *)sppp_of(dev); - struct ppp_header *h; - skb_push(skb,sizeof(struct ppp_header)); - h=(struct ppp_header *)skb->data; - if(sp->pp_flags&PP_CISCO) - { - h->address = CISCO_UNICAST; - h->control = 0; - } - else - { - h->address = PPP_ALLSTATIONS; - h->control = PPP_UI; - } - if(sp->pp_flags & PP_CISCO) - { - h->protocol = htons(type); - } - else switch(type) - { - case ETH_P_IP: - h->protocol = htons(PPP_IP); - break; - case ETH_P_IPX: - h->protocol = htons(PPP_IPX); - break; - } - return sizeof(struct ppp_header); -} - -static const struct header_ops sppp_header_ops = { - .create = sppp_hard_header, -}; - -/* - * Send keepalive packets, every 10 seconds. - */ - -static void sppp_keepalive (unsigned long dummy) -{ - struct sppp *sp; - unsigned long flags; - - spin_lock_irqsave(&spppq_lock, flags); - - for (sp=spppq; sp; sp=sp->pp_next) - { - struct net_device *dev = sp->pp_if; - - /* Keepalive mode disabled or channel down? */ - if (! (sp->pp_flags & PP_KEEPALIVE) || - ! (dev->flags & IFF_UP)) - continue; - - spin_lock(&sp->lock); - - /* No keepalive in PPP mode if LCP not opened yet. */ - if (! (sp->pp_flags & PP_CISCO) && - sp->lcp.state != LCP_STATE_OPENED) { - spin_unlock(&sp->lock); - continue; - } - - if (sp->pp_alivecnt == MAXALIVECNT) { - /* No keepalive packets got. Stop the interface. */ - printk (KERN_WARNING "%s: protocol down\n", dev->name); - if_down (dev); - if (! (sp->pp_flags & PP_CISCO)) { - /* Shut down the PPP link. */ - sp->lcp.magic = jiffies; - sp->lcp.state = LCP_STATE_CLOSED; - sp->ipcp.state = IPCP_STATE_CLOSED; - sppp_clear_timeout (sp); - /* Initiate negotiation. */ - sppp_lcp_open (sp); - } - } - if (sp->pp_alivecnt <= MAXALIVECNT) - ++sp->pp_alivecnt; - if (sp->pp_flags & PP_CISCO) - sppp_cisco_send (sp, CISCO_KEEPALIVE_REQ, ++sp->pp_seq, - sp->pp_rseq); - else if (sp->lcp.state == LCP_STATE_OPENED) { - __be32 nmagic = htonl (sp->lcp.magic); - sp->lcp.echoid = ++sp->pp_seq; - sppp_cp_send (sp, PPP_LCP, LCP_ECHO_REQ, - sp->lcp.echoid, 4, &nmagic); - } - - spin_unlock(&sp->lock); - } - spin_unlock_irqrestore(&spppq_lock, flags); - sppp_flush_xmit(); - sppp_keepalive_timer.expires=jiffies+10*HZ; - add_timer(&sppp_keepalive_timer); -} - -/* - * Handle incoming PPP Link Control Protocol packets. - */ - -static void sppp_lcp_input (struct sppp *sp, struct sk_buff *skb) -{ - struct lcp_header *h; - struct net_device *dev = sp->pp_if; - int len = skb->len; - u8 *p, opt[6]; - u32 rmagic = 0; - - if (!pskb_may_pull(skb, sizeof(struct lcp_header))) { - if (sp->pp_flags & PP_DEBUG) - printk (KERN_WARNING "%s: invalid lcp packet length: %d bytes\n", - dev->name, len); - return; - } - h = (struct lcp_header *)skb->data; - skb_pull(skb,sizeof(struct lcp_header *)); - - if (sp->pp_flags & PP_DEBUG) - { - char state = '?'; - switch (sp->lcp.state) { - case LCP_STATE_CLOSED: state = 'C'; break; - case LCP_STATE_ACK_RCVD: state = 'R'; break; - case LCP_STATE_ACK_SENT: state = 'S'; break; - case LCP_STATE_OPENED: state = 'O'; break; - } - printk (KERN_WARNING "%s: lcp input(%c): %d bytes <%s id=%xh len=%xh", - dev->name, state, len, - sppp_lcp_type_name (h->type), h->ident, ntohs (h->len)); - if (len > 4) - sppp_print_bytes ((u8*) (h+1), len-4); - printk (">\n"); - } - if (len > ntohs (h->len)) - len = ntohs (h->len); - switch (h->type) { - default: - /* Unknown packet type -- send Code-Reject packet. */ - sppp_cp_send (sp, PPP_LCP, LCP_CODE_REJ, ++sp->pp_seq, - skb->len, h); - break; - case LCP_CONF_REQ: - if (len < 4) { - if (sp->pp_flags & PP_DEBUG) - printk (KERN_DEBUG"%s: invalid lcp configure request packet length: %d bytes\n", - dev->name, len); - break; - } - if (len>4 && !sppp_lcp_conf_parse_options (sp, h, len, &rmagic)) - goto badreq; - if (rmagic == sp->lcp.magic) { - /* Local and remote magics equal -- loopback? */ - if (sp->pp_loopcnt >= MAXALIVECNT*5) { - printk (KERN_WARNING "%s: loopback\n", - dev->name); - sp->pp_loopcnt = 0; - if (dev->flags & IFF_UP) { - if_down (dev); - } - } else if (sp->pp_flags & PP_DEBUG) - printk (KERN_DEBUG "%s: conf req: magic glitch\n", - dev->name); - ++sp->pp_loopcnt; - - /* MUST send Conf-Nack packet. */ - rmagic = ~sp->lcp.magic; - opt[0] = LCP_OPT_MAGIC; - opt[1] = sizeof (opt); - opt[2] = rmagic >> 24; - opt[3] = rmagic >> 16; - opt[4] = rmagic >> 8; - opt[5] = rmagic; - sppp_cp_send (sp, PPP_LCP, LCP_CONF_NAK, - h->ident, sizeof (opt), &opt); -badreq: - switch (sp->lcp.state) { - case LCP_STATE_OPENED: - /* Initiate renegotiation. */ - sppp_lcp_open (sp); - /* fall through... */ - case LCP_STATE_ACK_SENT: - /* Go to closed state. */ - sp->lcp.state = LCP_STATE_CLOSED; - sp->ipcp.state = IPCP_STATE_CLOSED; - } - break; - } - /* Send Configure-Ack packet. */ - sp->pp_loopcnt = 0; - if (sp->lcp.state != LCP_STATE_OPENED) { - sppp_cp_send (sp, PPP_LCP, LCP_CONF_ACK, - h->ident, len-4, h+1); - } - /* Change the state. */ - switch (sp->lcp.state) { - case LCP_STATE_CLOSED: - sp->lcp.state = LCP_STATE_ACK_SENT; - break; - case LCP_STATE_ACK_RCVD: - sp->lcp.state = LCP_STATE_OPENED; - sppp_ipcp_open (sp); - break; - case LCP_STATE_OPENED: - /* Remote magic changed -- close session. */ - sp->lcp.state = LCP_STATE_CLOSED; - sp->ipcp.state = IPCP_STATE_CLOSED; - /* Initiate renegotiation. */ - sppp_lcp_open (sp); - /* Send ACK after our REQ in attempt to break loop */ - sppp_cp_send (sp, PPP_LCP, LCP_CONF_ACK, - h->ident, len-4, h+1); - sp->lcp.state = LCP_STATE_ACK_SENT; - break; - } - break; - case LCP_CONF_ACK: - if (h->ident != sp->lcp.confid) - break; - sppp_clear_timeout (sp); - if ((sp->pp_link_state != SPPP_LINK_UP) && - (dev->flags & IFF_UP)) { - /* Coming out of loopback mode. */ - sp->pp_link_state=SPPP_LINK_UP; - printk (KERN_INFO "%s: protocol up\n", dev->name); - } - switch (sp->lcp.state) { - case LCP_STATE_CLOSED: - sp->lcp.state = LCP_STATE_ACK_RCVD; - sppp_set_timeout (sp, 5); - break; - case LCP_STATE_ACK_SENT: - sp->lcp.state = LCP_STATE_OPENED; - sppp_ipcp_open (sp); - break; - } - break; - case LCP_CONF_NAK: - if (h->ident != sp->lcp.confid) - break; - p = (u8*) (h+1); - if (len>=10 && p[0] == LCP_OPT_MAGIC && p[1] >= 4) { - rmagic = (u32)p[2] << 24 | - (u32)p[3] << 16 | p[4] << 8 | p[5]; - if (rmagic == ~sp->lcp.magic) { - int newmagic; - if (sp->pp_flags & PP_DEBUG) - printk (KERN_DEBUG "%s: conf nak: magic glitch\n", - dev->name); - get_random_bytes(&newmagic, sizeof(newmagic)); - sp->lcp.magic += newmagic; - } else - sp->lcp.magic = rmagic; - } - if (sp->lcp.state != LCP_STATE_ACK_SENT) { - /* Go to closed state. */ - sp->lcp.state = LCP_STATE_CLOSED; - sp->ipcp.state = IPCP_STATE_CLOSED; - } - /* The link will be renegotiated after timeout, - * to avoid endless req-nack loop. */ - sppp_clear_timeout (sp); - sppp_set_timeout (sp, 2); - break; - case LCP_CONF_REJ: - if (h->ident != sp->lcp.confid) - break; - sppp_clear_timeout (sp); - /* Initiate renegotiation. */ - sppp_lcp_open (sp); - if (sp->lcp.state != LCP_STATE_ACK_SENT) { - /* Go to closed state. */ - sp->lcp.state = LCP_STATE_CLOSED; - sp->ipcp.state = IPCP_STATE_CLOSED; - } - break; - case LCP_TERM_REQ: - sppp_clear_timeout (sp); - /* Send Terminate-Ack packet. */ - sppp_cp_send (sp, PPP_LCP, LCP_TERM_ACK, h->ident, 0, NULL); - /* Go to closed state. */ - sp->lcp.state = LCP_STATE_CLOSED; - sp->ipcp.state = IPCP_STATE_CLOSED; - /* Initiate renegotiation. */ - sppp_lcp_open (sp); - break; - case LCP_TERM_ACK: - case LCP_CODE_REJ: - case LCP_PROTO_REJ: - /* Ignore for now. */ - break; - case LCP_DISC_REQ: - /* Discard the packet. */ - break; - case LCP_ECHO_REQ: - if (sp->lcp.state != LCP_STATE_OPENED) - break; - if (len < 8) { - if (sp->pp_flags & PP_DEBUG) - printk (KERN_WARNING "%s: invalid lcp echo request packet length: %d bytes\n", - dev->name, len); - break; - } - if (ntohl (*(__be32*)(h+1)) == sp->lcp.magic) { - /* Line loopback mode detected. */ - printk (KERN_WARNING "%s: loopback\n", dev->name); - if_down (dev); - - /* Shut down the PPP link. */ - sp->lcp.state = LCP_STATE_CLOSED; - sp->ipcp.state = IPCP_STATE_CLOSED; - sppp_clear_timeout (sp); - /* Initiate negotiation. */ - sppp_lcp_open (sp); - break; - } - *(__be32 *)(h+1) = htonl (sp->lcp.magic); - sppp_cp_send (sp, PPP_LCP, LCP_ECHO_REPLY, h->ident, len-4, h+1); - break; - case LCP_ECHO_REPLY: - if (h->ident != sp->lcp.echoid) - break; - if (len < 8) { - if (sp->pp_flags & PP_DEBUG) - printk (KERN_WARNING "%s: invalid lcp echo reply packet length: %d bytes\n", - dev->name, len); - break; - } - if (ntohl(*(__be32 *)(h+1)) != sp->lcp.magic) - sp->pp_alivecnt = 0; - break; - } -} - -/* - * Handle incoming Cisco keepalive protocol packets. - */ - -static void sppp_cisco_input (struct sppp *sp, struct sk_buff *skb) -{ - struct cisco_packet *h; - struct net_device *dev = sp->pp_if; - - if (!pskb_may_pull(skb, sizeof(struct cisco_packet)) - || (skb->len != CISCO_PACKET_LEN - && skb->len != CISCO_BIG_PACKET_LEN)) { - if (sp->pp_flags & PP_DEBUG) - printk (KERN_WARNING "%s: invalid cisco packet length: %d bytes\n", - dev->name, skb->len); - return; - } - h = (struct cisco_packet *)skb->data; - skb_pull(skb, sizeof(struct cisco_packet*)); - if (sp->pp_flags & PP_DEBUG) - printk (KERN_WARNING "%s: cisco input: %d bytes <%xh %xh %xh %xh %xh-%xh>\n", - dev->name, skb->len, - ntohl (h->type), h->par1, h->par2, h->rel, - h->time0, h->time1); - switch (ntohl (h->type)) { - default: - if (sp->pp_flags & PP_DEBUG) - printk (KERN_WARNING "%s: unknown cisco packet type: 0x%x\n", - dev->name, ntohl (h->type)); - break; - case CISCO_ADDR_REPLY: - /* Reply on address request, ignore */ - break; - case CISCO_KEEPALIVE_REQ: - sp->pp_alivecnt = 0; - sp->pp_rseq = ntohl (h->par1); - if (sp->pp_seq == sp->pp_rseq) { - /* Local and remote sequence numbers are equal. - * Probably, the line is in loopback mode. */ - int newseq; - if (sp->pp_loopcnt >= MAXALIVECNT) { - printk (KERN_WARNING "%s: loopback\n", - dev->name); - sp->pp_loopcnt = 0; - if (dev->flags & IFF_UP) { - if_down (dev); - } - } - ++sp->pp_loopcnt; - - /* Generate new local sequence number */ - get_random_bytes(&newseq, sizeof(newseq)); - sp->pp_seq ^= newseq; - break; - } - sp->pp_loopcnt = 0; - if (sp->pp_link_state==SPPP_LINK_DOWN && - (dev->flags & IFF_UP)) { - sp->pp_link_state=SPPP_LINK_UP; - printk (KERN_INFO "%s: protocol up\n", dev->name); - } - break; - case CISCO_ADDR_REQ: - /* Stolen from net/ipv4/devinet.c -- SIOCGIFADDR ioctl */ - { - __be32 addr = 0, mask = htonl(~0U); /* FIXME: is the mask correct? */ -#ifdef CONFIG_INET - struct in_device *in_dev; - struct in_ifaddr *ifa; - - rcu_read_lock(); - if ((in_dev = __in_dev_get_rcu(dev)) != NULL) - { - for (ifa=in_dev->ifa_list; ifa != NULL; - ifa=ifa->ifa_next) { - if (strcmp(dev->name, ifa->ifa_label) == 0) - { - addr = ifa->ifa_local; - mask = ifa->ifa_mask; - break; - } - } - } - rcu_read_unlock(); -#endif - sppp_cisco_send (sp, CISCO_ADDR_REPLY, ntohl(addr), ntohl(mask)); - break; - } - } -} - - -/* - * Send PPP LCP packet. - */ - -static void sppp_cp_send (struct sppp *sp, u16 proto, u8 type, - u8 ident, u16 len, void *data) -{ - struct ppp_header *h; - struct lcp_header *lh; - struct sk_buff *skb; - struct net_device *dev = sp->pp_if; - - skb=alloc_skb(dev->hard_header_len+PPP_HEADER_LEN+LCP_HEADER_LEN+len, - GFP_ATOMIC); - if (skb==NULL) - return; - - skb_reserve(skb,dev->hard_header_len); - - h = (struct ppp_header *)skb_put(skb, sizeof(struct ppp_header)); - h->address = PPP_ALLSTATIONS; /* broadcast address */ - h->control = PPP_UI; /* Unnumbered Info */ - h->protocol = htons (proto); /* Link Control Protocol */ - - lh = (struct lcp_header *)skb_put(skb, sizeof(struct lcp_header)); - lh->type = type; - lh->ident = ident; - lh->len = htons (LCP_HEADER_LEN + len); - - if (len) - memcpy(skb_put(skb,len),data, len); - - if (sp->pp_flags & PP_DEBUG) { - printk (KERN_WARNING "%s: %s output <%s id=%xh len=%xh", - dev->name, - proto==PPP_LCP ? "lcp" : "ipcp", - proto==PPP_LCP ? sppp_lcp_type_name (lh->type) : - sppp_ipcp_type_name (lh->type), lh->ident, - ntohs (lh->len)); - if (len) - sppp_print_bytes ((u8*) (lh+1), len); - printk (">\n"); - } - /* Control is high priority so it doesn't get queued behind data */ - skb->priority=TC_PRIO_CONTROL; - skb->dev = dev; - skb_queue_tail(&tx_queue, skb); -} - -/* - * Send Cisco keepalive packet. - */ - -static void sppp_cisco_send (struct sppp *sp, int type, u32 par1, u32 par2) -{ - struct ppp_header *h; - struct cisco_packet *ch; - struct sk_buff *skb; - struct net_device *dev = sp->pp_if; - u32 t = jiffies * 1000/HZ; - - skb=alloc_skb(dev->hard_header_len+PPP_HEADER_LEN+CISCO_PACKET_LEN, - GFP_ATOMIC); - - if(skb==NULL) - return; - - skb_reserve(skb, dev->hard_header_len); - h = (struct ppp_header *)skb_put (skb, sizeof(struct ppp_header)); - h->address = CISCO_MULTICAST; - h->control = 0; - h->protocol = htons (CISCO_KEEPALIVE); - - ch = (struct cisco_packet*)skb_put(skb, CISCO_PACKET_LEN); - ch->type = htonl (type); - ch->par1 = htonl (par1); - ch->par2 = htonl (par2); - ch->rel = htons(0xffff); - ch->time0 = htons ((u16) (t >> 16)); - ch->time1 = htons ((u16) t); - - if (sp->pp_flags & PP_DEBUG) - printk (KERN_WARNING "%s: cisco output: <%xh %xh %xh %xh %xh-%xh>\n", - dev->name, ntohl (ch->type), ch->par1, - ch->par2, ch->rel, ch->time0, ch->time1); - skb->priority=TC_PRIO_CONTROL; - skb->dev = dev; - skb_queue_tail(&tx_queue, skb); -} - -/** - * sppp_close - close down a synchronous PPP or Cisco HDLC link - * @dev: The network device to drop the link of - * - * This drops the logical interface to the channel. It is not - * done politely as we assume we will also be dropping DTR. Any - * timeouts are killed. - */ - -int sppp_close (struct net_device *dev) -{ - struct sppp *sp = (struct sppp *)sppp_of(dev); - unsigned long flags; - - spin_lock_irqsave(&sp->lock, flags); - sp->pp_link_state = SPPP_LINK_DOWN; - sp->lcp.state = LCP_STATE_CLOSED; - sp->ipcp.state = IPCP_STATE_CLOSED; - sppp_clear_timeout (sp); - spin_unlock_irqrestore(&sp->lock, flags); - - return 0; -} - -EXPORT_SYMBOL(sppp_close); - -/** - * sppp_open - open a synchronous PPP or Cisco HDLC link - * @dev: Network device to activate - * - * Close down any existing synchronous session and commence - * from scratch. In the PPP case this means negotiating LCP/IPCP - * and friends, while for Cisco HDLC we simply need to start sending - * keepalives - */ - -int sppp_open (struct net_device *dev) -{ - struct sppp *sp = (struct sppp *)sppp_of(dev); - unsigned long flags; - - sppp_close(dev); - - spin_lock_irqsave(&sp->lock, flags); - if (!(sp->pp_flags & PP_CISCO)) { - sppp_lcp_open (sp); - } - sp->pp_link_state = SPPP_LINK_DOWN; - spin_unlock_irqrestore(&sp->lock, flags); - sppp_flush_xmit(); - - return 0; -} - -EXPORT_SYMBOL(sppp_open); - -/** - * sppp_reopen - notify of physical link loss - * @dev: Device that lost the link - * - * This function informs the synchronous protocol code that - * the underlying link died (for example a carrier drop on X.21) - * - * We increment the magic numbers to ensure that if the other end - * failed to notice we will correctly start a new session. It happens - * do to the nature of telco circuits is that you can lose carrier on - * one endonly. - * - * Having done this we go back to negotiating. This function may - * be called from an interrupt context. - */ - -int sppp_reopen (struct net_device *dev) -{ - struct sppp *sp = (struct sppp *)sppp_of(dev); - unsigned long flags; - - sppp_close(dev); - - spin_lock_irqsave(&sp->lock, flags); - if (!(sp->pp_flags & PP_CISCO)) - { - sp->lcp.magic = jiffies; - ++sp->pp_seq; - sp->lcp.state = LCP_STATE_CLOSED; - sp->ipcp.state = IPCP_STATE_CLOSED; - /* Give it a moment for the line to settle then go */ - sppp_set_timeout (sp, 1); - } - sp->pp_link_state=SPPP_LINK_DOWN; - spin_unlock_irqrestore(&sp->lock, flags); - - return 0; -} - -EXPORT_SYMBOL(sppp_reopen); - -/** - * sppp_change_mtu - Change the link MTU - * @dev: Device to change MTU on - * @new_mtu: New MTU - * - * Change the MTU on the link. This can only be called with - * the link down. It returns an error if the link is up or - * the mtu is out of range. - */ - -static int sppp_change_mtu(struct net_device *dev, int new_mtu) -{ - if(new_mtu<128||new_mtu>PPP_MTU||(dev->flags&IFF_UP)) - return -EINVAL; - dev->mtu=new_mtu; - return 0; -} - -/** - * sppp_do_ioctl - Ioctl handler for ppp/hdlc - * @dev: Device subject to ioctl - * @ifr: Interface request block from the user - * @cmd: Command that is being issued - * - * This function handles the ioctls that may be issued by the user - * to control the settings of a PPP/HDLC link. It does both busy - * and security checks. This function is intended to be wrapped by - * callers who wish to add additional ioctl calls of their own. - */ - -int sppp_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - struct sppp *sp = (struct sppp *)sppp_of(dev); - - if(dev->flags&IFF_UP) - return -EBUSY; - - if(!capable(CAP_NET_ADMIN)) - return -EPERM; - - switch(cmd) - { - case SPPPIOCCISCO: - sp->pp_flags|=PP_CISCO; - dev->type = ARPHRD_HDLC; - break; - case SPPPIOCPPP: - sp->pp_flags&=~PP_CISCO; - dev->type = ARPHRD_PPP; - break; - case SPPPIOCDEBUG: - sp->pp_flags&=~PP_DEBUG; - if(ifr->ifr_flags) - sp->pp_flags|=PP_DEBUG; - break; - case SPPPIOCGFLAGS: - if(copy_to_user(ifr->ifr_data, &sp->pp_flags, sizeof(sp->pp_flags))) - return -EFAULT; - break; - case SPPPIOCSFLAGS: - if(copy_from_user(&sp->pp_flags, ifr->ifr_data, sizeof(sp->pp_flags))) - return -EFAULT; - break; - default: - return -EINVAL; - } - return 0; -} - -EXPORT_SYMBOL(sppp_do_ioctl); - -/** - * sppp_attach - attach synchronous PPP/HDLC to a device - * @pd: PPP device to initialise - * - * This initialises the PPP/HDLC support on an interface. At the - * time of calling the dev element must point to the network device - * that this interface is attached to. The interface should not yet - * be registered. - */ - -void sppp_attach(struct ppp_device *pd) -{ - struct net_device *dev = pd->dev; - struct sppp *sp = &pd->sppp; - unsigned long flags; - - /* Make sure embedding is safe for sppp_of */ - BUG_ON(sppp_of(dev) != sp); - - spin_lock_irqsave(&spppq_lock, flags); - /* Initialize keepalive handler. */ - if (! spppq) - { - init_timer(&sppp_keepalive_timer); - sppp_keepalive_timer.expires=jiffies+10*HZ; - sppp_keepalive_timer.function=sppp_keepalive; - add_timer(&sppp_keepalive_timer); - } - /* Insert new entry into the keepalive list. */ - sp->pp_next = spppq; - spppq = sp; - spin_unlock_irqrestore(&spppq_lock, flags); - - sp->pp_loopcnt = 0; - sp->pp_alivecnt = 0; - sp->pp_seq = 0; - sp->pp_rseq = 0; - sp->pp_flags = PP_KEEPALIVE|PP_CISCO|debug;/*PP_DEBUG;*/ - sp->lcp.magic = 0; - sp->lcp.state = LCP_STATE_CLOSED; - sp->ipcp.state = IPCP_STATE_CLOSED; - sp->pp_if = dev; - spin_lock_init(&sp->lock); - - /* - * Device specific setup. All but interrupt handler and - * hard_start_xmit. - */ - - dev->header_ops = &sppp_header_ops; - - dev->tx_queue_len = 10; - dev->type = ARPHRD_HDLC; - dev->addr_len = 0; - dev->hard_header_len = sizeof(struct ppp_header); - dev->mtu = PPP_MTU; - /* - * These 4 are callers but MUST also call sppp_ functions - */ - dev->do_ioctl = sppp_do_ioctl; -#if 0 - dev->get_stats = NULL; /* Let the driver override these */ - dev->open = sppp_open; - dev->stop = sppp_close; -#endif - dev->change_mtu = sppp_change_mtu; - dev->flags = IFF_MULTICAST|IFF_POINTOPOINT|IFF_NOARP; -} - -EXPORT_SYMBOL(sppp_attach); - -/** - * sppp_detach - release PPP resources from a device - * @dev: Network device to release - * - * Stop and free up any PPP/HDLC resources used by this - * interface. This must be called before the device is - * freed. - */ - -void sppp_detach (struct net_device *dev) -{ - struct sppp **q, *p, *sp = (struct sppp *)sppp_of(dev); - unsigned long flags; - - spin_lock_irqsave(&spppq_lock, flags); - /* Remove the entry from the keepalive list. */ - for (q = &spppq; (p = *q); q = &p->pp_next) - if (p == sp) { - *q = p->pp_next; - break; - } - - /* Stop keepalive handler. */ - if (! spppq) - del_timer(&sppp_keepalive_timer); - sppp_clear_timeout (sp); - spin_unlock_irqrestore(&spppq_lock, flags); -} - -EXPORT_SYMBOL(sppp_detach); - -/* - * Analyze the LCP Configure-Request options list - * for the presence of unknown options. - * If the request contains unknown options, build and - * send Configure-reject packet, containing only unknown options. - */ -static int -sppp_lcp_conf_parse_options (struct sppp *sp, struct lcp_header *h, - int len, u32 *magic) -{ - u8 *buf, *r, *p; - int rlen; - - len -= 4; - buf = r = kmalloc (len, GFP_ATOMIC); - if (! buf) - return (0); - - p = (void*) (h+1); - for (rlen=0; len>1 && p[1]; len-=p[1], p+=p[1]) { - switch (*p) { - case LCP_OPT_MAGIC: - /* Magic number -- extract. */ - if (len >= 6 && p[1] == 6) { - *magic = (u32)p[2] << 24 | - (u32)p[3] << 16 | p[4] << 8 | p[5]; - continue; - } - break; - case LCP_OPT_ASYNC_MAP: - /* Async control character map -- check to be zero. */ - if (len >= 6 && p[1] == 6 && ! p[2] && ! p[3] && - ! p[4] && ! p[5]) - continue; - break; - case LCP_OPT_MRU: - /* Maximum receive unit -- always OK. */ - continue; - default: - /* Others not supported. */ - break; - } - /* Add the option to rejected list. */ - memcpy(r, p, p[1]); - r += p[1]; - rlen += p[1]; - } - if (rlen) - sppp_cp_send (sp, PPP_LCP, LCP_CONF_REJ, h->ident, rlen, buf); - kfree(buf); - return (rlen == 0); -} - -static void sppp_ipcp_input (struct sppp *sp, struct sk_buff *skb) -{ - struct lcp_header *h; - struct net_device *dev = sp->pp_if; - int len = skb->len; - - if (!pskb_may_pull(skb, sizeof(struct lcp_header))) { - if (sp->pp_flags & PP_DEBUG) - printk (KERN_WARNING "%s: invalid ipcp packet length: %d bytes\n", - dev->name, len); - return; - } - h = (struct lcp_header *)skb->data; - skb_pull(skb,sizeof(struct lcp_header)); - if (sp->pp_flags & PP_DEBUG) { - printk (KERN_WARNING "%s: ipcp input: %d bytes <%s id=%xh len=%xh", - dev->name, len, - sppp_ipcp_type_name (h->type), h->ident, ntohs (h->len)); - if (len > 4) - sppp_print_bytes ((u8*) (h+1), len-4); - printk (">\n"); - } - if (len > ntohs (h->len)) - len = ntohs (h->len); - switch (h->type) { - default: - /* Unknown packet type -- send Code-Reject packet. */ - sppp_cp_send (sp, PPP_IPCP, IPCP_CODE_REJ, ++sp->pp_seq, len, h); - break; - case IPCP_CONF_REQ: - if (len < 4) { - if (sp->pp_flags & PP_DEBUG) - printk (KERN_WARNING "%s: invalid ipcp configure request packet length: %d bytes\n", - dev->name, len); - return; - } - if (len > 4) { - sppp_cp_send (sp, PPP_IPCP, LCP_CONF_REJ, h->ident, - len-4, h+1); - - switch (sp->ipcp.state) { - case IPCP_STATE_OPENED: - /* Initiate renegotiation. */ - sppp_ipcp_open (sp); - /* fall through... */ - case IPCP_STATE_ACK_SENT: - /* Go to closed state. */ - sp->ipcp.state = IPCP_STATE_CLOSED; - } - } else { - /* Send Configure-Ack packet. */ - sppp_cp_send (sp, PPP_IPCP, IPCP_CONF_ACK, h->ident, - 0, NULL); - /* Change the state. */ - if (sp->ipcp.state == IPCP_STATE_ACK_RCVD) - sp->ipcp.state = IPCP_STATE_OPENED; - else - sp->ipcp.state = IPCP_STATE_ACK_SENT; - } - break; - case IPCP_CONF_ACK: - if (h->ident != sp->ipcp.confid) - break; - sppp_clear_timeout (sp); - switch (sp->ipcp.state) { - case IPCP_STATE_CLOSED: - sp->ipcp.state = IPCP_STATE_ACK_RCVD; - sppp_set_timeout (sp, 5); - break; - case IPCP_STATE_ACK_SENT: - sp->ipcp.state = IPCP_STATE_OPENED; - break; - } - break; - case IPCP_CONF_NAK: - case IPCP_CONF_REJ: - if (h->ident != sp->ipcp.confid) - break; - sppp_clear_timeout (sp); - /* Initiate renegotiation. */ - sppp_ipcp_open (sp); - if (sp->ipcp.state != IPCP_STATE_ACK_SENT) - /* Go to closed state. */ - sp->ipcp.state = IPCP_STATE_CLOSED; - break; - case IPCP_TERM_REQ: - /* Send Terminate-Ack packet. */ - sppp_cp_send (sp, PPP_IPCP, IPCP_TERM_ACK, h->ident, 0, NULL); - /* Go to closed state. */ - sp->ipcp.state = IPCP_STATE_CLOSED; - /* Initiate renegotiation. */ - sppp_ipcp_open (sp); - break; - case IPCP_TERM_ACK: - /* Ignore for now. */ - case IPCP_CODE_REJ: - /* Ignore for now. */ - break; - } -} - -static void sppp_lcp_open (struct sppp *sp) -{ - char opt[6]; - - if (! sp->lcp.magic) - sp->lcp.magic = jiffies; - opt[0] = LCP_OPT_MAGIC; - opt[1] = sizeof (opt); - opt[2] = sp->lcp.magic >> 24; - opt[3] = sp->lcp.magic >> 16; - opt[4] = sp->lcp.magic >> 8; - opt[5] = sp->lcp.magic; - sp->lcp.confid = ++sp->pp_seq; - sppp_cp_send (sp, PPP_LCP, LCP_CONF_REQ, sp->lcp.confid, - sizeof (opt), &opt); - sppp_set_timeout (sp, 2); -} - -static void sppp_ipcp_open (struct sppp *sp) -{ - sp->ipcp.confid = ++sp->pp_seq; - sppp_cp_send (sp, PPP_IPCP, IPCP_CONF_REQ, sp->ipcp.confid, 0, NULL); - sppp_set_timeout (sp, 2); -} - -/* - * Process PPP control protocol timeouts. - */ - -static void sppp_cp_timeout (unsigned long arg) -{ - struct sppp *sp = (struct sppp*) arg; - unsigned long flags; - - spin_lock_irqsave(&sp->lock, flags); - - sp->pp_flags &= ~PP_TIMO; - if (! (sp->pp_if->flags & IFF_UP) || (sp->pp_flags & PP_CISCO)) { - spin_unlock_irqrestore(&sp->lock, flags); - return; - } - switch (sp->lcp.state) { - case LCP_STATE_CLOSED: - /* No ACK for Configure-Request, retry. */ - sppp_lcp_open (sp); - break; - case LCP_STATE_ACK_RCVD: - /* ACK got, but no Configure-Request for peer, retry. */ - sppp_lcp_open (sp); - sp->lcp.state = LCP_STATE_CLOSED; - break; - case LCP_STATE_ACK_SENT: - /* ACK sent but no ACK for Configure-Request, retry. */ - sppp_lcp_open (sp); - break; - case LCP_STATE_OPENED: - /* LCP is already OK, try IPCP. */ - switch (sp->ipcp.state) { - case IPCP_STATE_CLOSED: - /* No ACK for Configure-Request, retry. */ - sppp_ipcp_open (sp); - break; - case IPCP_STATE_ACK_RCVD: - /* ACK got, but no Configure-Request for peer, retry. */ - sppp_ipcp_open (sp); - sp->ipcp.state = IPCP_STATE_CLOSED; - break; - case IPCP_STATE_ACK_SENT: - /* ACK sent but no ACK for Configure-Request, retry. */ - sppp_ipcp_open (sp); - break; - case IPCP_STATE_OPENED: - /* IPCP is OK. */ - break; - } - break; - } - spin_unlock_irqrestore(&sp->lock, flags); - sppp_flush_xmit(); -} - -static char *sppp_lcp_type_name (u8 type) -{ - static char buf [8]; - switch (type) { - case LCP_CONF_REQ: return ("conf-req"); - case LCP_CONF_ACK: return ("conf-ack"); - case LCP_CONF_NAK: return ("conf-nack"); - case LCP_CONF_REJ: return ("conf-rej"); - case LCP_TERM_REQ: return ("term-req"); - case LCP_TERM_ACK: return ("term-ack"); - case LCP_CODE_REJ: return ("code-rej"); - case LCP_PROTO_REJ: return ("proto-rej"); - case LCP_ECHO_REQ: return ("echo-req"); - case LCP_ECHO_REPLY: return ("echo-reply"); - case LCP_DISC_REQ: return ("discard-req"); - } - sprintf (buf, "%xh", type); - return (buf); -} - -static char *sppp_ipcp_type_name (u8 type) -{ - static char buf [8]; - switch (type) { - case IPCP_CONF_REQ: return ("conf-req"); - case IPCP_CONF_ACK: return ("conf-ack"); - case IPCP_CONF_NAK: return ("conf-nack"); - case IPCP_CONF_REJ: return ("conf-rej"); - case IPCP_TERM_REQ: return ("term-req"); - case IPCP_TERM_ACK: return ("term-ack"); - case IPCP_CODE_REJ: return ("code-rej"); - } - sprintf (buf, "%xh", type); - return (buf); -} - -static void sppp_print_bytes (u_char *p, u16 len) -{ - printk (" %x", *p++); - while (--len > 0) - printk ("-%x", *p++); -} - -/** - * sppp_rcv - receive and process a WAN PPP frame - * @skb: The buffer to process - * @dev: The device it arrived on - * @p: Unused - * @orig_dev: Unused - * - * Protocol glue. This drives the deferred processing mode the poorer - * cards use. This can be called directly by cards that do not have - * timing constraints but is normally called from the network layer - * after interrupt servicing to process frames queued via netif_rx. - */ - -static int sppp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p, struct net_device *orig_dev) -{ - if (dev_net(dev) != &init_net) { - kfree_skb(skb); - return 0; - } - - if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) - return NET_RX_DROP; - sppp_input(dev,skb); - return 0; -} - -static struct packet_type sppp_packet_type = { - .type = __constant_htons(ETH_P_WAN_PPP), - .func = sppp_rcv, -}; - -static char banner[] __initdata = - KERN_INFO "Cronyx Ltd, Synchronous PPP and CISCO HDLC (c) 1994\n" - KERN_INFO "Linux port (c) 1998 Building Number Three Ltd & " - "Jan \"Yenya\" Kasprzak.\n"; - -static int __init sync_ppp_init(void) -{ - if(debug) - debug=PP_DEBUG; - printk(banner); - skb_queue_head_init(&tx_queue); - dev_add_pack(&sppp_packet_type); - return 0; -} - - -static void __exit sync_ppp_cleanup(void) -{ - dev_remove_pack(&sppp_packet_type); -} - -module_init(sync_ppp_init); -module_exit(sync_ppp_cleanup); -module_param(debug, int, 0); -MODULE_LICENSE("GPL"); - diff --git a/include/net/syncppp.h b/include/net/syncppp.h deleted file mode 100644 index 9e306f7f579a..000000000000 --- a/include/net/syncppp.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Defines for synchronous PPP/Cisco link level subroutines. - * - * Copyright (C) 1994 Cronyx Ltd. - * Author: Serge Vakulenko, - * - * This software is distributed with NO WARRANTIES, not even the implied - * warranties for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * Authors grant any other persons or organizations permission to use - * or modify this software as long as this message is kept with the software, - * all derivative works or modified versions. - * - * Version 1.7, Wed Jun 7 22:12:02 MSD 1995 - * - * - * - */ - -#ifndef _SYNCPPP_H_ -#define _SYNCPPP_H_ 1 - -#ifdef __KERNEL__ -struct slcp { - u16 state; /* state machine */ - u32 magic; /* local magic number */ - u_char echoid; /* id of last keepalive echo request */ - u_char confid; /* id of last configuration request */ -}; - -struct sipcp { - u16 state; /* state machine */ - u_char confid; /* id of last configuration request */ -}; - -struct sppp -{ - struct sppp * pp_next; /* next interface in keepalive list */ - u32 pp_flags; /* use Cisco protocol instead of PPP */ - u16 pp_alivecnt; /* keepalive packets counter */ - u16 pp_loopcnt; /* loopback detection counter */ - u32 pp_seq; /* local sequence number */ - u32 pp_rseq; /* remote sequence number */ - struct slcp lcp; /* LCP params */ - struct sipcp ipcp; /* IPCP params */ - struct timer_list pp_timer; - struct net_device *pp_if; - char pp_link_state; /* Link status */ - spinlock_t lock; -}; - -struct ppp_device -{ - struct net_device *dev; /* Network device pointer */ - struct sppp sppp; /* Synchronous PPP */ -}; - -static inline struct sppp *sppp_of(struct net_device *dev) -{ - struct ppp_device **ppp = dev->ml_priv; - BUG_ON((*ppp)->dev != dev); - return &(*ppp)->sppp; -} - -#define PP_KEEPALIVE 0x01 /* use keepalive protocol */ -#define PP_CISCO 0x02 /* use Cisco protocol instead of PPP */ -#define PP_TIMO 0x04 /* cp_timeout routine active */ -#define PP_DEBUG 0x08 - -#define PPP_MTU 1500 /* max. transmit unit */ - -#define LCP_STATE_CLOSED 0 /* LCP state: closed (conf-req sent) */ -#define LCP_STATE_ACK_RCVD 1 /* LCP state: conf-ack received */ -#define LCP_STATE_ACK_SENT 2 /* LCP state: conf-ack sent */ -#define LCP_STATE_OPENED 3 /* LCP state: opened */ - -#define IPCP_STATE_CLOSED 0 /* IPCP state: closed (conf-req sent) */ -#define IPCP_STATE_ACK_RCVD 1 /* IPCP state: conf-ack received */ -#define IPCP_STATE_ACK_SENT 2 /* IPCP state: conf-ack sent */ -#define IPCP_STATE_OPENED 3 /* IPCP state: opened */ - -#define SPPP_LINK_DOWN 0 /* link down - no keepalive */ -#define SPPP_LINK_UP 1 /* link is up - keepalive ok */ - -void sppp_attach (struct ppp_device *pd); -void sppp_detach (struct net_device *dev); -int sppp_do_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd); -struct sk_buff *sppp_dequeue (struct net_device *dev); -int sppp_isempty (struct net_device *dev); -void sppp_flush (struct net_device *dev); -int sppp_open (struct net_device *dev); -int sppp_reopen (struct net_device *dev); -int sppp_close (struct net_device *dev); -#endif - -#define SPPPIOCCISCO (SIOCDEVPRIVATE) -#define SPPPIOCPPP (SIOCDEVPRIVATE+1) -#define SPPPIOCDEBUG (SIOCDEVPRIVATE+2) -#define SPPPIOCSFLAGS (SIOCDEVPRIVATE+3) -#define SPPPIOCGFLAGS (SIOCDEVPRIVATE+4) - -#endif /* _SYNCPPP_H_ */ -- cgit v1.2.3 From f201ae2356c74bcae130b2177b3dca903ea98071 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 23 Nov 2008 06:22:56 +0100 Subject: tracing/function-return-tracer: store return stack into task_struct and allocate it dynamically Impact: use deeper function tracing depth safely Some tests showed that function return tracing needed a more deeper depth of function calls. But it could be unsafe to store these return addresses to the stack. So these arrays will now be allocated dynamically into task_struct of current only when the tracer is activated. Typical scheme when tracer is activated: - allocate a return stack for each task in global list. - fork: allocate the return stack for the newly created task - exit: free return stack of current - idle init: same as fork I chose a default depth of 50. I don't have overruns anymore. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ftrace.h | 1 - arch/x86/include/asm/thread_info.h | 29 ------------ arch/x86/kernel/ftrace.c | 29 ++++++------ include/linux/ftrace.h | 5 ++ include/linux/sched.h | 23 +++++---- kernel/exit.c | 5 +- kernel/fork.c | 4 ++ kernel/sched.c | 3 ++ kernel/trace/ftrace.c | 96 +++++++++++++++++++++++++++++++++++++- 9 files changed, 137 insertions(+), 58 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 2bb43b433e07..754a3e082f94 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -29,7 +29,6 @@ struct dyn_arch_ftrace { #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_FUNCTION_RET_TRACER -#define FTRACE_RET_STACK_SIZE 20 #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e90e81ef6ab9..0921b4018c11 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -40,36 +40,8 @@ struct thread_info { */ __u8 supervisor_stack[0]; #endif - -#ifdef CONFIG_FUNCTION_RET_TRACER - /* Index of current stored adress in ret_stack */ - int curr_ret_stack; - /* Stack of return addresses for return function tracing */ - struct ftrace_ret_stack ret_stack[FTRACE_RET_STACK_SIZE]; - /* - * Number of functions that haven't been traced - * because of depth overrun. - */ - atomic_t trace_overrun; -#endif }; -#ifdef CONFIG_FUNCTION_RET_TRACER -#define INIT_THREAD_INFO(tsk) \ -{ \ - .task = &tsk, \ - .exec_domain = &default_exec_domain, \ - .flags = 0, \ - .cpu = 0, \ - .preempt_count = 1, \ - .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ - .curr_ret_stack = -1,\ - .trace_overrun = ATOMIC_INIT(0) \ -} -#else #define INIT_THREAD_INFO(tsk) \ { \ .task = &tsk, \ @@ -82,7 +54,6 @@ struct thread_info { .fn = do_no_restart_syscall, \ }, \ } -#endif #define init_thread_info (init_thread_union.thread_info) #define init_stack (init_thread_union.stack) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 356bb1eb6e9a..bb137f7297ed 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -350,19 +350,21 @@ static int push_return_trace(unsigned long ret, unsigned long long time, unsigned long func) { int index; - struct thread_info *ti = current_thread_info(); + + if (!current->ret_stack) + return -EBUSY; /* The return trace stack is full */ - if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1) { - atomic_inc(&ti->trace_overrun); + if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { + atomic_inc(¤t->trace_overrun); return -EBUSY; } - index = ++ti->curr_ret_stack; + index = ++current->curr_ret_stack; barrier(); - ti->ret_stack[index].ret = ret; - ti->ret_stack[index].func = func; - ti->ret_stack[index].calltime = time; + current->ret_stack[index].ret = ret; + current->ret_stack[index].func = func; + current->ret_stack[index].calltime = time; return 0; } @@ -373,13 +375,12 @@ static void pop_return_trace(unsigned long *ret, unsigned long long *time, { int index; - struct thread_info *ti = current_thread_info(); - index = ti->curr_ret_stack; - *ret = ti->ret_stack[index].ret; - *func = ti->ret_stack[index].func; - *time = ti->ret_stack[index].calltime; - *overrun = atomic_read(&ti->trace_overrun); - ti->curr_ret_stack--; + index = current->curr_ret_stack; + *ret = current->ret_stack[index].ret; + *func = current->ret_stack[index].func; + *time = current->ret_stack[index].calltime; + *overrun = atomic_read(¤t->trace_overrun); + current->curr_ret_stack--; } /* diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f7ba4ea5e128..2ba259b2defa 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -323,6 +323,8 @@ struct ftrace_retfunc { }; #ifdef CONFIG_FUNCTION_RET_TRACER +#define FTRACE_RETFUNC_DEPTH 50 +#define FTRACE_RETSTACK_ALLOC_SIZE 32 /* Type of a callback handler of tracing return function */ typedef void (*trace_function_return_t)(struct ftrace_retfunc *); @@ -330,6 +332,9 @@ extern int register_ftrace_return(trace_function_return_t func); /* The current handler in use */ extern trace_function_return_t ftrace_function_return; extern void unregister_ftrace_return(void); + +extern void ftrace_retfunc_init_task(struct task_struct *t); +extern void ftrace_retfunc_exit_task(struct task_struct *t); #endif #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index c8e0db464206..bee1e93c95ad 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1352,6 +1352,17 @@ struct task_struct { unsigned long default_timer_slack_ns; struct list_head *scm_work_list; +#ifdef CONFIG_FUNCTION_RET_TRACER + /* Index of current stored adress in ret_stack */ + int curr_ret_stack; + /* Stack of return addresses for return function tracing */ + struct ftrace_ret_stack *ret_stack; + /* + * Number of functions that haven't been traced + * because of depth overrun. + */ + atomic_t trace_overrun; +#endif }; /* @@ -2006,18 +2017,6 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct { *task_thread_info(p) = *task_thread_info(org); task_thread_info(p)->task = p; - -#ifdef CONFIG_FUNCTION_RET_TRACER - /* - * When fork() creates a child process, this function is called. - * But the child task may not inherit the return adresses traced - * by the return function tracer because it will directly execute - * in userspace and will not return to kernel functions its parent - * used. - */ - task_thread_info(p)->curr_ret_stack = -1; - atomic_set(&task_thread_info(p)->trace_overrun, 0); -#endif } static inline unsigned long *end_of_stack(struct task_struct *p) diff --git a/kernel/exit.c b/kernel/exit.c index 35c8ec2ba03a..b9d446329da1 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -1127,7 +1128,9 @@ NORET_TYPE void do_exit(long code) preempt_disable(); /* causes final put_task_struct in finish_task_switch(). */ tsk->state = TASK_DEAD; - +#ifdef CONFIG_FUNCTION_RET_TRACER + ftrace_retfunc_exit_task(tsk); +#endif schedule(); BUG(); /* Avoid "noreturn function does return". */ diff --git a/kernel/fork.c b/kernel/fork.c index ac62f43ee430..d1eb30e69ccc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -1269,6 +1270,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, total_forks++; spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); +#ifdef CONFIG_FUNCTION_RET_TRACER + ftrace_retfunc_init_task(p); +#endif proc_fork_connector(p); cgroup_post_fork(p); return p; diff --git a/kernel/sched.c b/kernel/sched.c index 4de56108c86f..fb17205950de 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5901,6 +5901,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) * The idle tasks have their own, simple scheduling class: */ idle->sched_class = &idle_sched_class; +#ifdef CONFIG_FUNCTION_RET_TRACER + ftrace_retfunc_init_task(idle); +#endif } /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f212da486689..90d99fb02ae4 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1498,10 +1498,77 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, #ifdef CONFIG_FUNCTION_RET_TRACER +static atomic_t ftrace_retfunc_active; + /* The callback that hooks the return of a function */ trace_function_return_t ftrace_function_return = (trace_function_return_t)ftrace_stub; + +/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */ +static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) +{ + int i; + int ret = 0; + unsigned long flags; + int start = 0, end = FTRACE_RETSTACK_ALLOC_SIZE; + struct task_struct *g, *t; + + for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) { + ret_stack_list[i] = kmalloc(FTRACE_RETFUNC_DEPTH + * sizeof(struct ftrace_ret_stack), + GFP_KERNEL); + if (!ret_stack_list[i]) { + start = 0; + end = i; + ret = -ENOMEM; + goto free; + } + } + + read_lock_irqsave(&tasklist_lock, flags); + do_each_thread(g, t) { + if (start == end) { + ret = -EAGAIN; + goto unlock; + } + + if (t->ret_stack == NULL) { + t->ret_stack = ret_stack_list[start++]; + t->curr_ret_stack = -1; + atomic_set(&t->trace_overrun, 0); + } + } while_each_thread(g, t); + +unlock: + read_unlock_irqrestore(&tasklist_lock, flags); +free: + for (i = start; i < end; i++) + kfree(ret_stack_list[i]); + return ret; +} + +/* Allocate a return stack for each task */ +static int start_return_tracing(void) +{ + struct ftrace_ret_stack **ret_stack_list; + int ret; + + ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE * + sizeof(struct ftrace_ret_stack *), + GFP_KERNEL); + + if (!ret_stack_list) + return -ENOMEM; + + do { + ret = alloc_retstack_tasklist(ret_stack_list); + } while (ret == -EAGAIN); + + kfree(ret_stack_list); + return ret; +} + int register_ftrace_return(trace_function_return_t func) { int ret = 0; @@ -1516,7 +1583,12 @@ int register_ftrace_return(trace_function_return_t func) ret = -EBUSY; goto out; } - + atomic_inc(&ftrace_retfunc_active); + ret = start_return_tracing(); + if (ret) { + atomic_dec(&ftrace_retfunc_active); + goto out; + } ftrace_tracing_type = FTRACE_TYPE_RETURN; ftrace_function_return = func; ftrace_startup(); @@ -1530,6 +1602,7 @@ void unregister_ftrace_return(void) { mutex_lock(&ftrace_sysctl_lock); + atomic_dec(&ftrace_retfunc_active); ftrace_function_return = (trace_function_return_t)ftrace_stub; ftrace_shutdown(); /* Restore normal tracing type */ @@ -1537,6 +1610,27 @@ void unregister_ftrace_return(void) mutex_unlock(&ftrace_sysctl_lock); } + +/* Allocate a return stack for newly created task */ +void ftrace_retfunc_init_task(struct task_struct *t) +{ + if (atomic_read(&ftrace_retfunc_active)) { + t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH + * sizeof(struct ftrace_ret_stack), + GFP_KERNEL); + if (!t->ret_stack) + return; + t->curr_ret_stack = -1; + atomic_set(&t->trace_overrun, 0); + } else + t->ret_stack = NULL; +} + +void ftrace_retfunc_exit_task(struct task_struct *t) +{ + kfree(t->ret_stack); + t->ret_stack = NULL; +} #endif -- cgit v1.2.3 From 82f60f0bc854aada696f27d863c03bef91f1509d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 23 Nov 2008 09:18:56 +0100 Subject: tracing/function-return-tracer: clean up task start/exit callbacks Impact: cleanup Eliminate #ifdefs in core code by using empty inline functions. Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 3 +++ kernel/exit.c | 2 -- kernel/fork.c | 2 -- kernel/sched.c | 2 -- 4 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2ba259b2defa..938ca1942641 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -335,6 +335,9 @@ extern void unregister_ftrace_return(void); extern void ftrace_retfunc_init_task(struct task_struct *t); extern void ftrace_retfunc_exit_task(struct task_struct *t); +#else +static inline void ftrace_retfunc_init_task(struct task_struct *t) { } +static inline void ftrace_retfunc_exit_task(struct task_struct *t) { } #endif #endif /* _LINUX_FTRACE_H */ diff --git a/kernel/exit.c b/kernel/exit.c index b9d446329da1..ef04d03b3286 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1128,9 +1128,7 @@ NORET_TYPE void do_exit(long code) preempt_disable(); /* causes final put_task_struct in finish_task_switch(). */ tsk->state = TASK_DEAD; -#ifdef CONFIG_FUNCTION_RET_TRACER ftrace_retfunc_exit_task(tsk); -#endif schedule(); BUG(); /* Avoid "noreturn function does return". */ diff --git a/kernel/fork.c b/kernel/fork.c index d1eb30e69ccc..fbf4a4c0a628 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1270,9 +1270,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, total_forks++; spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); -#ifdef CONFIG_FUNCTION_RET_TRACER ftrace_retfunc_init_task(p); -#endif proc_fork_connector(p); cgroup_post_fork(p); return p; diff --git a/kernel/sched.c b/kernel/sched.c index fb17205950de..388d9db044ab 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5901,9 +5901,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) * The idle tasks have their own, simple scheduling class: */ idle->sched_class = &idle_sched_class; -#ifdef CONFIG_FUNCTION_RET_TRACER ftrace_retfunc_init_task(idle); -#endif } /* -- cgit v1.2.3 From 02b67518e2b1c490787dac7f35e1204e74fe21ba Mon Sep 17 00:00:00 2001 From: Török Edwin Date: Sat, 22 Nov 2008 13:28:47 +0200 Subject: tracing: add support for userspace stacktraces in tracing/iter_ctrl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: add new (default-off) tracing visualization feature Usage example: mount -t debugfs nodev /sys/kernel/debug cd /sys/kernel/debug/tracing echo userstacktrace >iter_ctrl echo sched_switch >current_tracer echo 1 >tracing_enabled .... run application ... echo 0 >tracing_enabled Then read one of 'trace','latency_trace','trace_pipe'. To get the best output you can compile your userspace programs with frame pointers (at least glibc + the app you are tracing). Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- Documentation/ftrace.txt | 5 ++- arch/x86/kernel/stacktrace.c | 57 +++++++++++++++++++++++++++ include/linux/stacktrace.h | 8 ++++ kernel/trace/trace.c | 93 ++++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace.h | 9 +++++ 5 files changed, 171 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt index 753f4de4b175..79a80f79c062 100644 --- a/Documentation/ftrace.txt +++ b/Documentation/ftrace.txt @@ -324,7 +324,7 @@ output. To see what is available, simply cat the file: cat /debug/tracing/trace_options print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \ - noblock nostacktrace nosched-tree + noblock nostacktrace nosched-tree nouserstacktrace To disable one of the options, echo in the option prepended with "no". @@ -378,6 +378,9 @@ Here are the available options: When a trace is recorded, so is the stack of functions. This allows for back traces of trace sites. + userstacktrace - This option changes the trace. + It records a stacktrace of the current userspace thread. + sched-tree - TBD (any users??) diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index a03e7f6d90c3..b15153060417 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -6,6 +6,7 @@ #include #include #include +#include #include static void save_stack_warning(void *data, char *msg) @@ -83,3 +84,59 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) trace->entries[trace->nr_entries++] = ULONG_MAX; } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); + +/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ + +struct stack_frame { + const void __user *next_fp; + unsigned long return_address; +}; + +static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) +{ + int ret; + + if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) + return 0; + + ret = 1; + pagefault_disable(); + if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) + ret = 0; + pagefault_enable(); + + return ret; +} + +void save_stack_trace_user(struct stack_trace *trace) +{ + /* + * Trace user stack if we are not a kernel thread + */ + if (current->mm) { + const struct pt_regs *regs = task_pt_regs(current); + const void __user *fp = (const void __user *)regs->bp; + + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = regs->ip; + + while (trace->nr_entries < trace->max_entries) { + struct stack_frame frame; + frame.next_fp = NULL; + frame.return_address = 0; + if (!copy_stack_frame(fp, &frame)) + break; + if ((unsigned long)fp < regs->sp) + break; + if (frame.return_address) + trace->entries[trace->nr_entries++] = + frame.return_address; + if (fp == frame.next_fp) + break; + fp = frame.next_fp; + } + } + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} + diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index b106fd8e0d5c..68de51468f5d 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -15,9 +15,17 @@ extern void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace); extern void print_stack_trace(struct stack_trace *trace, int spaces); + +#ifdef CONFIG_X86 +extern void save_stack_trace_user(struct stack_trace *trace); +#else +# define save_stack_trace_user(trace) do { } while (0) +#endif + #else # define save_stack_trace(trace) do { } while (0) # define save_stack_trace_tsk(tsk, trace) do { } while (0) +# define save_stack_trace_user(trace) do { } while (0) # define print_stack_trace(trace, spaces) do { } while (0) #endif diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4ee6f0375222..ced8b4fa9f51 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -275,6 +275,7 @@ static const char *trace_options[] = { "ftrace_preempt", "branch", "annotate", + "userstacktrace", NULL }; @@ -918,6 +919,44 @@ void __trace_stack(struct trace_array *tr, ftrace_trace_stack(tr, data, flags, skip, preempt_count()); } +static void ftrace_trace_userstack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags, int pc) +{ + struct userstack_entry *entry; + struct stack_trace trace; + struct ring_buffer_event *event; + unsigned long irq_flags; + + if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) + return; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, flags, pc); + entry->ent.type = TRACE_USER_STACK; + + memset(&entry->caller, 0, sizeof(entry->caller)); + + trace.nr_entries = 0; + trace.max_entries = FTRACE_STACK_ENTRIES; + trace.skip = 0; + trace.entries = entry->caller; + + save_stack_trace_user(&trace); + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); +} + +void __trace_userstack(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long flags) +{ + ftrace_trace_userstack(tr, data, flags, preempt_count()); +} + static void ftrace_trace_special(void *__tr, void *__data, unsigned long arg1, unsigned long arg2, unsigned long arg3, @@ -941,6 +980,7 @@ ftrace_trace_special(void *__tr, void *__data, entry->arg3 = arg3; ring_buffer_unlock_commit(tr->buffer, event, irq_flags); ftrace_trace_stack(tr, data, irq_flags, 4, pc); + ftrace_trace_userstack(tr, data, irq_flags, pc); trace_wake_up(); } @@ -979,6 +1019,7 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_cpu = task_cpu(next); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); ftrace_trace_stack(tr, data, flags, 5, pc); + ftrace_trace_userstack(tr, data, flags, pc); } void @@ -1008,6 +1049,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_cpu = task_cpu(wakee); ring_buffer_unlock_commit(tr->buffer, event, irq_flags); ftrace_trace_stack(tr, data, flags, 6, pc); + ftrace_trace_userstack(tr, data, flags, pc); trace_wake_up(); } @@ -1387,6 +1429,31 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) return ret; } +static int +seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, + unsigned long sym_flags) +{ + int ret = 1; + unsigned i; + + for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { + unsigned long ip = entry->caller[i]; + + if (ip == ULONG_MAX || !ret) + break; + if (i) + ret = trace_seq_puts(s, " <- "); + if (!ip) { + ret = trace_seq_puts(s, "??"); + continue; + } + if (ret /*&& (sym_flags & TRACE_ITER_SYM_ADDR)*/) + ret = trace_seq_printf(s, " <" IP_FMT ">", ip); + } + + return ret; +} + static void print_lat_help_header(struct seq_file *m) { seq_puts(m, "# _------=> CPU# \n"); @@ -1702,6 +1769,16 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) field->line); break; } + case TRACE_USER_STACK: { + struct userstack_entry *field; + + trace_assign_type(field, entry); + + seq_print_userip_objs(field, s, sym_flags); + if (entry->flags & TRACE_FLAG_CONT) + trace_seq_print_cont(s, iter); + break; + } default: trace_seq_printf(s, "Unknown type %d\n", entry->type); } @@ -1853,6 +1930,19 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) field->line); break; } + case TRACE_USER_STACK: { + struct userstack_entry *field; + + trace_assign_type(field, entry); + + ret = seq_print_userip_objs(field, s, sym_flags); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + ret = trace_seq_putc(s, '\n'); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + break; + } } return TRACE_TYPE_HANDLED; } @@ -1912,6 +2002,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) break; } case TRACE_SPECIAL: + case TRACE_USER_STACK: case TRACE_STACK: { struct special_entry *field; @@ -2000,6 +2091,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) break; } case TRACE_SPECIAL: + case TRACE_USER_STACK: case TRACE_STACK: { struct special_entry *field; @@ -2054,6 +2146,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) break; } case TRACE_SPECIAL: + case TRACE_USER_STACK: case TRACE_STACK: { struct special_entry *field; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2cb12fd98f6b..17bb4c830b01 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -26,6 +26,7 @@ enum trace_type { TRACE_BOOT_CALL, TRACE_BOOT_RET, TRACE_FN_RET, + TRACE_USER_STACK, __TRACE_LAST_TYPE }; @@ -42,6 +43,7 @@ struct trace_entry { unsigned char flags; unsigned char preempt_count; int pid; + int tgid; }; /* @@ -99,6 +101,11 @@ struct stack_entry { unsigned long caller[FTRACE_STACK_ENTRIES]; }; +struct userstack_entry { + struct trace_entry ent; + unsigned long caller[FTRACE_STACK_ENTRIES]; +}; + /* * ftrace_printk entry: */ @@ -240,6 +247,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \ IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ + IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ IF_ASSIGN(var, ent, struct special_entry, 0); \ IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ @@ -500,6 +508,7 @@ enum trace_iterator_flags { TRACE_ITER_PREEMPTONLY = 0x800, TRACE_ITER_BRANCH = 0x1000, TRACE_ITER_ANNOTATE = 0x2000, + TRACE_ITER_USERSTACKTRACE = 0x4000 }; /* -- cgit v1.2.3 From 74e2f334f4440cbcb63e9ebbcdcea430d41bdfa3 Mon Sep 17 00:00:00 2001 From: Török Edwin Date: Sat, 22 Nov 2008 13:28:48 +0200 Subject: vfs, seqfile: make mangle_path() global MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: expose new VFS API make mangle_path() available, as per the suggestions of Christoph Hellwig and Al Viro: http://lkml.org/lkml/2008/11/4/338 Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- fs/seq_file.c | 14 +++++++++++++- include/linux/seq_file.h | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/seq_file.c b/fs/seq_file.c index eba2eabcd2b8..f5b61cc1cc1a 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -357,7 +357,18 @@ int seq_printf(struct seq_file *m, const char *f, ...) } EXPORT_SYMBOL(seq_printf); -static char *mangle_path(char *s, char *p, char *esc) +/** + * mangle_path - mangle and copy path to buffer beginning + * @s - buffer start + * @p - beginning of path in above buffer + * @esc - set of characters that need escaping + * + * Copy the path from @p to @s, replacing each occurrence of character from + * @esc with usual octal escape. + * Returns pointer past last written character in @s, or NULL in case of + * failure. + */ +char *mangle_path(char *s, char *p, char *esc) { while (s <= p) { char c = *p++; @@ -376,6 +387,7 @@ static char *mangle_path(char *s, char *p, char *esc) } return NULL; } +EXPORT_SYMBOL_GPL(mangle_path); /* * return the absolute path of 'dentry' residing in mount 'mnt'. diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index dc50bcc282a8..b3dfa72f13b9 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -34,6 +34,7 @@ struct seq_operations { #define SEQ_SKIP 1 +char *mangle_path(char *s, char *p, char *esc); int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); loff_t seq_lseek(struct file *, loff_t, int); -- cgit v1.2.3 From 42f565e116e0408b5ddc21a33c4a4d41fd572420 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 20 Nov 2008 23:57:47 -0500 Subject: trace: remove extra assign in branch check Impact: clean up of branch check The unlikely/likely profiler does an extra assign of the f.line. This is not needed since it is already calculated at compile time. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c7d804a7a4d6..c25e525121f0 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -87,7 +87,6 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); .file = __FILE__, \ .line = __LINE__, \ }; \ - ______f.line = __LINE__; \ ______r = likely_notrace(x); \ ftrace_likely_update(&______f, ______r, 1); \ ______r; \ @@ -102,7 +101,6 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); .file = __FILE__, \ .line = __LINE__, \ }; \ - ______f.line = __LINE__; \ ______r = unlikely_notrace(x); \ ftrace_likely_update(&______f, ______r, 0); \ ______r; \ -- cgit v1.2.3 From 45b797492a0758e64dff74e9db70e1f65e0603a5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 21 Nov 2008 00:40:40 -0500 Subject: trace: consolidate unlikely and likely profiler Impact: clean up to make one profiler of like and unlikely tracer The likely and unlikely profiler prints out the file and line numbers of the annotated branches that it is profiling. It shows the number of times it was correct or incorrect in its guess. Having two different files or sections for that matter to tell us if it was a likely or unlikely is pretty pointless. We really only care if it was correct or not. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 9 +++------ include/linux/compiler.h | 24 +++++------------------- kernel/trace/Kconfig | 3 +-- kernel/trace/trace_branch.c | 39 +++++++++++++-------------------------- 4 files changed, 22 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 3b46ae464933..8bccb49981e5 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -46,12 +46,9 @@ #endif #ifdef CONFIG_TRACE_BRANCH_PROFILING -#define LIKELY_PROFILE() VMLINUX_SYMBOL(__start_likely_profile) = .; \ - *(_ftrace_likely) \ - VMLINUX_SYMBOL(__stop_likely_profile) = .; \ - VMLINUX_SYMBOL(__start_unlikely_profile) = .; \ - *(_ftrace_unlikely) \ - VMLINUX_SYMBOL(__stop_unlikely_profile) = .; +#define LIKELY_PROFILE() VMLINUX_SYMBOL(__start_annotated_branch_profile) = .; \ + *(_ftrace_annotated_branch) \ + VMLINUX_SYMBOL(__stop_annotated_branch_profile) = .; #else #define LIKELY_PROFILE() #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c25e525121f0..0628a2013fae 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -77,32 +77,18 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #define likely_notrace(x) __builtin_expect(!!(x), 1) #define unlikely_notrace(x) __builtin_expect(!!(x), 0) -#define likely_check(x) ({ \ +#define __branch_check__(x, expect) ({ \ int ______r; \ static struct ftrace_branch_data \ __attribute__((__aligned__(4))) \ - __attribute__((section("_ftrace_likely"))) \ + __attribute__((section("_ftrace_annotated_branch"))) \ ______f = { \ .func = __func__, \ .file = __FILE__, \ .line = __LINE__, \ }; \ ______r = likely_notrace(x); \ - ftrace_likely_update(&______f, ______r, 1); \ - ______r; \ - }) -#define unlikely_check(x) ({ \ - int ______r; \ - static struct ftrace_branch_data \ - __attribute__((__aligned__(4))) \ - __attribute__((section("_ftrace_unlikely"))) \ - ______f = { \ - .func = __func__, \ - .file = __FILE__, \ - .line = __LINE__, \ - }; \ - ______r = unlikely_notrace(x); \ - ftrace_likely_update(&______f, ______r, 0); \ + ftrace_likely_update(&______f, ______r, expect); \ ______r; \ }) @@ -112,10 +98,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); * written by Daniel Walker. */ # ifndef likely -# define likely(x) (__builtin_constant_p(x) ? !!(x) : likely_check(x)) +# define likely(x) (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 1)) # endif # ifndef unlikely -# define unlikely(x) (__builtin_constant_p(x) ? !!(x) : unlikely_check(x)) +# define unlikely(x) (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 0)) # endif #else # define likely(x) __builtin_expect(!!(x), 1) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index b8378fad29a3..7e3548705708 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -166,8 +166,7 @@ config TRACE_BRANCH_PROFILING This tracer profiles all the the likely and unlikely macros in the kernel. It will display the results in: - /debugfs/tracing/profile_likely - /debugfs/tracing/profile_unlikely + /debugfs/tracing/profile_annotated_branch Note: this will add a significant overhead, only turn this on if you need to profile the system's use of these macros. diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 23f9b02ce967..21dedc8b50a4 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -261,7 +261,7 @@ static struct seq_operations tracing_likely_seq_ops = { .show = t_show, }; -static int tracing_likely_open(struct inode *inode, struct file *file) +static int tracing_branch_open(struct inode *inode, struct file *file) { int ret; @@ -274,25 +274,18 @@ static int tracing_likely_open(struct inode *inode, struct file *file) return ret; } -static struct file_operations tracing_likely_fops = { - .open = tracing_likely_open, +static const struct file_operations tracing_branch_fops = { + .open = tracing_branch_open, .read = seq_read, .llseek = seq_lseek, }; -extern unsigned long __start_likely_profile[]; -extern unsigned long __stop_likely_profile[]; -extern unsigned long __start_unlikely_profile[]; -extern unsigned long __stop_unlikely_profile[]; +extern unsigned long __start_annotated_branch_profile[]; +extern unsigned long __stop_annotated_branch_profile[]; -static struct ftrace_pointer ftrace_likely_pos = { - .start = __start_likely_profile, - .stop = __stop_likely_profile, -}; - -static struct ftrace_pointer ftrace_unlikely_pos = { - .start = __start_unlikely_profile, - .stop = __stop_unlikely_profile, +static const struct ftrace_pointer ftrace_annotated_branch_pos = { + .start = __start_annotated_branch_profile, + .stop = __stop_annotated_branch_profile, }; static __init int ftrace_branch_init(void) @@ -302,18 +295,12 @@ static __init int ftrace_branch_init(void) d_tracer = tracing_init_dentry(); - entry = debugfs_create_file("profile_likely", 0444, d_tracer, - &ftrace_likely_pos, - &tracing_likely_fops); - if (!entry) - pr_warning("Could not create debugfs 'profile_likely' entry\n"); - - entry = debugfs_create_file("profile_unlikely", 0444, d_tracer, - &ftrace_unlikely_pos, - &tracing_likely_fops); + entry = debugfs_create_file("profile_annotated_branch", 0444, d_tracer, + &ftrace_annotated_branch_pos, + &tracing_branch_fops); if (!entry) - pr_warning("Could not create debugfs" - " 'profile_unlikely' entry\n"); + pr_warning("Could not create debugfs " + "'profile_annotatet_branch' entry\n"); return 0; } -- cgit v1.2.3 From 2bcd521a684cc94befbe2ce7d5b613c841b0d304 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 21 Nov 2008 01:30:54 -0500 Subject: trace: profile all if conditionals Impact: feature to profile if statements This patch adds a branch profiler for all if () statements. The results will be found in: /debugfs/tracing/profile_branch For example: miss hit % Function File Line ------- --------- - -------- ---- ---- 0 1 100 x86_64_start_reservations head64.c 127 0 1 100 copy_bootdata head64.c 69 1 0 0 x86_64_start_kernel head64.c 111 32 0 0 set_intr_gate desc.h 319 1 0 0 reserve_ebda_region head.c 51 1 0 0 reserve_ebda_region head.c 47 0 1 100 reserve_ebda_region head.c 42 0 0 X maxcpus main.c 165 Miss means the branch was not taken. Hit means the branch was taken. The percent is the percentage the branch was taken. This adds a significant amount of overhead and should only be used by those analyzing their system. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/asm-generic/vmlinux.lds.h | 11 ++++++++++- include/linux/compiler.h | 38 ++++++++++++++++++++++++++++++++++++-- kernel/trace/Kconfig | 16 ++++++++++++++++ kernel/trace/trace_branch.c | 33 +++++++++++++++++++++++++++++++-- 4 files changed, 93 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8bccb49981e5..eba835a2c2cd 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -53,6 +53,14 @@ #define LIKELY_PROFILE() #endif +#ifdef CONFIG_PROFILE_ALL_BRANCHES +#define BRANCH_PROFILE() VMLINUX_SYMBOL(__start_branch_profile) = .; \ + *(_ftrace_branch) \ + VMLINUX_SYMBOL(__stop_branch_profile) = .; +#else +#define BRANCH_PROFILE() +#endif + /* .data section */ #define DATA_DATA \ *(.data) \ @@ -72,7 +80,8 @@ VMLINUX_SYMBOL(__start___tracepoints) = .; \ *(__tracepoints) \ VMLINUX_SYMBOL(__stop___tracepoints) = .; \ - LIKELY_PROFILE() + LIKELY_PROFILE() \ + BRANCH_PROFILE() #define RO_DATA(align) \ . = ALIGN((align)); \ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 0628a2013fae..ea7c6be354b7 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -63,8 +63,16 @@ struct ftrace_branch_data { const char *func; const char *file; unsigned line; - unsigned long correct; - unsigned long incorrect; + union { + struct { + unsigned long correct; + unsigned long incorrect; + }; + struct { + unsigned long miss; + unsigned long hit; + }; + }; }; /* @@ -103,6 +111,32 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # ifndef unlikely # define unlikely(x) (__builtin_constant_p(x) ? !!(x) : __branch_check__(x, 0)) # endif + +#ifdef CONFIG_PROFILE_ALL_BRANCHES +/* + * "Define 'is'", Bill Clinton + * "Define 'if'", Steven Rostedt + */ +#define if(cond) if (__builtin_constant_p((cond)) ? !!(cond) : \ + ({ \ + int ______r; \ + static struct ftrace_branch_data \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_branch"))) \ + ______f = { \ + .func = __func__, \ + .file = __FILE__, \ + .line = __LINE__, \ + }; \ + ______r = !!(cond); \ + if (______r) \ + ______f.hit++; \ + else \ + ______f.miss++; \ + ______r; \ + })) +#endif /* CONFIG_PROFILE_ALL_BRANCHES */ + #else # define likely(x) __builtin_expect(!!(x), 1) # define unlikely(x) __builtin_expect(!!(x), 0) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 7e3548705708..61e8cca6ff45 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -173,6 +173,22 @@ config TRACE_BRANCH_PROFILING Say N if unsure. +config PROFILE_ALL_BRANCHES + bool "Profile all if conditionals" + depends on TRACE_BRANCH_PROFILING + help + This tracer profiles all branch conditions. Every if () + taken in the kernel is recorded whether it hit or miss. + The results will be displayed in: + + /debugfs/tracing/profile_branch + + This configuration, when enabled, will impose a great overhead + on the system. This should only be enabled when the system + is to be analyzed + + Say N if unsure. + config TRACING_BRANCHES bool help diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 142acb3b4e00..85792aec64d2 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -185,6 +185,7 @@ EXPORT_SYMBOL(ftrace_likely_update); struct ftrace_pointer { void *start; void *stop; + int hit; }; static void * @@ -223,13 +224,17 @@ static void t_stop(struct seq_file *m, void *p) static int t_show(struct seq_file *m, void *v) { + struct ftrace_pointer *fp = m->private; struct ftrace_branch_data *p = v; const char *f; long percent; if (v == (void *)1) { - seq_printf(m, " correct incorrect %% " - " Function " + if (fp->hit) + seq_printf(m, " miss hit %% "); + else + seq_printf(m, " correct incorrect %% "); + seq_printf(m, " Function " " File Line\n" " ------- --------- - " " -------- " @@ -243,6 +248,9 @@ static int t_show(struct seq_file *m, void *v) f--; f++; + /* + * The miss is overlayed on correct, and hit on incorrect. + */ if (p->correct) { percent = p->incorrect * 100; percent /= p->correct + p->incorrect; @@ -284,6 +292,18 @@ static const struct file_operations tracing_branch_fops = { .llseek = seq_lseek, }; +#ifdef CONFIG_PROFILE_ALL_BRANCHES +extern unsigned long __start_branch_profile[]; +extern unsigned long __stop_branch_profile[]; + +static struct ftrace_pointer ftrace_branch_pos = { + .start = __start_branch_profile, + .stop = __stop_branch_profile, + .hit = 1, +}; + +#endif /* CONFIG_PROFILE_ALL_BRANCHES */ + extern unsigned long __start_annotated_branch_profile[]; extern unsigned long __stop_annotated_branch_profile[]; @@ -306,6 +326,15 @@ static __init int ftrace_branch_init(void) pr_warning("Could not create debugfs " "'profile_annotatet_branch' entry\n"); +#ifdef CONFIG_PROFILE_ALL_BRANCHES + entry = debugfs_create_file("profile_branch", 0444, d_tracer, + &ftrace_branch_pos, + &tracing_branch_fops); + if (!entry) + pr_warning("Could not create debugfs" + " 'profile_branch' entry\n"); +#endif + return 0; } -- cgit v1.2.3 From 033601a32b2012b6948e80e739cca40bff4de4a0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 21 Nov 2008 12:41:55 -0500 Subject: ring-buffer: add tracing_off_permanent Impact: feature to permanently disable ring buffer This patch adds a API to the ring buffer code that will permanently disable the ring buffer from ever recording. This should only be called when some serious anomaly is detected, and the system may be in an unstable state. When that happens, shutting down the recording to the ring buffers may be appropriate. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ring_buffer.h | 1 + kernel/trace/ring_buffer.c | 79 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 67 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index e097c2e6b6dc..3bb87a753fa3 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -122,6 +122,7 @@ void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); void tracing_on(void); void tracing_off(void); +void tracing_off_permanent(void); enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 85ced143c2c4..e206951603c1 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -18,8 +18,46 @@ #include "trace.h" -/* Global flag to disable all recording to ring buffers */ -static int ring_buffers_off __read_mostly; +/* + * A fast way to enable or disable all ring buffers is to + * call tracing_on or tracing_off. Turning off the ring buffers + * prevents all ring buffers from being recorded to. + * Turning this switch on, makes it OK to write to the + * ring buffer, if the ring buffer is enabled itself. + * + * There's three layers that must be on in order to write + * to the ring buffer. + * + * 1) This global flag must be set. + * 2) The ring buffer must be enabled for recording. + * 3) The per cpu buffer must be enabled for recording. + * + * In case of an anomaly, this global flag has a bit set that + * will permantly disable all ring buffers. + */ + +/* + * Global flag to disable all recording to ring buffers + * This has two bits: ON, DISABLED + * + * ON DISABLED + * ---- ---------- + * 0 0 : ring buffers are off + * 1 0 : ring buffers are on + * X 1 : ring buffers are permanently disabled + */ + +enum { + RB_BUFFERS_ON_BIT = 0, + RB_BUFFERS_DISABLED_BIT = 1, +}; + +enum { + RB_BUFFERS_ON = 1 << RB_BUFFERS_ON_BIT, + RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT, +}; + +static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; /** * tracing_on - enable all tracing buffers @@ -29,7 +67,7 @@ static int ring_buffers_off __read_mostly; */ void tracing_on(void) { - ring_buffers_off = 0; + set_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags); } /** @@ -42,7 +80,18 @@ void tracing_on(void) */ void tracing_off(void) { - ring_buffers_off = 1; + clear_bit(RB_BUFFERS_ON_BIT, &ring_buffer_flags); +} + +/** + * tracing_off_permanent - permanently disable ring buffers + * + * This function, once called, will disable all ring buffers + * permanenty. + */ +void tracing_off_permanent(void) +{ + set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); } #include "trace.h" @@ -1185,7 +1234,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, struct ring_buffer_event *event; int cpu, resched; - if (ring_buffers_off) + if (ring_buffer_flags != RB_BUFFERS_ON) return NULL; if (atomic_read(&buffer->record_disabled)) @@ -1297,7 +1346,7 @@ int ring_buffer_write(struct ring_buffer *buffer, int ret = -EBUSY; int cpu, resched; - if (ring_buffers_off) + if (ring_buffer_flags != RB_BUFFERS_ON) return -EBUSY; if (atomic_read(&buffer->record_disabled)) @@ -2178,12 +2227,14 @@ static ssize_t rb_simple_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { - int *p = filp->private_data; + long *p = filp->private_data; char buf[64]; int r; - /* !ring_buffers_off == tracing_on */ - r = sprintf(buf, "%d\n", !*p); + if (test_bit(RB_BUFFERS_DISABLED_BIT, p)) + r = sprintf(buf, "permanently disabled\n"); + else + r = sprintf(buf, "%d\n", test_bit(RB_BUFFERS_ON_BIT, p)); return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } @@ -2192,7 +2243,7 @@ static ssize_t rb_simple_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { - int *p = filp->private_data; + long *p = filp->private_data; char buf[64]; long val; int ret; @@ -2209,8 +2260,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf, if (ret < 0) return ret; - /* !ring_buffers_off == tracing_on */ - *p = !val; + if (val) + set_bit(RB_BUFFERS_ON_BIT, p); + else + clear_bit(RB_BUFFERS_ON_BIT, p); (*ppos)++; @@ -2232,7 +2285,7 @@ static __init int rb_init_debugfs(void) d_tracer = tracing_init_dentry(); entry = debugfs_create_file("tracing_on", 0644, d_tracer, - &ring_buffers_off, &rb_simple_fops); + &ring_buffer_flags, &rb_simple_fops); if (!entry) pr_warning("Could not create debugfs 'tracing_on' entry\n"); -- cgit v1.2.3 From 69bb54ec05f57da7f6fac2cec0820cbc970df20f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 21 Nov 2008 12:59:38 -0500 Subject: ftrace: add ftrace_off_permanent Impact: add new API to disable all of ftrace on anomalies It case of a serious anomaly being detected (like something caught by lockdep) it is a good idea to disable all tracing immediately, without grabing any locks. This patch adds ftrace_off_permanent that disables the tracers, function tracing and ring buffers without a way to enable them again. This should only be used when something serious has been detected. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 ++ kernel/trace/trace.c | 15 +++++++++++++++ 2 files changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f7ba4ea5e128..13e9cfc09928 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -257,6 +257,7 @@ extern int ftrace_dump_on_oops; extern void tracing_start(void); extern void tracing_stop(void); +extern void ftrace_off_permanent(void); extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); @@ -290,6 +291,7 @@ ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0))); static inline void tracing_start(void) { } static inline void tracing_stop(void) { } +static inline void ftrace_off_permanent(void) { } static inline int ftrace_printk(const char *fmt, ...) { diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4ee6f0375222..0dbfb23ced97 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -660,6 +660,21 @@ static void trace_init_cmdlines(void) static int trace_stop_count; static DEFINE_SPINLOCK(tracing_start_lock); +/** + * ftrace_off_permanent - disable all ftrace code permanently + * + * This should only be called when a serious anomally has + * been detected. This will turn off the function tracing, + * ring buffers, and other tracing utilites. It takes no + * locks and can be called from any context. + */ +void ftrace_off_permanent(void) +{ + tracing_disabled = 1; + ftrace_stop(); + tracing_off_permanent(); +} + /** * tracing_start - quick start of the tracer * -- cgit v1.2.3 From 8d7c6a96164651dbbab449ef0b5c20ae1f76a3a1 Mon Sep 17 00:00:00 2001 From: Török Edwin Date: Sun, 23 Nov 2008 12:39:06 +0200 Subject: tracing/stack-tracer: fix style issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: cleanup Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- arch/x86/kernel/stacktrace.c | 51 +++++++++++++++++++++++++------------------- include/linux/stacktrace.h | 2 +- kernel/trace/trace.c | 7 +++--- 3 files changed, 33 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index b15153060417..10786af95545 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -89,7 +89,7 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk); struct stack_frame { const void __user *next_fp; - unsigned long return_address; + unsigned long ret_addr; }; static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) @@ -108,33 +108,40 @@ static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) return ret; } +static inline void __save_stack_trace_user(struct stack_trace *trace) +{ + const struct pt_regs *regs = task_pt_regs(current); + const void __user *fp = (const void __user *)regs->bp; + + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = regs->ip; + + while (trace->nr_entries < trace->max_entries) { + struct stack_frame frame; + + frame.next_fp = NULL; + frame.ret_addr = 0; + if (!copy_stack_frame(fp, &frame)) + break; + if ((unsigned long)fp < regs->sp) + break; + if (frame.ret_addr) { + trace->entries[trace->nr_entries++] = + frame.ret_addr; + } + if (fp == frame.next_fp) + break; + fp = frame.next_fp; + } +} + void save_stack_trace_user(struct stack_trace *trace) { /* * Trace user stack if we are not a kernel thread */ if (current->mm) { - const struct pt_regs *regs = task_pt_regs(current); - const void __user *fp = (const void __user *)regs->bp; - - if (trace->nr_entries < trace->max_entries) - trace->entries[trace->nr_entries++] = regs->ip; - - while (trace->nr_entries < trace->max_entries) { - struct stack_frame frame; - frame.next_fp = NULL; - frame.return_address = 0; - if (!copy_stack_frame(fp, &frame)) - break; - if ((unsigned long)fp < regs->sp) - break; - if (frame.return_address) - trace->entries[trace->nr_entries++] = - frame.return_address; - if (fp == frame.next_fp) - break; - fp = frame.next_fp; - } + __save_stack_trace_user(trace); } if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 68de51468f5d..fd42d6851109 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -25,7 +25,7 @@ extern void save_stack_trace_user(struct stack_trace *trace); #else # define save_stack_trace(trace) do { } while (0) # define save_stack_trace_tsk(tsk, trace) do { } while (0) -# define save_stack_trace_user(trace) do { } while (0) +# define save_stack_trace_user(trace) do { } while (0) # define print_stack_trace(trace, spaces) do { } while (0) #endif diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 62776b71b1c5..dedf35f36971 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -948,9 +948,9 @@ static void ftrace_trace_userstack(struct trace_array *tr, struct trace_array_cpu *data, unsigned long flags, int pc) { + struct ring_buffer_event *event; struct userstack_entry *entry; struct stack_trace trace; - struct ring_buffer_event *event; unsigned long irq_flags; if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) @@ -1471,8 +1471,7 @@ static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, if (file) { ret = trace_seq_path(s, &file->f_path); if (ret) - ret = trace_seq_printf(s, "[+0x%lx]", - ip - vmstart); + ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart); } if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file)) ret = trace_seq_printf(s, " <" IP_FMT ">", ip); @@ -1485,7 +1484,7 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, { struct mm_struct *mm = NULL; int ret = 1; - unsigned i; + unsigned int i; if (trace_flags & TRACE_ITER_SYM_USEROBJ) { struct task_struct *task; -- cgit v1.2.3 From 8d26487fd4ddda7a0237da418fb8669fb06ae557 Mon Sep 17 00:00:00 2001 From: Török Edwin Date: Sun, 23 Nov 2008 12:39:08 +0200 Subject: tracing/stack-tracer: introduce CONFIG_USER_STACKTRACE_SUPPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: cleanup User stack tracing is just implemented for x86, but it is not x86 specific. Introduce a generic config flag, that is currently enabled only for x86. When other arches implement it, they will have to SELECT USER_STACKTRACE_SUPPORT. Signed-off-by: Török Edwin Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + include/linux/stacktrace.h | 2 +- kernel/trace/Kconfig | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7a146baaa990..e49a4fd718fe 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -36,6 +36,7 @@ config X86 select HAVE_ARCH_TRACEHOOK select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS + select USER_STACKTRACE_SUPPORT config ARCH_DEFCONFIG string diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index fd42d6851109..1a8cecc4f38c 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -16,7 +16,7 @@ extern void save_stack_trace_tsk(struct task_struct *tsk, extern void print_stack_trace(struct stack_trace *trace, int spaces); -#ifdef CONFIG_X86 +#ifdef CONFIG_USER_STACKTRACE_SUPPORT extern void save_stack_trace_user(struct stack_trace *trace); #else # define save_stack_trace_user(trace) do { } while (0) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index b8378fad29a3..87fc34a1bb91 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -3,6 +3,9 @@ # select HAVE_FUNCTION_TRACER: # +config USER_STACKTRACE_SUPPORT + bool + config NOP_TRACER bool -- cgit v1.2.3 From e262a7ba31f0cd4dae225e6d2e9037e5ac6108e8 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Sun, 23 Nov 2008 14:34:43 +0000 Subject: irq.h: remove padding from irq_desc on 64bits Impact: reduce struct irq_desc size struct irq_desc: reorder to remove padding on 64bits shrinks irq_desc to 128 bytes which saves data space & cache lines On a generic x86_64/SMP build this reduces the reported data size by 64k. Signed-off-by: Richard Kennedy Signed-off-by: Ingo Molnar --- include/linux/irq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index d058c57be02d..838a977885e1 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -142,8 +142,8 @@ struct irq_chip { * @depth: disable-depth, for nested irq_disable() calls * @wake_depth: enable depth, for multiple set_irq_wake() callers * @irq_count: stats field to detect stalled irqs - * @irqs_unhandled: stats field for spurious unhandled interrupts * @last_unhandled: aging timer for unhandled count + * @irqs_unhandled: stats field for spurious unhandled interrupts * @lock: locking for SMP * @affinity: IRQ affinity on SMP * @cpu: cpu index useful for balancing @@ -165,8 +165,8 @@ struct irq_desc { unsigned int depth; /* nested irq disables */ unsigned int wake_depth; /* nested wake enables */ unsigned int irq_count; /* For detecting broken IRQs */ - unsigned int irqs_unhandled; unsigned long last_unhandled; /* Aging timer for unhandled count */ + unsigned int irqs_unhandled; spinlock_t lock; #ifdef CONFIG_SMP cpumask_t affinity; -- cgit v1.2.3 From b20a9c24d5c5d466d7e4a25c6f1bedbd2d16ad4f Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 23 Nov 2008 16:02:31 -0800 Subject: dccp: Set per-connection CCIDs via socket options With this patch, TX/RX CCIDs can now be changed on a per-connection basis, which overrides the defaults set by the global sysctl variables for TX/RX CCIDs. To make full use of this facility, the remaining patches of this patch set are needed, which track dependencies and activate negotiated feature values. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- Documentation/networking/dccp.txt | 14 ++++++++++++++ include/linux/dccp.h | 5 +++++ net/dccp/ackvec.c | 9 ++++----- net/dccp/ackvec.h | 5 ++--- net/dccp/feat.h | 2 ++ net/dccp/proto.c | 34 ++++++++++++++++++++++++++++++++++ 6 files changed, 61 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt index 43df4487379b..610083ff73f6 100644 --- a/Documentation/networking/dccp.txt +++ b/Documentation/networking/dccp.txt @@ -61,6 +61,20 @@ DCCP_SOCKOPT_AVAILABLE_CCIDS is also read-only and returns the list of CCIDs supported by the endpoint (see include/linux/dccp.h for symbolic constants). The caller needs to provide a sufficiently large (> 2) array of type uint8_t. +DCCP_SOCKOPT_CCID is write-only and sets both the TX and RX CCIDs at the same +time, combining the operation of the next two socket options. This option is +preferrable over the latter two, since often applications will use the same +type of CCID for both directions; and mixed use of CCIDs is not currently well +understood. This socket option takes as argument at least one uint8_t value, or +an array of uint8_t values, which must match available CCIDS (see above). CCIDs +must be registered on the socket before calling connect() or listen(). + +DCCP_SOCKOPT_TX_CCID is read/write. It returns the current CCID (if set) or sets +the preference list for the TX CCID, using the same format as DCCP_SOCKOPT_CCID. +Please note that the getsockopt argument type here is `int', not uint8_t. + +DCCP_SOCKOPT_RX_CCID is analogous to DCCP_SOCKOPT_TX_CCID, but for the RX CCID. + DCCP_SOCKOPT_SERVER_TIMEWAIT enables the server (listening socket) to hold timewait state when closing the connection (RFC 4340, 8.3). The usual case is that the closing server sends a CloseReq, whereupon the client holds timewait diff --git a/include/linux/dccp.h b/include/linux/dccp.h index eda389ce04f4..6a72ff52a8a4 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -168,6 +168,8 @@ enum { DCCPO_MIN_CCID_SPECIFIC = 128, DCCPO_MAX_CCID_SPECIFIC = 255, }; +/* maximum size of a single TLV-encoded DCCP option (sans type/len bytes) */ +#define DCCP_SINGLE_OPT_MAXLEN 253 /* DCCP CCIDS */ enum { @@ -203,6 +205,9 @@ enum dccp_feature_numbers { #define DCCP_SOCKOPT_SEND_CSCOV 10 #define DCCP_SOCKOPT_RECV_CSCOV 11 #define DCCP_SOCKOPT_AVAILABLE_CCIDS 12 +#define DCCP_SOCKOPT_CCID 13 +#define DCCP_SOCKOPT_TX_CCID 14 +#define DCCP_SOCKOPT_RX_CCID 15 #define DCCP_SOCKOPT_CCID_RX_INFO 128 #define DCCP_SOCKOPT_CCID_TX_INFO 192 diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c index 1e8be246ad15..01e4d39fa232 100644 --- a/net/dccp/ackvec.c +++ b/net/dccp/ackvec.c @@ -12,7 +12,6 @@ #include "ackvec.h" #include "dccp.h" -#include #include #include #include @@ -68,7 +67,7 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) struct dccp_sock *dp = dccp_sk(sk); struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; /* Figure out how many options do we need to represent the ackvec */ - const u16 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_MAX_ACKVEC_OPT_LEN); + const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN); u16 len = av->av_vec_len + 2 * nr_opts, i; u32 elapsed_time; const unsigned char *tail, *from; @@ -100,8 +99,8 @@ int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) for (i = 0; i < nr_opts; ++i) { int copylen = len; - if (len > DCCP_MAX_ACKVEC_OPT_LEN) - copylen = DCCP_MAX_ACKVEC_OPT_LEN; + if (len > DCCP_SINGLE_OPT_MAXLEN) + copylen = DCCP_SINGLE_OPT_MAXLEN; *to++ = DCCPO_ACK_VECTOR_0; *to++ = copylen + 2; @@ -432,7 +431,7 @@ found: int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, u64 *ackno, const u8 opt, const u8 *value, const u8 len) { - if (len > DCCP_MAX_ACKVEC_OPT_LEN) + if (len > DCCP_SINGLE_OPT_MAXLEN) return -1; /* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */ diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index bcb64fb4acef..4ccee030524e 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -11,15 +11,14 @@ * published by the Free Software Foundation. */ +#include #include #include #include #include -/* Read about the ECN nonce to see why it is 253 */ -#define DCCP_MAX_ACKVEC_OPT_LEN 253 /* We can spread an ack vector across multiple options */ -#define DCCP_MAX_ACKVEC_LEN (DCCP_MAX_ACKVEC_OPT_LEN * 2) +#define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2) #define DCCP_ACKVEC_STATE_RECEIVED 0 #define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6) diff --git a/net/dccp/feat.h b/net/dccp/feat.h index 4d172822df17..093af1610d11 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h @@ -22,6 +22,8 @@ /* Wmin=32 and Wmax=2^46-1 from 7.5.2 */ #define DCCPF_SEQ_WMIN 32 #define DCCPF_SEQ_WMAX 0x3FFFFFFFFFFFull +/* Maximum number of SP values that fit in a single (Confirm) option */ +#define DCCP_FEAT_MAX_SP_VALS (DCCP_SINGLE_OPT_MAXLEN - 2) enum dccp_feat_type { FEAT_AT_RX = 1, /* located at RX side of half-connection */ diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 8b63394ec24c..445884cf1c29 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -501,6 +501,36 @@ static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx) return rc; } +static int dccp_setsockopt_ccid(struct sock *sk, int type, + char __user *optval, int optlen) +{ + u8 *val; + int rc = 0; + + if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS) + return -EINVAL; + + val = kmalloc(optlen, GFP_KERNEL); + if (val == NULL) + return -ENOMEM; + + if (copy_from_user(val, optval, optlen)) { + kfree(val); + return -EFAULT; + } + + lock_sock(sk); + if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID) + rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen); + + if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID)) + rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen); + release_sock(sk); + + kfree(val); + return rc; +} + static int do_dccp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { @@ -515,6 +545,10 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, case DCCP_SOCKOPT_CHANGE_R: DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n"); return 0; + case DCCP_SOCKOPT_CCID: + case DCCP_SOCKOPT_RX_CCID: + case DCCP_SOCKOPT_TX_CCID: + return dccp_setsockopt_ccid(sk, optname, optval, optlen); } if (optlen < (int)sizeof(int)) -- cgit v1.2.3 From c25eb3bfb97294d0543a81230fbc237046b4b84c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 23 Nov 2008 17:22:55 -0800 Subject: net: Convert TCP/DCCP listening hash tables to use RCU This is the last step to be able to perform full RCU lookups in __inet_lookup() : After established/timewait tables, we add RCU lookups to listening hash table. The only trick here is that a socket of a given type (TCP ipv4, TCP ipv6, ...) can now flight between two different tables (established and listening) during a RCU grace period, so we must use different 'nulls' end-of-chain values for two tables. We define a large value : #define LISTENING_NULLS_BASE (1U << 29) So that slots in listening table are guaranteed to have different end-of-chain values than slots in established table. A reader can still detect it finished its lookup in the right chain. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 9 ++- net/ipv4/inet_diag.c | 4 +- net/ipv4/inet_hashtables.c | 148 +++++++++++++++++++++--------------------- net/ipv4/tcp_ipv4.c | 8 +-- net/ipv6/inet6_hashtables.c | 94 +++++++++++++++++---------- 5 files changed, 147 insertions(+), 116 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index ec7ee2e46d8c..f44bb5c77a70 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -99,9 +99,16 @@ struct inet_bind_hashbucket { struct hlist_head chain; }; +/* + * Sockets can be hashed in established or listening table + * We must use different 'nulls' end-of-chain value for listening + * hash table, or we might find a socket that was closed and + * reallocated/inserted into established hash table + */ +#define LISTENING_NULLS_BASE (1U << 29) struct inet_listen_hashbucket { spinlock_t lock; - struct hlist_head head; + struct hlist_nulls_head head; }; /* This is for listening sockets, thus all sockets which possess wildcards. */ diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 998a78f169ff..588a7796e3e3 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -720,13 +720,13 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct sock *sk; - struct hlist_node *node; + struct hlist_nulls_node *node; struct inet_listen_hashbucket *ilb; num = 0; ilb = &hashinfo->listening_hash[i]; spin_lock_bh(&ilb->lock); - sk_for_each(sk, node, &ilb->head) { + sk_nulls_for_each(sk, node, &ilb->head) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) { diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 4c273a9981a6..11fcb87a1fdd 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -110,78 +110,79 @@ void __inet_inherit_port(struct sock *sk, struct sock *child) EXPORT_SYMBOL_GPL(__inet_inherit_port); +static inline int compute_score(struct sock *sk, struct net *net, + const unsigned short hnum, const __be32 daddr, + const int dif) +{ + int score = -1; + struct inet_sock *inet = inet_sk(sk); + + if (net_eq(sock_net(sk), net) && inet->num == hnum && + !ipv6_only_sock(sk)) { + __be32 rcv_saddr = inet->rcv_saddr; + score = sk->sk_family == PF_INET ? 1 : 0; + if (rcv_saddr) { + if (rcv_saddr != daddr) + return -1; + score += 2; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score += 2; + } + } + return score; +} + /* * Don't inline this cruft. Here are some nice properties to exploit here. The * BSD API does not allow a listening sock to specify the remote port nor the * remote address for the connection. So always assume those are both * wildcarded during the search since they can never be otherwise. */ -static struct sock *inet_lookup_listener_slow(struct net *net, - const struct hlist_head *head, - const __be32 daddr, - const unsigned short hnum, - const int dif) -{ - struct sock *result = NULL, *sk; - const struct hlist_node *node; - int hiscore = -1; - - sk_for_each(sk, node, head) { - const struct inet_sock *inet = inet_sk(sk); - - if (net_eq(sock_net(sk), net) && inet->num == hnum && - !ipv6_only_sock(sk)) { - const __be32 rcv_saddr = inet->rcv_saddr; - int score = sk->sk_family == PF_INET ? 1 : 0; - - if (rcv_saddr) { - if (rcv_saddr != daddr) - continue; - score += 2; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score += 2; - } - if (score == 5) - return sk; - if (score > hiscore) { - hiscore = score; - result = sk; - } - } - } - return result; -} -/* Optimize the common listener case. */ + struct sock *__inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, const __be32 daddr, const unsigned short hnum, const int dif) { - struct sock *sk = NULL; - struct inet_listen_hashbucket *ilb; + struct sock *sk, *result; + struct hlist_nulls_node *node; + unsigned int hash = inet_lhashfn(net, hnum); + struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; + int score, hiscore; - ilb = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; - spin_lock(&ilb->lock); - if (!hlist_empty(&ilb->head)) { - const struct inet_sock *inet = inet_sk((sk = __sk_head(&ilb->head))); - - if (inet->num == hnum && !sk->sk_node.next && - (!inet->rcv_saddr || inet->rcv_saddr == daddr) && - (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && - !sk->sk_bound_dev_if && net_eq(sock_net(sk), net)) - goto sherry_cache; - sk = inet_lookup_listener_slow(net, &ilb->head, daddr, hnum, dif); + rcu_read_lock(); +begin: + result = NULL; + hiscore = -1; + sk_nulls_for_each_rcu(sk, node, &ilb->head) { + score = compute_score(sk, net, hnum, daddr, dif); + if (score > hiscore) { + result = sk; + hiscore = score; + } } - if (sk) { -sherry_cache: - sock_hold(sk); + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) + goto begin; + if (result) { + if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + result = NULL; + else if (unlikely(compute_score(result, net, hnum, daddr, + dif) < hiscore)) { + sock_put(result); + goto begin; + } } - spin_unlock(&ilb->lock); - return sk; + rcu_read_unlock(); + return result; } EXPORT_SYMBOL_GPL(__inet_lookup_listener); @@ -370,7 +371,7 @@ static void __inet_hash(struct sock *sk) ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; spin_lock(&ilb->lock); - __sk_add_node(sk, &ilb->head); + __sk_nulls_add_node_rcu(sk, &ilb->head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); spin_unlock(&ilb->lock); } @@ -388,26 +389,22 @@ EXPORT_SYMBOL_GPL(inet_hash); void inet_unhash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + spinlock_t *lock; + int done; if (sk_unhashed(sk)) return; - if (sk->sk_state == TCP_LISTEN) { - struct inet_listen_hashbucket *ilb; + if (sk->sk_state == TCP_LISTEN) + lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock; + else + lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - spin_lock_bh(&ilb->lock); - if (__sk_del_node_init(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - spin_unlock_bh(&ilb->lock); - } else { - spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - - spin_lock_bh(lock); - if (__sk_nulls_del_node_init_rcu(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - spin_unlock_bh(lock); - } + spin_lock_bh(lock); + done =__sk_nulls_del_node_init_rcu(sk); + spin_unlock_bh(lock); + if (done) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } EXPORT_SYMBOL_GPL(inet_unhash); @@ -526,8 +523,11 @@ void inet_hashinfo_init(struct inet_hashinfo *h) { int i; - for (i = 0; i < INET_LHTABLE_SIZE; i++) + for (i = 0; i < INET_LHTABLE_SIZE; i++) { spin_lock_init(&h->listening_hash[i].lock); + INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head, + i + LISTENING_NULLS_BASE); + } } EXPORT_SYMBOL_GPL(inet_hashinfo_init); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a81caa1be0cf..cab2458f86fd 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1868,7 +1868,7 @@ static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) static void *listening_get_next(struct seq_file *seq, void *cur) { struct inet_connection_sock *icsk; - struct hlist_node *node; + struct hlist_nulls_node *node; struct sock *sk = cur; struct inet_listen_hashbucket *ilb; struct tcp_iter_state *st = seq->private; @@ -1878,7 +1878,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) st->bucket = 0; ilb = &tcp_hashinfo.listening_hash[0]; spin_lock_bh(&ilb->lock); - sk = sk_head(&ilb->head); + sk = sk_nulls_head(&ilb->head); goto get_sk; } ilb = &tcp_hashinfo.listening_hash[st->bucket]; @@ -1914,7 +1914,7 @@ get_req: sk = sk_next(sk); } get_sk: - sk_for_each_from(sk, node) { + sk_nulls_for_each_from(sk, node) { if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) { cur = sk; goto out; @@ -1935,7 +1935,7 @@ start_req: if (++st->bucket < INET_LHTABLE_SIZE) { ilb = &tcp_hashinfo.listening_hash[st->bucket]; spin_lock_bh(&ilb->lock); - sk = sk_head(&ilb->head); + sk = sk_nulls_head(&ilb->head); goto get_sk; } cur = NULL; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index e0fd68187f83..8fe267feb81e 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -33,7 +33,7 @@ void __inet6_hash(struct sock *sk) ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; spin_lock(&ilb->lock); - __sk_add_node(sk, &ilb->head); + __sk_nulls_add_node_rcu(sk, &ilb->head); spin_unlock(&ilb->lock); } else { unsigned int hash; @@ -118,47 +118,71 @@ out: } EXPORT_SYMBOL(__inet6_lookup_established); +static int inline compute_score(struct sock *sk, struct net *net, + const unsigned short hnum, + const struct in6_addr *daddr, + const int dif) +{ + int score = -1; + + if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum && + sk->sk_family == PF_INET6) { + const struct ipv6_pinfo *np = inet6_sk(sk); + + score = 1; + if (!ipv6_addr_any(&np->rcv_saddr)) { + if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) + return -1; + score++; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score++; + } + } + return score; +} + struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, const struct in6_addr *daddr, const unsigned short hnum, const int dif) { struct sock *sk; - const struct hlist_node *node; - struct sock *result = NULL; - int score, hiscore = 0; - struct inet_listen_hashbucket *ilb; - - ilb = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; - spin_lock(&ilb->lock); - sk_for_each(sk, node, &ilb->head) { - if (net_eq(sock_net(sk), net) && inet_sk(sk)->num == hnum && - sk->sk_family == PF_INET6) { - const struct ipv6_pinfo *np = inet6_sk(sk); - - score = 1; - if (!ipv6_addr_any(&np->rcv_saddr)) { - if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) - continue; - score++; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score++; - } - if (score == 3) { - result = sk; - break; - } - if (score > hiscore) { - hiscore = score; - result = sk; - } + const struct hlist_nulls_node *node; + struct sock *result; + int score, hiscore; + unsigned int hash = inet_lhashfn(net, hnum); + struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; + + rcu_read_lock(); +begin: + result = NULL; + hiscore = -1; + sk_nulls_for_each(sk, node, &ilb->head) { + score = compute_score(sk, net, hnum, daddr, dif); + if (score > hiscore) { + hiscore = score; + result = sk; } } - if (result) - sock_hold(result); - spin_unlock(&ilb->lock); + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) + goto begin; + if (result) { + if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + result = NULL; + else if (unlikely(compute_score(result, net, hnum, daddr, + dif) < hiscore)) { + sock_put(result); + goto begin; + } + } + rcu_read_unlock(); return result; } -- cgit v1.2.3 From 1f87e235e6fb92c2968b52b9191de04f1aff8e77 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 23 Nov 2008 23:24:32 -0800 Subject: eth: Declare an optimized compare_ether_addr_64bits() function Linus mentioned we could try to perform long word operations, even on potentially unaligned addresses, on x86 at least. David mentioned the HAVE_EFFICIENT_UNALIGNED_ACCESS test to handle this on all arches that have efficient unailgned accesses. I tried this idea and got nice assembly on 32 bits: 158: 33 82 38 01 00 00 xor 0x138(%edx),%eax 15e: 33 8a 34 01 00 00 xor 0x134(%edx),%ecx 164: c1 e0 10 shl $0x10,%eax 167: 09 c1 or %eax,%ecx 169: 74 0b je 176 And very nice assembly on 64 bits of course (one xor, one shl) Nice oprofile improvement in eth_type_trans(), 0.17 % instead of 0.41 %, expected since we remove 8 instructions on a fast path. This patch implements a compare_ether_addr_64bits() function, that uses the CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS ifdef to efficiently perform the 6 bytes comparison on all capable arches. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 42 ++++++++++++++++++++++++++++++++++++++++++ net/ethernet/eth.c | 6 +++--- 2 files changed, 45 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 0e5e97060034..1cb0f0b90926 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -27,6 +27,7 @@ #include #include #include +#include #ifdef __KERNEL__ extern __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); @@ -140,6 +141,47 @@ static inline unsigned compare_ether_addr(const u8 *addr1, const u8 *addr2) BUILD_BUG_ON(ETH_ALEN != 6); return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; } + +static inline unsigned long zap_last_2bytes(unsigned long value) +{ +#ifdef __BIG_ENDIAN + return value >> 16; +#else + return value << 16; +#endif +} + +/** + * compare_ether_addr_64bits - Compare two Ethernet addresses + * @addr1: Pointer to an array of 8 bytes + * @addr2: Pointer to an other array of 8 bytes + * + * Compare two ethernet addresses, returns 0 if equal. + * Same result than "memcmp(addr1, addr2, ETH_ALEN)" but without conditional + * branches, and possibly long word memory accesses on CPU allowing cheap + * unaligned memory reads. + * arrays = { byte1, byte2, byte3, byte4, byte6, byte7, pad1, pad2} + * + * Please note that alignment of addr1 & addr2 is only guaranted to be 16 bits. + */ + +static inline unsigned compare_ether_addr_64bits(const u8 addr1[6+2], + const u8 addr2[6+2]) +{ +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + unsigned long fold = ((*(unsigned long *)addr1) ^ + (*(unsigned long *)addr2)); + + if (sizeof(fold) == 8) + return zap_last_2bytes(fold) != 0; + + fold |= zap_last_2bytes((*(unsigned long *)(addr1 + 4)) ^ + (*(unsigned long *)(addr2 + 4))); + return fold != 0; +#else + return compare_ether_addr(addr1, addr2); +#endif +} #endif /* __KERNEL__ */ #endif /* _LINUX_ETHERDEVICE_H */ diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index a87a171d9914..280352aba403 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -165,8 +165,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) skb_pull(skb, ETH_HLEN); eth = eth_hdr(skb); - if (is_multicast_ether_addr(eth->h_dest)) { - if (!compare_ether_addr(eth->h_dest, dev->broadcast)) + if (unlikely(is_multicast_ether_addr(eth->h_dest))) { + if (!compare_ether_addr_64bits(eth->h_dest, dev->broadcast)) skb->pkt_type = PACKET_BROADCAST; else skb->pkt_type = PACKET_MULTICAST; @@ -181,7 +181,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) */ else if (1 /*dev->flags&IFF_PROMISC */ ) { - if (unlikely(compare_ether_addr(eth->h_dest, dev->dev_addr))) + if (unlikely(compare_ether_addr_64bits(eth->h_dest, dev->dev_addr))) skb->pkt_type = PACKET_OTHERHOST; } -- cgit v1.2.3 From 3ba9e10a6d3b6abf5f5952572cff8f8d5a35ae54 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 24 Nov 2008 18:01:05 +0000 Subject: ASoC: Remove DAI type information DAI type information is only ever used within ASoC in order to special case AC97 and for diagnostic purposes. Since modern CPUs and codecs support multi function DAIs which can be configured for several modes it is more trouble than it's worth to maintain anything other than a flag identifying AC97 DAIs so remove the type field and replace it with an ac97_control flag. Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 2 +- include/sound/soc.h | 8 -------- sound/soc/atmel/atmel_ssc_dai.c | 3 --- sound/soc/au1x/psc-ac97.c | 2 +- sound/soc/au1x/psc-i2s.c | 1 - sound/soc/blackfin/bf5xx-ac97.c | 2 +- sound/soc/blackfin/bf5xx-i2s.c | 1 - sound/soc/codecs/ac97.c | 2 +- sound/soc/codecs/pcm3008.c | 1 - sound/soc/codecs/wm9712.c | 2 +- sound/soc/codecs/wm9713.c | 2 +- sound/soc/davinci/davinci-i2s.c | 1 - sound/soc/fsl/mpc5200_psc_i2s.c | 1 - sound/soc/omap/omap-mcbsp.c | 1 - sound/soc/pxa/pxa-ssp.c | 4 ---- sound/soc/pxa/pxa2xx-ac97.c | 6 +++--- sound/soc/pxa/pxa2xx-i2s.c | 1 - sound/soc/s3c24xx/neo1973_wm8753.c | 1 - sound/soc/s3c24xx/s3c2412-i2s.c | 1 - sound/soc/s3c24xx/s3c2443-ac97.c | 4 ++-- sound/soc/s3c24xx/s3c24xx-i2s.c | 1 - sound/soc/sh/hac.c | 4 ++-- sound/soc/sh/ssi.c | 2 -- sound/soc/soc-core.c | 31 ++++++++----------------------- 24 files changed, 21 insertions(+), 63 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index f51cb55902f7..a01a24b10196 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -184,7 +184,7 @@ struct snd_soc_dai { /* DAI description */ char *name; unsigned int id; - unsigned char type; + int ac97_control; /* DAI callbacks */ int (*probe)(struct platform_device *pdev, diff --git a/include/sound/soc.h b/include/sound/soc.h index e4465f73aa46..444f9c211379 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -143,14 +143,6 @@ enum snd_soc_bias_level { SND_SOC_BIAS_OFF, }; -/* - * Digital Audio Interface (DAI) types - */ -#define SND_SOC_DAI_AC97 0x1 -#define SND_SOC_DAI_I2S 0x2 -#define SND_SOC_DAI_PCM 0x4 -#define SND_SOC_DAI_AC97_BUS 0x8 /* for custom i.e. non ac97_codec.c */ - struct snd_soc_device; struct snd_soc_pcm_stream; struct snd_soc_ops; diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c index 916f73b9a18f..0bb18dfa9495 100644 --- a/sound/soc/atmel/atmel_ssc_dai.c +++ b/sound/soc/atmel/atmel_ssc_dai.c @@ -702,7 +702,6 @@ static int atmel_ssc_resume(struct platform_device *pdev, struct snd_soc_dai atmel_ssc_dai[NUM_SSC_DEVICES] = { { .name = "atmel-ssc0", .id = 0, - .type = SND_SOC_DAI_PCM, .suspend = atmel_ssc_suspend, .resume = atmel_ssc_resume, .playback = { @@ -727,7 +726,6 @@ struct snd_soc_dai atmel_ssc_dai[NUM_SSC_DEVICES] = { #if NUM_SSC_DEVICES == 3 { .name = "atmel-ssc1", .id = 1, - .type = SND_SOC_DAI_PCM, .suspend = atmel_ssc_suspend, .resume = atmel_ssc_resume, .playback = { @@ -751,7 +749,6 @@ struct snd_soc_dai atmel_ssc_dai[NUM_SSC_DEVICES] = { }, { .name = "atmel-ssc2", .id = 2, - .type = SND_SOC_DAI_PCM, .suspend = atmel_ssc_suspend, .resume = atmel_ssc_resume, .playback = { diff --git a/sound/soc/au1x/psc-ac97.c b/sound/soc/au1x/psc-ac97.c index ad60a6042cad..a0bcfeaf5f86 100644 --- a/sound/soc/au1x/psc-ac97.c +++ b/sound/soc/au1x/psc-ac97.c @@ -346,7 +346,7 @@ static int au1xpsc_ac97_resume(struct platform_device *pdev, struct snd_soc_dai au1xpsc_ac97_dai = { .name = "au1xpsc_ac97", - .type = SND_SOC_DAI_AC97, + .ac97_control = 1, .probe = au1xpsc_ac97_probe, .remove = au1xpsc_ac97_remove, .suspend = au1xpsc_ac97_suspend, diff --git a/sound/soc/au1x/psc-i2s.c b/sound/soc/au1x/psc-i2s.c index 05a5acbb16ae..f4217e70a787 100644 --- a/sound/soc/au1x/psc-i2s.c +++ b/sound/soc/au1x/psc-i2s.c @@ -371,7 +371,6 @@ static int au1xpsc_i2s_resume(struct platform_device *pdev, struct snd_soc_dai au1xpsc_i2s_dai = { .name = "au1xpsc_i2s", - .type = SND_SOC_DAI_I2S, .probe = au1xpsc_i2s_probe, .remove = au1xpsc_i2s_remove, .suspend = au1xpsc_i2s_suspend, diff --git a/sound/soc/blackfin/bf5xx-ac97.c b/sound/soc/blackfin/bf5xx-ac97.c index 5dcd3f665ab1..709bdf08e398 100644 --- a/sound/soc/blackfin/bf5xx-ac97.c +++ b/sound/soc/blackfin/bf5xx-ac97.c @@ -409,7 +409,7 @@ static void bf5xx_ac97_remove(struct platform_device *pdev, struct snd_soc_dai bfin_ac97_dai = { .name = "bf5xx-ac97", .id = 0, - .type = SND_SOC_DAI_AC97, + .ac97_control = 1, .probe = bf5xx_ac97_probe, .remove = bf5xx_ac97_remove, .suspend = bf5xx_ac97_suspend, diff --git a/sound/soc/blackfin/bf5xx-i2s.c b/sound/soc/blackfin/bf5xx-i2s.c index 4e675b55b182..6e5036bf9245 100644 --- a/sound/soc/blackfin/bf5xx-i2s.c +++ b/sound/soc/blackfin/bf5xx-i2s.c @@ -292,7 +292,6 @@ static int bf5xx_i2s_resume(struct platform_device *pdev, struct snd_soc_dai bf5xx_i2s_dai = { .name = "bf5xx-i2s", .id = 0, - .type = SND_SOC_DAI_I2S, .probe = bf5xx_i2s_probe, .remove = bf5xx_i2s_remove, .suspend = bf5xx_i2s_suspend, diff --git a/sound/soc/codecs/ac97.c b/sound/soc/codecs/ac97.c index 8a93aff359d0..c4208c8210c8 100644 --- a/sound/soc/codecs/ac97.c +++ b/sound/soc/codecs/ac97.c @@ -43,7 +43,7 @@ static int ac97_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai ac97_dai = { .name = "AC97 HiFi", - .type = SND_SOC_DAI_AC97, + .ac97_control = 1, .playback = { .stream_name = "AC97 Playback", .channels_min = 1, diff --git a/sound/soc/codecs/pcm3008.c b/sound/soc/codecs/pcm3008.c index 2b26e1d80c8d..651a15eb8c2c 100644 --- a/sound/soc/codecs/pcm3008.c +++ b/sound/soc/codecs/pcm3008.c @@ -33,7 +33,6 @@ struct snd_soc_dai pcm3008_dai = { .name = "PCM3008 HiFi", - .type = SND_SOC_DAI_I2S, .playback = { .stream_name = "PCM3008 Playback", .channels_min = 1, diff --git a/sound/soc/codecs/wm9712.c b/sound/soc/codecs/wm9712.c index 6e3e0f340179..40f14061fb72 100644 --- a/sound/soc/codecs/wm9712.c +++ b/sound/soc/codecs/wm9712.c @@ -535,7 +535,7 @@ static int ac97_aux_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai wm9712_dai[] = { { .name = "AC97 HiFi", - .type = SND_SOC_DAI_AC97_BUS, + .ac97_control = 1, .playback = { .stream_name = "HiFi Playback", .channels_min = 1, diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c index a502667fca7a..9dad0bffcb05 100644 --- a/sound/soc/codecs/wm9713.c +++ b/sound/soc/codecs/wm9713.c @@ -1024,7 +1024,7 @@ static int ac97_aux_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai wm9713_dai[] = { { .name = "AC97 HiFi", - .type = SND_SOC_DAI_AC97_BUS, + .ac97_control = 1, .playback = { .stream_name = "HiFi Playback", .channels_min = 1, diff --git a/sound/soc/davinci/davinci-i2s.c b/sound/soc/davinci/davinci-i2s.c index 7a17cd0ecf64..cf31b3bb96cf 100644 --- a/sound/soc/davinci/davinci-i2s.c +++ b/sound/soc/davinci/davinci-i2s.c @@ -460,7 +460,6 @@ static void davinci_i2s_remove(struct platform_device *pdev, struct snd_soc_dai davinci_i2s_dai = { .name = "davinci-i2s", .id = 0, - .type = SND_SOC_DAI_I2S, .probe = davinci_i2s_probe, .remove = davinci_i2s_remove, .playback = { diff --git a/sound/soc/fsl/mpc5200_psc_i2s.c b/sound/soc/fsl/mpc5200_psc_i2s.c index e2c172f38979..9ad8f9a2d8e9 100644 --- a/sound/soc/fsl/mpc5200_psc_i2s.c +++ b/sound/soc/fsl/mpc5200_psc_i2s.c @@ -469,7 +469,6 @@ static int psc_i2s_set_fmt(struct snd_soc_dai *cpu_dai, unsigned int format) * psc_i2s_dai_template: template CPU Digital Audio Interface */ static struct snd_soc_dai psc_i2s_dai_template = { - .type = SND_SOC_DAI_I2S, .playback = { .channels_min = 2, .channels_max = 2, diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c index 2eeb135c1e4b..252bc7ebb194 100644 --- a/sound/soc/omap/omap-mcbsp.c +++ b/sound/soc/omap/omap-mcbsp.c @@ -456,7 +456,6 @@ static int omap_mcbsp_dai_set_dai_sysclk(struct snd_soc_dai *cpu_dai, { \ .name = "omap-mcbsp-dai-"#link_id, \ .id = (link_id), \ - .type = SND_SOC_DAI_I2S, \ .playback = { \ .channels_min = 2, \ .channels_max = 2, \ diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c index d0dd6245a20a..402fc5ba65e7 100644 --- a/sound/soc/pxa/pxa-ssp.c +++ b/sound/soc/pxa/pxa-ssp.c @@ -788,7 +788,6 @@ struct snd_soc_dai pxa_ssp_dai[] = { { .name = "pxa2xx-ssp1", .id = 0, - .type = SND_SOC_DAI_PCM, .probe = pxa_ssp_probe, .remove = pxa_ssp_remove, .suspend = pxa_ssp_suspend, @@ -820,7 +819,6 @@ struct snd_soc_dai pxa_ssp_dai[] = { }, { .name = "pxa2xx-ssp2", .id = 1, - .type = SND_SOC_DAI_PCM, .probe = pxa_ssp_probe, .remove = pxa_ssp_remove, .suspend = pxa_ssp_suspend, @@ -853,7 +851,6 @@ struct snd_soc_dai pxa_ssp_dai[] = { { .name = "pxa2xx-ssp3", .id = 2, - .type = SND_SOC_DAI_PCM, .probe = pxa_ssp_probe, .remove = pxa_ssp_remove, .suspend = pxa_ssp_suspend, @@ -886,7 +883,6 @@ struct snd_soc_dai pxa_ssp_dai[] = { { .name = "pxa2xx-ssp4", .id = 3, - .type = SND_SOC_DAI_PCM, .probe = pxa_ssp_probe, .remove = pxa_ssp_remove, .suspend = pxa_ssp_suspend, diff --git a/sound/soc/pxa/pxa2xx-ac97.c b/sound/soc/pxa/pxa2xx-ac97.c index 86667d2f1b75..bffbe288634c 100644 --- a/sound/soc/pxa/pxa2xx-ac97.c +++ b/sound/soc/pxa/pxa2xx-ac97.c @@ -173,7 +173,7 @@ struct snd_soc_dai pxa_ac97_dai[] = { { .name = "pxa2xx-ac97", .id = 0, - .type = SND_SOC_DAI_AC97, + .ac97_control = 1, .probe = pxa2xx_ac97_probe, .remove = pxa2xx_ac97_remove, .suspend = pxa2xx_ac97_suspend, @@ -196,7 +196,7 @@ struct snd_soc_dai pxa_ac97_dai[] = { { .name = "pxa2xx-ac97-aux", .id = 1, - .type = SND_SOC_DAI_AC97, + .ac97_control = 1, .playback = { .stream_name = "AC97 Aux Playback", .channels_min = 1, @@ -215,7 +215,7 @@ struct snd_soc_dai pxa_ac97_dai[] = { { .name = "pxa2xx-ac97-mic", .id = 2, - .type = SND_SOC_DAI_AC97, + .ac97_control = 1, .capture = { .stream_name = "AC97 Mic Capture", .channels_min = 1, diff --git a/sound/soc/pxa/pxa2xx-i2s.c b/sound/soc/pxa/pxa2xx-i2s.c index 9a3e55b48129..f9a9e2ebafa1 100644 --- a/sound/soc/pxa/pxa2xx-i2s.c +++ b/sound/soc/pxa/pxa2xx-i2s.c @@ -340,7 +340,6 @@ static int pxa2xx_i2s_resume(struct platform_device *pdev, struct snd_soc_dai pxa_i2s_dai = { .name = "pxa2xx-i2s", .id = 0, - .type = SND_SOC_DAI_I2S, .suspend = pxa2xx_i2s_suspend, .resume = pxa2xx_i2s_resume, .playback = { diff --git a/sound/soc/s3c24xx/neo1973_wm8753.c b/sound/soc/s3c24xx/neo1973_wm8753.c index 528fc3f1b45b..3df2224a6723 100644 --- a/sound/soc/s3c24xx/neo1973_wm8753.c +++ b/sound/soc/s3c24xx/neo1973_wm8753.c @@ -548,7 +548,6 @@ static int neo1973_wm8753_init(struct snd_soc_codec *codec) static struct snd_soc_dai bt_dai = { .name = "Bluetooth", .id = 0, - .type = SND_SOC_DAI_PCM, .playback = { .channels_min = 1, .channels_max = 1, diff --git a/sound/soc/s3c24xx/s3c2412-i2s.c b/sound/soc/s3c24xx/s3c2412-i2s.c index 360cc2a49d9d..1c741047ae35 100644 --- a/sound/soc/s3c24xx/s3c2412-i2s.c +++ b/sound/soc/s3c24xx/s3c2412-i2s.c @@ -713,7 +713,6 @@ static int s3c2412_i2s_resume(struct platform_device *pdev, struct snd_soc_dai s3c2412_i2s_dai = { .name = "s3c2412-i2s", .id = 0, - .type = SND_SOC_DAI_I2S, .probe = s3c2412_i2s_probe, .suspend = s3c2412_i2s_suspend, .resume = s3c2412_i2s_resume, diff --git a/sound/soc/s3c24xx/s3c2443-ac97.c b/sound/soc/s3c24xx/s3c2443-ac97.c index 31377821b2c5..41bde6a3883b 100644 --- a/sound/soc/s3c24xx/s3c2443-ac97.c +++ b/sound/soc/s3c24xx/s3c2443-ac97.c @@ -358,7 +358,7 @@ struct snd_soc_dai s3c2443_ac97_dai[] = { { .name = "s3c2443-ac97", .id = 0, - .type = SND_SOC_DAI_AC97, + .ac97_control = 1, .probe = s3c2443_ac97_probe, .remove = s3c2443_ac97_remove, .playback = { @@ -380,7 +380,7 @@ struct snd_soc_dai s3c2443_ac97_dai[] = { { .name = "pxa2xx-ac97-mic", .id = 1, - .type = SND_SOC_DAI_AC97, + .ac97_control = 1, .capture = { .stream_name = "AC97 Mic Capture", .channels_min = 1, diff --git a/sound/soc/s3c24xx/s3c24xx-i2s.c b/sound/soc/s3c24xx/s3c24xx-i2s.c index 1bac9dd3dbd4..8d9135f41bc9 100644 --- a/sound/soc/s3c24xx/s3c24xx-i2s.c +++ b/sound/soc/s3c24xx/s3c24xx-i2s.c @@ -461,7 +461,6 @@ static int s3c24xx_i2s_resume(struct platform_device *pdev, struct snd_soc_dai s3c24xx_i2s_dai = { .name = "s3c24xx-i2s", .id = 0, - .type = SND_SOC_DAI_I2S, .probe = s3c24xx_i2s_probe, .suspend = s3c24xx_i2s_suspend, .resume = s3c24xx_i2s_resume, diff --git a/sound/soc/sh/hac.c b/sound/soc/sh/hac.c index 3318071dc80f..c435913c60eb 100644 --- a/sound/soc/sh/hac.c +++ b/sound/soc/sh/hac.c @@ -271,7 +271,7 @@ struct snd_soc_dai sh4_hac_dai[] = { { .name = "HAC0", .id = 0, - .type = SND_SOC_DAI_AC97, + .ac97_control = 1, .playback = { .rates = AC97_RATES, .formats = AC97_FMTS, @@ -291,8 +291,8 @@ struct snd_soc_dai sh4_hac_dai[] = { #ifdef CONFIG_CPU_SUBTYPE_SH7760 { .name = "HAC1", + .ac97_control = 1, .id = 1, - .type = SND_SOC_DAI_AC97, .playback = { .rates = AC97_RATES, .formats = AC97_FMTS, diff --git a/sound/soc/sh/ssi.c b/sound/soc/sh/ssi.c index 52a233840d27..fed544a3deff 100644 --- a/sound/soc/sh/ssi.c +++ b/sound/soc/sh/ssi.c @@ -340,7 +340,6 @@ struct snd_soc_dai sh4_ssi_dai[] = { { .name = "SSI0", .id = 0, - .type = SND_SOC_DAI_I2S, .playback = { .rates = SSI_RATES, .formats = SSI_FMTS, @@ -367,7 +366,6 @@ struct snd_soc_dai sh4_ssi_dai[] = { { .name = "SSI1", .id = 1, - .type = SND_SOC_DAI_I2S, .playback = { .rates = SSI_RATES, .formats = SSI_FMTS, diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 43f4060dbe75..0d47696ccd07 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -100,20 +100,6 @@ static int soc_ac97_dev_register(struct snd_soc_codec *codec) } #endif -static inline const char *get_dai_name(int type) -{ - switch (type) { - case SND_SOC_DAI_AC97_BUS: - case SND_SOC_DAI_AC97: - return "AC97"; - case SND_SOC_DAI_I2S: - return "I2S"; - case SND_SOC_DAI_PCM: - return "PCM"; - } - return NULL; -} - /* * Called by ALSA when a PCM substream is opened, the runtime->hw record is * then initialized and any private data can be allocated. This also calls @@ -652,7 +638,7 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state) for (i = 0; i < card->num_links; i++) { struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai; - if (cpu_dai->suspend && cpu_dai->type != SND_SOC_DAI_AC97) + if (cpu_dai->suspend && !cpu_dai->ac97_control) cpu_dai->suspend(pdev, cpu_dai); if (platform->suspend) platform->suspend(pdev, cpu_dai); @@ -678,7 +664,7 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state) for (i = 0; i < card->num_links; i++) { struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai; - if (cpu_dai->suspend && cpu_dai->type == SND_SOC_DAI_AC97) + if (cpu_dai->suspend && cpu_dai->ac97_control) cpu_dai->suspend(pdev, cpu_dai); } @@ -714,7 +700,7 @@ static void soc_resume_deferred(struct work_struct *work) for (i = 0; i < card->num_links; i++) { struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai; - if (cpu_dai->resume && cpu_dai->type == SND_SOC_DAI_AC97) + if (cpu_dai->resume && cpu_dai->ac97_control) cpu_dai->resume(pdev, cpu_dai); } @@ -741,7 +727,7 @@ static void soc_resume_deferred(struct work_struct *work) for (i = 0; i < card->num_links; i++) { struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai; - if (cpu_dai->resume && cpu_dai->type != SND_SOC_DAI_AC97) + if (cpu_dai->resume && !cpu_dai->ac97_control) cpu_dai->resume(pdev, cpu_dai); if (platform->resume) platform->resume(pdev, cpu_dai); @@ -898,8 +884,8 @@ static int soc_new_pcm(struct snd_soc_device *socdev, codec_dai->codec = socdev->codec; /* check client and interface hw capabilities */ - sprintf(new_name, "%s %s-%s-%d", dai_link->stream_name, codec_dai->name, - get_dai_name(cpu_dai->type), num); + sprintf(new_name, "%s %s-%d", dai_link->stream_name, codec_dai->name, + num); if (codec_dai->playback.channels_min) playback = 1; @@ -1270,8 +1256,7 @@ int snd_soc_register_card(struct snd_soc_device *socdev) continue; } } - if (card->dai_link[i].codec_dai->type == - SND_SOC_DAI_AC97_BUS) + if (card->dai_link[i].codec_dai->ac97_control) ac97 = 1; } snprintf(codec->card->shortname, sizeof(codec->card->shortname), @@ -1335,7 +1320,7 @@ void snd_soc_free_pcms(struct snd_soc_device *socdev) #ifdef CONFIG_SND_SOC_AC97_BUS for (i = 0; i < codec->num_dai; i++) { codec_dai = &codec->dai[i]; - if (codec_dai->type == SND_SOC_DAI_AC97_BUS && codec->ac97) { + if (codec_dai->ac97_control && codec->ac97) { soc_ac97_dev_unregister(codec); goto free_card; } -- cgit v1.2.3 From 1acdac104668a0834cfa267de9946fac7764d486 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 20 Nov 2008 10:02:53 -0800 Subject: futex: make clock selectable for FUTEX_WAIT_BITSET FUTEX_WAIT_BITSET could be used instead of FUTEX_WAIT by setting the bit set to FUTEX_BITSET_MATCH_ANY, but FUTEX_WAIT uses CLOCK_REALTIME while FUTEX_WAIT_BITSET uses CLOCK_MONOTONIC. Add a flag to select CLOCK_REALTIME for FUTEX_WAIT_BITSET so glibc can replace the FUTEX_WAIT logic which needs to do gettimeofday() calls before and after the syscall to convert the absolute timeout to a relative timeout for FUTEX_WAIT. Signed-off-by: Thomas Gleixner Cc: Ulrich Drepper --- include/linux/futex.h | 3 ++- kernel/futex.c | 24 +++++++++++++++++------- 2 files changed, 19 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/futex.h b/include/linux/futex.h index 8f627b9ae2b1..3bf5bb5a34f9 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -25,7 +25,8 @@ union ktime; #define FUTEX_WAKE_BITSET 10 #define FUTEX_PRIVATE_FLAG 128 -#define FUTEX_CMD_MASK ~FUTEX_PRIVATE_FLAG +#define FUTEX_CLOCK_REALTIME 256 +#define FUTEX_CMD_MASK ~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME) #define FUTEX_WAIT_PRIVATE (FUTEX_WAIT | FUTEX_PRIVATE_FLAG) #define FUTEX_WAKE_PRIVATE (FUTEX_WAKE | FUTEX_PRIVATE_FLAG) diff --git a/kernel/futex.c b/kernel/futex.c index e10c5c8786a6..ba0d3b83c091 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1142,12 +1142,13 @@ handle_fault: * In case we must use restart_block to restart a futex_wait, * we encode in the 'flags' shared capability */ -#define FLAGS_SHARED 1 +#define FLAGS_SHARED 0x01 +#define FLAGS_CLOCKRT 0x02 static long futex_wait_restart(struct restart_block *restart); static int futex_wait(u32 __user *uaddr, int fshared, - u32 val, ktime_t *abs_time, u32 bitset) + u32 val, ktime_t *abs_time, u32 bitset, int clockrt) { struct task_struct *curr = current; DECLARE_WAITQUEUE(wait, curr); @@ -1233,8 +1234,10 @@ static int futex_wait(u32 __user *uaddr, int fshared, slack = current->timer_slack_ns; if (rt_task(current)) slack = 0; - hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, - HRTIMER_MODE_ABS); + hrtimer_init_on_stack(&t.timer, + clockrt ? CLOCK_REALTIME : + CLOCK_MONOTONIC, + HRTIMER_MODE_ABS); hrtimer_init_sleeper(&t, current); hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack); @@ -1289,6 +1292,8 @@ static int futex_wait(u32 __user *uaddr, int fshared, if (fshared) restart->futex.flags |= FLAGS_SHARED; + if (clockrt) + restart->futex.flags |= FLAGS_CLOCKRT; return -ERESTART_RESTARTBLOCK; } @@ -1312,7 +1317,8 @@ static long futex_wait_restart(struct restart_block *restart) if (restart->futex.flags & FLAGS_SHARED) fshared = 1; return (long)futex_wait(uaddr, fshared, restart->futex.val, &t, - restart->futex.bitset); + restart->futex.bitset, + restart->futex.flags & FLAGS_CLOCKRT); } @@ -1905,18 +1911,22 @@ void exit_robust_list(struct task_struct *curr) long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3) { - int ret = -ENOSYS; + int clockrt, ret = -ENOSYS; int cmd = op & FUTEX_CMD_MASK; int fshared = 0; if (!(op & FUTEX_PRIVATE_FLAG)) fshared = 1; + clockrt = op & FUTEX_CLOCK_REALTIME; + if (clockrt && cmd != FUTEX_WAIT_BITSET) + return -ENOSYS; + switch (cmd) { case FUTEX_WAIT: val3 = FUTEX_BITSET_MATCH_ANY; case FUTEX_WAIT_BITSET: - ret = futex_wait(uaddr, fshared, val, timeout, val3); + ret = futex_wait(uaddr, fshared, val, timeout, val3, clockrt); break; case FUTEX_WAKE: val3 = FUTEX_BITSET_MATCH_ANY; -- cgit v1.2.3 From 2e77d89b2fa8e3f8325b8ce7893ec3645f41aff5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 24 Nov 2008 15:52:46 -0800 Subject: net: avoid a pair of dst_hold()/dst_release() in ip_append_data() We can reduce pressure on dst entry refcount that slowdown UDP transmit path on SMP machines. This pressure is visible on RTP servers when delivering content to mediagateways, especially big ones, handling thousand of streams. Several cpus send UDP frames to the same destination, hence use the same dst entry. This patch makes ip_append_data() eventually steal the refcount its callers had to take on the dst entry. This doesnt avoid all refcounting, but still gives speedups on SMP, on UDP/RAW transmit path Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- net/ipv4/icmp.c | 8 ++++---- net/ipv4/ip_output.c | 11 ++++++++--- net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 2 +- 5 files changed, 15 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index bc026ecb513f..ddef10c22e3a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -110,7 +110,7 @@ extern int ip_append_data(struct sock *sk, int odd, struct sk_buff *skb), void *from, int len, int protolen, struct ipcm_cookie *ipc, - struct rtable *rt, + struct rtable **rt, unsigned int flags); extern int ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb); extern ssize_t ip_append_page(struct sock *sk, struct page *page, diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 21e497efbd7f..7b88be9803b1 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -321,12 +321,12 @@ static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd, } static void icmp_push_reply(struct icmp_bxm *icmp_param, - struct ipcm_cookie *ipc, struct rtable *rt) + struct ipcm_cookie *ipc, struct rtable **rt) { struct sock *sk; struct sk_buff *skb; - sk = icmp_sk(dev_net(rt->u.dst.dev)); + sk = icmp_sk(dev_net((*rt)->u.dst.dev)); if (ip_append_data(sk, icmp_glue_bits, icmp_param, icmp_param->data_len+icmp_param->head_len, icmp_param->head_len, @@ -392,7 +392,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) } if (icmpv4_xrlim_allow(net, rt, icmp_param->data.icmph.type, icmp_param->data.icmph.code)) - icmp_push_reply(icmp_param, &ipc, rt); + icmp_push_reply(icmp_param, &ipc, &rt); ip_rt_put(rt); out_unlock: icmp_xmit_unlock(sk); @@ -635,7 +635,7 @@ route_done: icmp_param.data_len = room; icmp_param.head_len = sizeof(struct icmphdr); - icmp_push_reply(&icmp_param, &ipc, rt); + icmp_push_reply(&icmp_param, &ipc, &rt); ende: ip_rt_put(rt); out_unlock: diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 46d7be233eac..5516825a0751 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -778,7 +778,7 @@ int ip_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, - struct ipcm_cookie *ipc, struct rtable *rt, + struct ipcm_cookie *ipc, struct rtable **rtp, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); @@ -793,6 +793,7 @@ int ip_append_data(struct sock *sk, int offset = 0; unsigned int maxfraglen, fragheaderlen; int csummode = CHECKSUM_NONE; + struct rtable *rt; if (flags&MSG_PROBE) return 0; @@ -812,7 +813,11 @@ int ip_append_data(struct sock *sk, inet->cork.flags |= IPCORK_OPT; inet->cork.addr = ipc->addr; } - dst_hold(&rt->u.dst); + rt = *rtp; + /* + * We steal reference to this route, caller should not release it + */ + *rtp = NULL; inet->cork.fragsize = mtu = inet->pmtudisc == IP_PMTUDISC_PROBE ? rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path); @@ -1391,7 +1396,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0, - &ipc, rt, MSG_DONTWAIT); + &ipc, &rt, MSG_DONTWAIT); if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) { if (arg->csumoffset >= 0) *((__sum16 *)skb_transport_header(skb) + diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 998fcffc9e15..dff8bc4e0fac 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -572,7 +572,7 @@ back_from_confirm: ipc.addr = rt->rt_dst; lock_sock(sk); err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, - &ipc, rt, msg->msg_flags); + &ipc, &rt, msg->msg_flags); if (err) ip_flush_pending_frames(sk); else if (!(msg->msg_flags & MSG_MORE)) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index da869ce041d9..549114472db3 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -719,7 +719,7 @@ do_append_data: up->len += ulen; getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, - sizeof(struct udphdr), &ipc, rt, + sizeof(struct udphdr), &ipc, &rt, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_flush_pending_frames(sk); -- cgit v1.2.3 From 18b6e0414e42d95183f07d8177e3ff0241abd825 Mon Sep 17 00:00:00 2001 From: Serge Hallyn Date: Wed, 15 Oct 2008 16:38:45 -0500 Subject: User namespaces: set of cleanups (v2) The user_ns is moved from nsproxy to user_struct, so that a struct cred by itself is sufficient to determine access (which it otherwise would not be). Corresponding ecryptfs fixes (by David Howells) are here as well. Fix refcounting. The following rules now apply: 1. The task pins the user struct. 2. The user struct pins its user namespace. 3. The user namespace pins the struct user which created it. User namespaces are cloned during copy_creds(). Unsharing a new user_ns is no longer possible. (We could re-add that, but it'll cause code duplication and doesn't seem useful if PAM doesn't need to clone user namespaces). When a user namespace is created, its first user (uid 0) gets empty keyrings and a clean group_info. This incorporates a previous patch by David Howells. Here is his original patch description: >I suggest adding the attached incremental patch. It makes the following >changes: > > (1) Provides a current_user_ns() macro to wrap accesses to current's user > namespace. > > (2) Fixes eCryptFS. > > (3) Renames create_new_userns() to create_user_ns() to be more consistent > with the other associated functions and because the 'new' in the name is > superfluous. > > (4) Moves the argument and permission checks made for CLONE_NEWUSER to the > beginning of do_fork() so that they're done prior to making any attempts > at allocation. > > (5) Calls create_user_ns() after prepare_creds(), and gives it the new creds > to fill in rather than have it return the new root user. I don't imagine > the new root user being used for anything other than filling in a cred > struct. > > This also permits me to get rid of a get_uid() and a free_uid(), as the > reference the creds were holding on the old user_struct can just be > transferred to the new namespace's creator pointer. > > (6) Makes create_user_ns() reset the UIDs and GIDs of the creds under > preparation rather than doing it in copy_creds(). > >David >Signed-off-by: David Howells Changelog: Oct 20: integrate dhowells comments 1. leave thread_keyring alone 2. use current_user_ns() in set_user() Signed-off-by: Serge Hallyn --- fs/ecryptfs/messaging.c | 13 ++++---- fs/ecryptfs/miscdev.c | 19 ++++------- include/linux/cred.h | 2 ++ include/linux/init_task.h | 1 - include/linux/nsproxy.h | 1 - include/linux/sched.h | 1 + include/linux/user_namespace.h | 13 +++----- kernel/cred.c | 15 +++++++-- kernel/fork.c | 19 +++++++++-- kernel/nsproxy.c | 15 ++------- kernel/sys.c | 4 +-- kernel/user.c | 47 ++++++++------------------ kernel/user_namespace.c | 75 +++++++++++++++++------------------------- 13 files changed, 96 insertions(+), 129 deletions(-) (limited to 'include') diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index e0b0a4e28b9b..6913f727624d 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -360,7 +360,7 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, struct ecryptfs_msg_ctx *msg_ctx; size_t msg_size; struct nsproxy *nsproxy; - struct user_namespace *current_user_ns; + struct user_namespace *tsk_user_ns; uid_t ctx_euid; int rc; @@ -385,9 +385,9 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, mutex_unlock(&ecryptfs_daemon_hash_mux); goto wake_up; } - current_user_ns = nsproxy->user_ns; + tsk_user_ns = __task_cred(msg_ctx->task)->user->user_ns; ctx_euid = task_euid(msg_ctx->task); - rc = ecryptfs_find_daemon_by_euid(&daemon, ctx_euid, current_user_ns); + rc = ecryptfs_find_daemon_by_euid(&daemon, ctx_euid, tsk_user_ns); rcu_read_unlock(); mutex_unlock(&ecryptfs_daemon_hash_mux); if (rc) { @@ -405,11 +405,11 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, euid, ctx_euid); goto unlock; } - if (current_user_ns != user_ns) { + if (tsk_user_ns != user_ns) { rc = -EBADMSG; printk(KERN_WARNING "%s: Received message from user_ns " "[0x%p]; expected message from user_ns [0x%p]\n", - __func__, user_ns, nsproxy->user_ns); + __func__, user_ns, tsk_user_ns); goto unlock; } if (daemon->pid != pid) { @@ -468,8 +468,7 @@ ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type, uid_t euid = current_euid(); int rc; - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, - current->nsproxy->user_ns); + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); if (rc || !daemon) { rc = -ENOTCONN; printk(KERN_ERR "%s: User [%d] does not have a daemon " diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 047ac609695b..efd95a0ed1ea 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -47,8 +47,7 @@ ecryptfs_miscdev_poll(struct file *file, poll_table *pt) mutex_lock(&ecryptfs_daemon_hash_mux); /* TODO: Just use file->private_data? */ - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, - current->nsproxy->user_ns); + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); BUG_ON(rc || !daemon); mutex_lock(&daemon->mux); mutex_unlock(&ecryptfs_daemon_hash_mux); @@ -95,11 +94,9 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file) "count; rc = [%d]\n", __func__, rc); goto out_unlock_daemon_list; } - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, - current->nsproxy->user_ns); + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); if (rc || !daemon) { - rc = ecryptfs_spawn_daemon(&daemon, euid, - current->nsproxy->user_ns, + rc = ecryptfs_spawn_daemon(&daemon, euid, current_user_ns(), task_pid(current)); if (rc) { printk(KERN_ERR "%s: Error attempting to spawn daemon; " @@ -153,8 +150,7 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file) int rc; mutex_lock(&ecryptfs_daemon_hash_mux); - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, - current->nsproxy->user_ns); + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); BUG_ON(rc || !daemon); mutex_lock(&daemon->mux); BUG_ON(daemon->pid != task_pid(current)); @@ -254,8 +250,7 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count, mutex_lock(&ecryptfs_daemon_hash_mux); /* TODO: Just use file->private_data? */ - rc = ecryptfs_find_daemon_by_euid(&daemon, euid, - current->nsproxy->user_ns); + rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); BUG_ON(rc || !daemon); mutex_lock(&daemon->mux); if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { @@ -295,7 +290,7 @@ check_list: goto check_list; } BUG_ON(euid != daemon->euid); - BUG_ON(current->nsproxy->user_ns != daemon->user_ns); + BUG_ON(current_user_ns() != daemon->user_ns); BUG_ON(task_pid(current) != daemon->pid); msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue, struct ecryptfs_msg_ctx, daemon_out_list); @@ -468,7 +463,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf, goto out_free; } rc = ecryptfs_miscdev_response(&data[i], packet_size, - euid, current->nsproxy->user_ns, + euid, current_user_ns(), task_pid(current), seq); if (rc) printk(KERN_WARNING "%s: Failed to deliver miscdev " diff --git a/include/linux/cred.h b/include/linux/cred.h index 26c1ab179946..3282ee4318e7 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -60,6 +60,7 @@ do { \ } while (0) extern struct group_info *groups_alloc(int); +extern struct group_info init_groups; extern void groups_free(struct group_info *); extern int set_current_groups(struct group_info *); extern int set_groups(struct cred *, struct group_info *); @@ -315,6 +316,7 @@ static inline void put_cred(const struct cred *_cred) #define current_fsgid() (current_cred_xxx(fsgid)) #define current_cap() (current_cred_xxx(cap_effective)) #define current_user() (current_cred_xxx(user)) +#define current_user_ns() (current_cred_xxx(user)->user_ns) #define current_security() (current_cred_xxx(security)) #define current_uid_gid(_uid, _gid) \ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 2597858035cd..959f5522d10a 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -57,7 +57,6 @@ extern struct nsproxy init_nsproxy; .mnt_ns = NULL, \ INIT_NET_NS(net_ns) \ INIT_IPC_NS(ipc_ns) \ - .user_ns = &init_user_ns, \ } #define INIT_SIGHAND(sighand) { \ diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index c8a768e59640..afad7dec1b36 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -27,7 +27,6 @@ struct nsproxy { struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; struct pid_namespace *pid_ns; - struct user_namespace *user_ns; struct net *net_ns; }; extern struct nsproxy init_nsproxy; diff --git a/include/linux/sched.h b/include/linux/sched.h index 2036e9f26020..7f8015a3082e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -638,6 +638,7 @@ struct user_struct { /* Hash table maintenance information */ struct hlist_node uidhash_node; uid_t uid; + struct user_namespace *user_ns; #ifdef CONFIG_USER_SCHED struct task_group *tg; diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index b5f41d4c2eec..315bcd375224 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -12,7 +12,7 @@ struct user_namespace { struct kref kref; struct hlist_head uidhash_table[UIDHASH_SZ]; - struct user_struct *root_user; + struct user_struct *creator; }; extern struct user_namespace init_user_ns; @@ -26,8 +26,7 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns) return ns; } -extern struct user_namespace *copy_user_ns(int flags, - struct user_namespace *old_ns); +extern int create_user_ns(struct cred *new); extern void free_user_ns(struct kref *kref); static inline void put_user_ns(struct user_namespace *ns) @@ -43,13 +42,9 @@ static inline struct user_namespace *get_user_ns(struct user_namespace *ns) return &init_user_ns; } -static inline struct user_namespace *copy_user_ns(int flags, - struct user_namespace *old_ns) +static inline int create_user_ns(struct cred *new) { - if (flags & CLONE_NEWUSER) - return ERR_PTR(-EINVAL); - - return old_ns; + return -EINVAL; } static inline void put_user_ns(struct user_namespace *ns) diff --git a/kernel/cred.c b/kernel/cred.c index 13697ca2bb38..ff7bc071991c 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -274,6 +274,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) struct thread_group_cred *tgcred; #endif struct cred *new; + int ret; mutex_init(&p->cred_exec_mutex); @@ -293,6 +294,12 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) if (!new) return -ENOMEM; + if (clone_flags & CLONE_NEWUSER) { + ret = create_user_ns(new); + if (ret < 0) + goto error_put; + } + #ifdef CONFIG_KEYS /* new threads get their own thread keyrings if their parent already * had one */ @@ -309,8 +316,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) if (!(clone_flags & CLONE_THREAD)) { tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); if (!tgcred) { - put_cred(new); - return -ENOMEM; + ret = -ENOMEM; + goto error_put; } atomic_set(&tgcred->usage, 1); spin_lock_init(&tgcred->lock); @@ -325,6 +332,10 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) atomic_inc(&new->user->processes); p->cred = p->real_cred = get_cred(new); return 0; + +error_put: + put_cred(new); + return ret; } /** diff --git a/kernel/fork.c b/kernel/fork.c index 29c18c14812d..1dd89451fae4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -976,7 +976,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (atomic_read(&p->real_cred->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && - p->real_cred->user != current->nsproxy->user_ns->root_user) + p->real_cred->user != INIT_USER) goto bad_fork_free; } @@ -1334,6 +1334,20 @@ long do_fork(unsigned long clone_flags, int trace = 0; long nr; + /* + * Do some preliminary argument and permissions checking before we + * actually start allocating stuff + */ + if (clone_flags & CLONE_NEWUSER) { + if (clone_flags & CLONE_THREAD) + return -EINVAL; + /* hopefully this check will go away when userns support is + * complete + */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + } + /* * We hope to recycle these flags after 2.6.26 */ @@ -1581,8 +1595,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) err = -EINVAL; if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| - CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER| - CLONE_NEWNET)) + CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET)) goto bad_unshare_out; /* diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 1d3ef29a2583..63598dca2d0c 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -80,12 +80,6 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, goto out_pid; } - new_nsp->user_ns = copy_user_ns(flags, tsk->nsproxy->user_ns); - if (IS_ERR(new_nsp->user_ns)) { - err = PTR_ERR(new_nsp->user_ns); - goto out_user; - } - new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns); if (IS_ERR(new_nsp->net_ns)) { err = PTR_ERR(new_nsp->net_ns); @@ -95,9 +89,6 @@ static struct nsproxy *create_new_namespaces(unsigned long flags, return new_nsp; out_net: - if (new_nsp->user_ns) - put_user_ns(new_nsp->user_ns); -out_user: if (new_nsp->pid_ns) put_pid_ns(new_nsp->pid_ns); out_pid: @@ -130,7 +121,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) get_nsproxy(old_ns); if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | - CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNET))) + CLONE_NEWPID | CLONE_NEWNET))) return 0; if (!capable(CAP_SYS_ADMIN)) { @@ -173,8 +164,6 @@ void free_nsproxy(struct nsproxy *ns) put_ipc_ns(ns->ipc_ns); if (ns->pid_ns) put_pid_ns(ns->pid_ns); - if (ns->user_ns) - put_user_ns(ns->user_ns); put_net(ns->net_ns); kmem_cache_free(nsproxy_cachep, ns); } @@ -189,7 +178,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, int err = 0; if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | - CLONE_NEWUSER | CLONE_NEWNET))) + CLONE_NEWNET))) return 0; if (!capable(CAP_SYS_ADMIN)) diff --git a/kernel/sys.c b/kernel/sys.c index ab735040468a..ebe65c2c9873 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -565,13 +565,13 @@ static int set_user(struct cred *new) { struct user_struct *new_user; - new_user = alloc_uid(current->nsproxy->user_ns, new->uid); + new_user = alloc_uid(current_user_ns(), new->uid); if (!new_user) return -EAGAIN; if (atomic_read(&new_user->processes) >= current->signal->rlim[RLIMIT_NPROC].rlim_cur && - new_user != current->nsproxy->user_ns->root_user) { + new_user != INIT_USER) { free_uid(new_user); return -EAGAIN; } diff --git a/kernel/user.c b/kernel/user.c index d476307dd4b0..c0ef3a464438 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -20,9 +20,9 @@ struct user_namespace init_user_ns = { .kref = { - .refcount = ATOMIC_INIT(2), + .refcount = ATOMIC_INIT(1), }, - .root_user = &root_user, + .creator = &root_user, }; EXPORT_SYMBOL_GPL(init_user_ns); @@ -48,12 +48,14 @@ static struct kmem_cache *uid_cachep; */ static DEFINE_SPINLOCK(uidhash_lock); +/* root_user.__count is 2, 1 for init task cred, 1 for init_user_ns->creator */ struct user_struct root_user = { - .__count = ATOMIC_INIT(1), + .__count = ATOMIC_INIT(2), .processes = ATOMIC_INIT(1), .files = ATOMIC_INIT(0), .sigpending = ATOMIC_INIT(0), .locked_shm = 0, + .user_ns = &init_user_ns, #ifdef CONFIG_USER_SCHED .tg = &init_task_group, #endif @@ -314,12 +316,13 @@ done: * IRQ state (as stored in flags) is restored and uidhash_lock released * upon function exit. */ -static inline void free_user(struct user_struct *up, unsigned long flags) +static void free_user(struct user_struct *up, unsigned long flags) { /* restore back the count */ atomic_inc(&up->__count); spin_unlock_irqrestore(&uidhash_lock, flags); + put_user_ns(up->user_ns); INIT_WORK(&up->work, remove_user_sysfs_dir); schedule_work(&up->work); } @@ -335,13 +338,14 @@ static inline void uids_mutex_unlock(void) { } * IRQ state (as stored in flags) is restored and uidhash_lock released * upon function exit. */ -static inline void free_user(struct user_struct *up, unsigned long flags) +static void free_user(struct user_struct *up, unsigned long flags) { uid_hash_remove(up); spin_unlock_irqrestore(&uidhash_lock, flags); sched_destroy_user(up); key_put(up->uid_keyring); key_put(up->session_keyring); + put_user_ns(up->user_ns); kmem_cache_free(uid_cachep, up); } @@ -357,7 +361,7 @@ struct user_struct *find_user(uid_t uid) { struct user_struct *ret; unsigned long flags; - struct user_namespace *ns = current->nsproxy->user_ns; + struct user_namespace *ns = current_user()->user_ns; spin_lock_irqsave(&uidhash_lock, flags); ret = uid_hash_find(uid, uidhashentry(ns, uid)); @@ -404,6 +408,8 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) if (sched_create_user(new) < 0) goto out_free_user; + new->user_ns = get_user_ns(ns); + if (uids_user_create(new)) goto out_destoy_sched; @@ -427,7 +433,6 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) up = new; } spin_unlock_irq(&uidhash_lock); - } uids_mutex_unlock(); @@ -436,6 +441,7 @@ struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) out_destoy_sched: sched_destroy_user(new); + put_user_ns(new->user_ns); out_free_user: kmem_cache_free(uid_cachep, new); out_unlock: @@ -443,33 +449,6 @@ out_unlock: return NULL; } -#ifdef CONFIG_USER_NS -void release_uids(struct user_namespace *ns) -{ - int i; - unsigned long flags; - struct hlist_head *head; - struct hlist_node *nd; - - spin_lock_irqsave(&uidhash_lock, flags); - /* - * collapse the chains so that the user_struct-s will - * be still alive, but not in hashes. subsequent free_uid() - * will free them. - */ - for (i = 0; i < UIDHASH_SZ; i++) { - head = ns->uidhash_table + i; - while (!hlist_empty(head)) { - nd = head->first; - hlist_del_init(nd); - } - } - spin_unlock_irqrestore(&uidhash_lock, flags); - - free_uid(ns->root_user); -} -#endif - static int __init uid_cache_init(void) { int n; diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 0d9c51d67333..79084311ee57 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -9,70 +9,55 @@ #include #include #include +#include /* - * Clone a new ns copying an original user ns, setting refcount to 1 - * @old_ns: namespace to clone - * Return NULL on error (failure to kmalloc), new ns otherwise + * Create a new user namespace, deriving the creator from the user in the + * passed credentials, and replacing that user with the new root user for the + * new namespace. + * + * This is called by copy_creds(), which will finish setting the target task's + * credentials. */ -static struct user_namespace *clone_user_ns(struct user_namespace *old_ns) +int create_user_ns(struct cred *new) { struct user_namespace *ns; - struct user_struct *new_user; - struct cred *new; + struct user_struct *root_user; int n; ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL); if (!ns) - return ERR_PTR(-ENOMEM); + return -ENOMEM; kref_init(&ns->kref); for (n = 0; n < UIDHASH_SZ; ++n) INIT_HLIST_HEAD(ns->uidhash_table + n); - /* Insert new root user. */ - ns->root_user = alloc_uid(ns, 0); - if (!ns->root_user) { + /* Alloc new root user. */ + root_user = alloc_uid(ns, 0); + if (!root_user) { kfree(ns); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } - /* Reset current->user with a new one */ - new_user = alloc_uid(ns, current_uid()); - if (!new_user) { - free_uid(ns->root_user); - kfree(ns); - return ERR_PTR(-ENOMEM); - } - - /* Install the new user */ - new = prepare_creds(); - if (!new) { - free_uid(new_user); - free_uid(ns->root_user); - kfree(ns); - } - free_uid(new->user); - new->user = new_user; - commit_creds(new); - return ns; -} - -struct user_namespace * copy_user_ns(int flags, struct user_namespace *old_ns) -{ - struct user_namespace *new_ns; - - BUG_ON(!old_ns); - get_user_ns(old_ns); - - if (!(flags & CLONE_NEWUSER)) - return old_ns; + /* set the new root user in the credentials under preparation */ + ns->creator = new->user; + new->user = root_user; + new->uid = new->euid = new->suid = new->fsuid = 0; + new->gid = new->egid = new->sgid = new->fsgid = 0; + put_group_info(new->group_info); + new->group_info = get_group_info(&init_groups); +#ifdef CONFIG_KEYS + key_put(new->request_key_auth); + new->request_key_auth = NULL; +#endif + /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */ - new_ns = clone_user_ns(old_ns); + /* alloc_uid() incremented the userns refcount. Just set it to 1 */ + kref_set(&ns->kref, 1); - put_user_ns(old_ns); - return new_ns; + return 0; } void free_user_ns(struct kref *kref) @@ -80,7 +65,7 @@ void free_user_ns(struct kref *kref) struct user_namespace *ns; ns = container_of(kref, struct user_namespace, kref); - release_uids(ns); + free_uid(ns->creator); kfree(ns); } EXPORT_SYMBOL(free_user_ns); -- cgit v1.2.3 From e1aa680fa40e7492260a09cb57d94002245cc8fe Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 24 Nov 2008 21:11:55 -0800 Subject: tcp: move tcp_simple_retransmit to tcp_input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 2 -- net/ipv4/tcp_input.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++- net/ipv4/tcp_output.c | 50 ------------------------------------------------ 3 files changed, 52 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 8f26b28fb407..90b4c3b4c336 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -472,8 +472,6 @@ extern void tcp_send_delayed_ack(struct sock *sk); /* tcp_input.c */ extern void tcp_cwnd_application_limited(struct sock *sk); -extern void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, - struct sk_buff *skb); /* tcp_timer.c */ extern void tcp_init_xmit_timers(struct sock *); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 097294b7da3e..8085704863fb 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1002,7 +1002,8 @@ static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb) } } -void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb) +static void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, + struct sk_buff *skb) { tcp_verify_retransmit_hint(tp, skb); @@ -2559,6 +2560,56 @@ static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb) tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } +/* Do a simple retransmit without using the backoff mechanisms in + * tcp_timer. This is used for path mtu discovery. + * The socket is already locked here. + */ +void tcp_simple_retransmit(struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + unsigned int mss = tcp_current_mss(sk, 0); + u32 prior_lost = tp->lost_out; + + tcp_for_write_queue(skb, sk) { + if (skb == tcp_send_head(sk)) + break; + if (skb->len > mss && + !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { + TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; + tp->retrans_out -= tcp_skb_pcount(skb); + } + tcp_skb_mark_lost_uncond_verify(tp, skb); + } + } + + tcp_clear_retrans_hints_partial(tp); + + if (prior_lost == tp->lost_out) + return; + + if (tcp_is_reno(tp)) + tcp_limit_reno_sacked(tp); + + tcp_verify_left_out(tp); + + /* Don't muck with the congestion window here. + * Reason is that we do not increase amount of _data_ + * in network, but units changed and effective + * cwnd/ssthresh really reduced now. + */ + if (icsk->icsk_ca_state != TCP_CA_Loss) { + tp->high_seq = tp->snd_nxt; + tp->snd_ssthresh = tcp_current_ssthresh(sk); + tp->prior_ssthresh = 0; + tp->undo_marker = 0; + tcp_set_ca_state(sk, TCP_CA_Loss); + } + tcp_xmit_retransmit_queue(sk); +} + /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 86ef98975e94..c069ecb81ea5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1879,56 +1879,6 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, } } -/* Do a simple retransmit without using the backoff mechanisms in - * tcp_timer. This is used for path mtu discovery. - * The socket is already locked here. - */ -void tcp_simple_retransmit(struct sock *sk) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - unsigned int mss = tcp_current_mss(sk, 0); - u32 prior_lost = tp->lost_out; - - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - if (skb->len > mss && - !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { - TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out -= tcp_skb_pcount(skb); - } - tcp_skb_mark_lost_uncond_verify(tp, skb); - } - } - - tcp_clear_retrans_hints_partial(tp); - - if (prior_lost == tp->lost_out) - return; - - if (tcp_is_reno(tp)) - tcp_limit_reno_sacked(tp); - - tcp_verify_left_out(tp); - - /* Don't muck with the congestion window here. - * Reason is that we do not increase amount of _data_ - * in network, but units changed and effective - * cwnd/ssthresh really reduced now. - */ - if (icsk->icsk_ca_state != TCP_CA_Loss) { - tp->high_seq = tp->snd_nxt; - tp->snd_ssthresh = tcp_current_ssthresh(sk); - tp->prior_ssthresh = 0; - tp->undo_marker = 0; - tcp_set_ca_state(sk, TCP_CA_Loss); - } - tcp_xmit_retransmit_queue(sk); -} - /* This retransmits one SKB. Policy decisions and retransmit queue * state updates are done by the caller. Returns non-zero if an * error occurred which prevented the send. -- cgit v1.2.3 From 832d11c5cd076abc0aa1eaf7be96c81d1a59ce41 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 24 Nov 2008 21:20:15 -0800 Subject: tcp: Try to restore large SKBs while SACK processing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During SACK processing, most of the benefits of TSO are eaten by the SACK blocks that one-by-one fragment SKBs to MSS sized chunks. Then we're in problems when cleanup work for them has to be done when a large cumulative ACK comes. Try to return back to pre-split state already while more and more SACK info gets discovered by combining newly discovered SACK areas with the previous skb if that's SACKed as well. This approach has a number of benefits: 1) The processing overhead is spread more equally over the RTT 2) Write queue has less skbs to process (affect everything which has to walk in the queue past the sacked areas) 3) Write queue is consistent whole the time, so no other parts of TCP has to be aware of this (this was not the case with some other approach that was, well, quite intrusive all around). 4) Clean_rtx_queue can release most of the pages using single put_page instead of previous PAGE_SIZE/mss+1 calls In case a hole is fully filled by the new SACK block, we attempt to combine the next skb too which allows construction of skbs that are even larger than what tso split them to and it handles hole per on every nth patterns that often occur during slow start overshoot pretty nicely. Though this to be really useful also a retransmission would have to get lost since cumulative ACKs advance one hole at a time in the most typical case. TODO: handle upwards only merging. That should be rather easy when segment is fully sacked but I'm leaving that as future work item (it won't make very large difference anyway since this current approach already covers quite a lot of normal cases). I was earlier thinking of some sophisticated way of tracking timestamps of the first and the last segment but later on realized that it won't be that necessary at all to store the timestamp of the last segment. The cases that can occur are basically either: 1) ambiguous => no sensible measurement can be taken anyway 2) non-ambiguous is due to reordering => having the timestamp of the last segment there is just skewing things more off than does some good since the ack got triggered by one of the holes (besides some substle issues that would make determining right hole/skb even harder problem). Anyway, it has nothing to do with this change then. I choose to route some abnormal looking cases with goto noop, some could be handled differently (eg., by stopping the walking at that skb but again). In general, they either shouldn't happen at all or are rare enough to make no difference in practice. In theory this change (as whole) could cause some macroscale regression (global) because of cache misses that are taken over the round-trip time but it gets very likely better because of much less (local) cache misses per other write queue walkers and the big recovery clearing cumulative ack. Worth to note that these benefits would be very easy to get also without TSO/GSO being on as long as the data is in pages so that we can merge them. Currently I won't let that happen because DSACK splitting at fragment that would mess up pcounts due to sk_can_gso in tcp_set_skb_tso_segs. Once DSACKs fragments gets avoided, we have some conditions that can be made less strict. TODO: I will probably have to convert the excessive pointer passing to struct sacktag_state... :-) My testing revealed that considerable amount of skbs couldn't be shifted because they were cloned (most likely still awaiting tx reclaim)... [The rest is considering future work instead since I got repeatably EFAULT to tcpdump's recvfrom when I added pskb_expand_head to deal with clones, so I separated that into another, later patch] ...To counter that, I gave up on the fifth advantage: 5) When growing previous SACK block, less allocs for new skbs are done, basically a new alloc is needed only when new hole is detected and when the previous skb runs out of frags space ...which now only happens of if reclaim is fast enough to dispose the clone before the SACK block comes in (the window is RTT long), otherwise we'll have to alloc some. With clones being handled I got these numbers (will be somewhat worse without that), taken with fine-grained mibs: TCPSackShifted 398 TCPSackMerged 877 TCPSackShiftFallback 320 TCPSACKCOLLAPSEFALLBACKGSO 0 TCPSACKCOLLAPSEFALLBACKSKBBITS 0 TCPSACKCOLLAPSEFALLBACKSKBDATA 0 TCPSACKCOLLAPSEFALLBACKBELOW 0 TCPSACKCOLLAPSEFALLBACKFIRST 1 TCPSACKCOLLAPSEFALLBACKPREVBITS 318 TCPSACKCOLLAPSEFALLBACKMSS 1 TCPSACKCOLLAPSEFALLBACKNOHEAD 0 TCPSACKCOLLAPSEFALLBACKSHIFT 0 TCPSACKCOLLAPSENOOPSEQ 0 TCPSACKCOLLAPSENOOPSMALLPCOUNT 0 TCPSACKCOLLAPSENOOPSMALLLEN 0 TCPSACKCOLLAPSEHOLE 12 Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/skbuff.h | 33 +++++++ include/net/tcp.h | 5 + net/core/skbuff.c | 140 +++++++++++++++++++++++++++ net/ipv4/tcp_input.c | 256 +++++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 427 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a01b6f84e3bc..acf17af45af9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -492,6 +492,19 @@ static inline bool skb_queue_is_last(const struct sk_buff_head *list, return (skb->next == (struct sk_buff *) list); } +/** + * skb_queue_is_first - check if skb is the first entry in the queue + * @list: queue head + * @skb: buffer + * + * Returns true if @skb is the first buffer on the list. + */ +static inline bool skb_queue_is_first(const struct sk_buff_head *list, + const struct sk_buff *skb) +{ + return (skb->prev == (struct sk_buff *) list); +} + /** * skb_queue_next - return the next packet in the queue * @list: queue head @@ -510,6 +523,24 @@ static inline struct sk_buff *skb_queue_next(const struct sk_buff_head *list, return skb->next; } +/** + * skb_queue_prev - return the prev packet in the queue + * @list: queue head + * @skb: current buffer + * + * Return the prev packet in @list before @skb. It is only valid to + * call this if skb_queue_is_first() evaluates to false. + */ +static inline struct sk_buff *skb_queue_prev(const struct sk_buff_head *list, + const struct sk_buff *skb) +{ + /* This BUG_ON may seem severe, but if we just return then we + * are going to dereference garbage. + */ + BUG_ON(skb_queue_is_first(list, skb)); + return skb->prev; +} + /** * skb_get - reference buffer * @skb: buffer to reference @@ -1652,6 +1683,8 @@ extern int skb_splice_bits(struct sk_buff *skb, extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); extern void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); +extern int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, + int shiftlen); extern struct sk_buff *skb_segment(struct sk_buff *skb, int features); diff --git a/include/net/tcp.h b/include/net/tcp.h index 90b4c3b4c336..265392470b26 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1192,6 +1192,11 @@ static inline struct sk_buff *tcp_write_queue_next(struct sock *sk, struct sk_bu return skb_queue_next(&sk->sk_write_queue, skb); } +static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_buff *skb) +{ + return skb_queue_prev(&sk->sk_write_queue, skb); +} + #define tcp_for_write_queue(skb, sk) \ skb_queue_walk(&(sk)->sk_write_queue, skb) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 267185a848f6..844b8abeb18c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2018,6 +2018,146 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) skb_split_no_header(skb, skb1, len, pos); } +/* Shifting from/to a cloned skb is a no-go. + * + * TODO: handle cloned skbs by using pskb_expand_head() + */ +static int skb_prepare_for_shift(struct sk_buff *skb) +{ + return skb_cloned(skb); +} + +/** + * skb_shift - Shifts paged data partially from skb to another + * @tgt: buffer into which tail data gets added + * @skb: buffer from which the paged data comes from + * @shiftlen: shift up to this many bytes + * + * Attempts to shift up to shiftlen worth of bytes, which may be less than + * the length of the skb, from tgt to skb. Returns number bytes shifted. + * It's up to caller to free skb if everything was shifted. + * + * If @tgt runs out of frags, the whole operation is aborted. + * + * Skb cannot include anything else but paged data while tgt is allowed + * to have non-paged data as well. + * + * TODO: full sized shift could be optimized but that would need + * specialized skb free'er to handle frags without up-to-date nr_frags. + */ +int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) +{ + int from, to, merge, todo; + struct skb_frag_struct *fragfrom, *fragto; + + BUG_ON(shiftlen > skb->len); + BUG_ON(skb_headlen(skb)); /* Would corrupt stream */ + + todo = shiftlen; + from = 0; + to = skb_shinfo(tgt)->nr_frags; + fragfrom = &skb_shinfo(skb)->frags[from]; + + /* Actual merge is delayed until the point when we know we can + * commit all, so that we don't have to undo partial changes + */ + if (!to || + !skb_can_coalesce(tgt, to, fragfrom->page, fragfrom->page_offset)) { + merge = -1; + } else { + merge = to - 1; + + todo -= fragfrom->size; + if (todo < 0) { + if (skb_prepare_for_shift(skb) || + skb_prepare_for_shift(tgt)) + return 0; + + fragto = &skb_shinfo(tgt)->frags[merge]; + + fragto->size += shiftlen; + fragfrom->size -= shiftlen; + fragfrom->page_offset += shiftlen; + + goto onlymerged; + } + + from++; + } + + /* Skip full, not-fitting skb to avoid expensive operations */ + if ((shiftlen == skb->len) && + (skb_shinfo(skb)->nr_frags - from) > (MAX_SKB_FRAGS - to)) + return 0; + + if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt)) + return 0; + + while ((todo > 0) && (from < skb_shinfo(skb)->nr_frags)) { + if (to == MAX_SKB_FRAGS) + return 0; + + fragfrom = &skb_shinfo(skb)->frags[from]; + fragto = &skb_shinfo(tgt)->frags[to]; + + if (todo >= fragfrom->size) { + *fragto = *fragfrom; + todo -= fragfrom->size; + from++; + to++; + + } else { + get_page(fragfrom->page); + fragto->page = fragfrom->page; + fragto->page_offset = fragfrom->page_offset; + fragto->size = todo; + + fragfrom->page_offset += todo; + fragfrom->size -= todo; + todo = 0; + + to++; + break; + } + } + + /* Ready to "commit" this state change to tgt */ + skb_shinfo(tgt)->nr_frags = to; + + if (merge >= 0) { + fragfrom = &skb_shinfo(skb)->frags[0]; + fragto = &skb_shinfo(tgt)->frags[merge]; + + fragto->size += fragfrom->size; + put_page(fragfrom->page); + } + + /* Reposition in the original skb */ + to = 0; + while (from < skb_shinfo(skb)->nr_frags) + skb_shinfo(skb)->frags[to++] = skb_shinfo(skb)->frags[from++]; + skb_shinfo(skb)->nr_frags = to; + + BUG_ON(todo > 0 && !skb_shinfo(skb)->nr_frags); + +onlymerged: + /* Most likely the tgt won't ever need its checksum anymore, skb on + * the other hand might need it if it needs to be resent + */ + tgt->ip_summed = CHECKSUM_PARTIAL; + skb->ip_summed = CHECKSUM_PARTIAL; + + /* Yak, is it really working this way? Some helper please? */ + skb->len -= shiftlen; + skb->data_len -= shiftlen; + skb->truesize -= shiftlen; + tgt->len += shiftlen; + tgt->data_len += shiftlen; + tgt->truesize += shiftlen; + + return shiftlen; +} + /** * skb_prepare_seq_read - Prepare a sequential read of skb data * @skb: the buffer to read diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3c8e297e2c39..97d57676b8ee 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1242,6 +1242,8 @@ static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb, * aligned portion of it that matches. Therefore we might need to fragment * which may fail and creates some hassle (caller must handle error case * returns). + * + * FIXME: this could be merged to shift decision code */ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, u32 start_seq, u32 end_seq) @@ -1353,9 +1355,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, if (fack_count > tp->fackets_out) tp->fackets_out = fack_count; - - if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) - tcp_advance_highest_sack(sk, skb); } /* D-SACK. We can detect redundant retransmission in S|R and plain R @@ -1370,12 +1369,231 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, return flag; } +static int tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, + struct sk_buff *skb, unsigned int pcount, + int shifted, int fack_count, int *reord, + int *flag, int mss) +{ + struct tcp_sock *tp = tcp_sk(sk); + u8 dummy_sacked = TCP_SKB_CB(skb)->sacked; /* We discard results */ + + BUG_ON(!pcount); + + TCP_SKB_CB(prev)->end_seq += shifted; + TCP_SKB_CB(skb)->seq += shifted; + + skb_shinfo(prev)->gso_segs += pcount; + BUG_ON(skb_shinfo(skb)->gso_segs < pcount); + skb_shinfo(skb)->gso_segs -= pcount; + + /* When we're adding to gso_segs == 1, gso_size will be zero, + * in theory this shouldn't be necessary but as long as DSACK + * code can come after this skb later on it's better to keep + * setting gso_size to something. + */ + if (!skb_shinfo(prev)->gso_size) { + skb_shinfo(prev)->gso_size = mss; + skb_shinfo(prev)->gso_type = sk->sk_gso_type; + } + + /* CHECKME: To clear or not to clear? Mimics normal skb currently */ + if (skb_shinfo(skb)->gso_segs <= 1) { + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_type = 0; + } + + *flag |= tcp_sacktag_one(skb, sk, reord, 0, fack_count, &dummy_sacked, + pcount); + + /* Difference in this won't matter, both ACKed by the same cumul. ACK */ + TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); + + tcp_clear_all_retrans_hints(tp); + + if (skb->len > 0) { + BUG_ON(!tcp_skb_pcount(skb)); + return 0; + } + + /* Whole SKB was eaten :-) */ + + TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(prev)->flags; + if (skb == tcp_highest_sack(sk)) + tcp_advance_highest_sack(sk, skb); + + tcp_unlink_write_queue(skb, sk); + sk_wmem_free_skb(sk, skb); + + return 1; +} + +/* I wish gso_size would have a bit more sane initialization than + * something-or-zero which complicates things + */ +static int tcp_shift_mss(struct sk_buff *skb) +{ + int mss = tcp_skb_mss(skb); + + if (!mss) + mss = skb->len; + + return mss; +} + +/* Shifting pages past head area doesn't work */ +static int skb_can_shift(struct sk_buff *skb) +{ + return !skb_headlen(skb) && skb_is_nonlinear(skb); +} + +/* Try collapsing SACK blocks spanning across multiple skbs to a single + * skb. + */ +static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, + u32 start_seq, u32 end_seq, + int dup_sack, int *fack_count, + int *reord, int *flag) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *prev; + int mss; + int pcount = 0; + int len; + int in_sack; + + if (!sk_can_gso(sk)) + goto fallback; + + /* Normally R but no L won't result in plain S */ + if (!dup_sack && + (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) == TCPCB_SACKED_RETRANS) + goto fallback; + if (!skb_can_shift(skb)) + goto fallback; + /* This frame is about to be dropped (was ACKed). */ + if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) + goto fallback; + + /* Can only happen with delayed DSACK + discard craziness */ + if (unlikely(skb == tcp_write_queue_head(sk))) + goto fallback; + prev = tcp_write_queue_prev(sk, skb); + + if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) + goto fallback; + + in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && + !before(end_seq, TCP_SKB_CB(skb)->end_seq); + + if (in_sack) { + len = skb->len; + pcount = tcp_skb_pcount(skb); + mss = tcp_shift_mss(skb); + + /* TODO: Fix DSACKs to not fragment already SACKed and we can + * drop this restriction as unnecessary + */ + if (mss != tcp_shift_mss(prev)) + goto fallback; + } else { + if (!after(TCP_SKB_CB(skb)->end_seq, start_seq)) + goto noop; + /* CHECKME: This is non-MSS split case only?, this will + * cause skipped skbs due to advancing loop btw, original + * has that feature too + */ + if (tcp_skb_pcount(skb) <= 1) + goto noop; + + in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq); + if (!in_sack) { + /* TODO: head merge to next could be attempted here + * if (!after(TCP_SKB_CB(skb)->end_seq, end_seq)), + * though it might not be worth of the additional hassle + * + * ...we can probably just fallback to what was done + * previously. We could try merging non-SACKed ones + * as well but it probably isn't going to buy off + * because later SACKs might again split them, and + * it would make skb timestamp tracking considerably + * harder problem. + */ + goto fallback; + } + + len = end_seq - TCP_SKB_CB(skb)->seq; + BUG_ON(len < 0); + BUG_ON(len > skb->len); + + /* MSS boundaries should be honoured or else pcount will + * severely break even though it makes things bit trickier. + * Optimize common case to avoid most of the divides + */ + mss = tcp_skb_mss(skb); + + /* TODO: Fix DSACKs to not fragment already SACKed and we can + * drop this restriction as unnecessary + */ + if (mss != tcp_shift_mss(prev)) + goto fallback; + + if (len == mss) { + pcount = 1; + } else if (len < mss) { + goto noop; + } else { + pcount = len / mss; + len = pcount * mss; + } + } + + if (!skb_shift(prev, skb, len)) + goto fallback; + if (!tcp_shifted_skb(sk, prev, skb, pcount, len, *fack_count, reord, + flag, mss)) + goto out; + + /* Hole filled allows collapsing with the next as well, this is very + * useful when hole on every nth skb pattern happens + */ + if (prev == tcp_write_queue_tail(sk)) + goto out; + skb = tcp_write_queue_next(sk, prev); + + if (!skb_can_shift(skb)) + goto out; + if (skb == tcp_send_head(sk)) + goto out; + if ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) + goto out; + + len = skb->len; + if (skb_shift(prev, skb, len)) { + pcount += tcp_skb_pcount(skb); + tcp_shifted_skb(sk, prev, skb, tcp_skb_pcount(skb), len, + *fack_count, reord, flag, mss); + } + +out: + *fack_count += pcount; + return prev; + +noop: + return skb; + +fallback: + return NULL; +} + static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, struct tcp_sack_block *next_dup, u32 start_seq, u32 end_seq, int dup_sack_in, int *fack_count, int *reord, int *flag) { + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *tmp; + tcp_for_write_queue_from(skb, sk) { int in_sack = 0; int dup_sack = dup_sack_in; @@ -1396,18 +1614,42 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, dup_sack = 1; } - if (in_sack <= 0) - in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, - end_seq); + /* skb reference here is a bit tricky to get right, since + * shifting can eat and free both this skb and the next, + * so not even _safe variant of the loop is enough. + */ + if (in_sack <= 0) { + tmp = tcp_shift_skb_data(sk, skb, start_seq, + end_seq, dup_sack, + fack_count, reord, flag); + if (tmp != NULL) { + if (tmp != skb) { + skb = tmp; + continue; + } + + in_sack = 0; + } else { + in_sack = tcp_match_skb_to_sack(sk, skb, + start_seq, + end_seq); + } + } + if (unlikely(in_sack < 0)) break; - if (in_sack) + if (in_sack) { *flag |= tcp_sacktag_one(skb, sk, reord, dup_sack, *fack_count, &(TCP_SKB_CB(skb)->sacked), tcp_skb_pcount(skb)); + if (!before(TCP_SKB_CB(skb)->seq, + tcp_highest_sack_seq(tp))) + tcp_advance_highest_sack(sk, skb); + } + *fack_count += tcp_skb_pcount(skb); } return skb; -- cgit v1.2.3 From 111cc8b913b42ef07793648b1699288332f273e1 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 24 Nov 2008 21:27:22 -0800 Subject: tcp: add some mibs to track collapsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/linux/snmp.h | 3 +++ net/ipv4/proc.c | 3 +++ net/ipv4/tcp_input.c | 4 ++++ 3 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 7a6e6bba4a71..aee3f1e1d1ce 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -216,6 +216,9 @@ enum LINUX_MIB_TCPSPURIOUSRTOS, /* TCPSpuriousRTOs */ LINUX_MIB_TCPMD5NOTFOUND, /* TCPMD5NotFound */ LINUX_MIB_TCPMD5UNEXPECTED, /* TCPMD5Unexpected */ + LINUX_MIB_SACKSHIFTED, + LINUX_MIB_SACKMERGED, + LINUX_MIB_SACKSHIFTFALLBACK, __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index a631a1f110ca..731789bb499f 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -234,6 +234,9 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS), SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND), SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED), + SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED), + SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED), + SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e6291dde3348..9f8a80ba17bd 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1415,6 +1415,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, if (skb->len > 0) { BUG_ON(!tcp_skb_pcount(skb)); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTED); return 0; } @@ -1436,6 +1437,8 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKMERGED); + return 1; } @@ -1594,6 +1597,7 @@ noop: return skb; fallback: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SACKSHIFTFALLBACK); return NULL; } -- cgit v1.2.3 From 14bfc987e395797dfe03e915e8b4c7fc9e5078e4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 25 Nov 2008 08:58:11 +0100 Subject: tracing, tty: fix warnings caused by branch tracing and tty_kref_get() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stephen Rothwell reported tht this warning started triggering in linux-next: In file included from init/main.c:27: include/linux/tty.h: In function ‘tty_kref_get’: include/linux/tty.h:330: warning: ‘______f’ is static but declared in inline function ‘tty_kref_get’ which is not static Which gcc emits for 'extern inline' functions that nevertheless define static variables. Change it to 'static inline', which is the norm in the kernel anyway. Reported-by: Stephen Rothwell Signed-off-by: Ingo Molnar --- include/linux/tty.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tty.h b/include/linux/tty.h index 3b8121d4e36f..eaec37c9d83d 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -325,7 +325,7 @@ extern struct class *tty_class; * go away */ -extern inline struct tty_struct *tty_kref_get(struct tty_struct *tty) +static inline struct tty_struct *tty_kref_get(struct tty_struct *tty) { if (tty) kref_get(&tty->kref); -- cgit v1.2.3 From 47fd5b8373ecc6bf5473e4139b62b06425448252 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 25 Nov 2008 00:20:43 -0800 Subject: netdev: add HAVE_NET_DEVICE_OPS As a concession to vendors who have to deal with one source for different kernel versions, add a HAVE_NET_DEVICE_OPS so they don't end up hard coding ifdef against kernel version. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6095af572dfd..76a89f8e6a19 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -545,6 +545,7 @@ struct netdev_queue { * * void (*ndo_poll_controller)(struct net_device *dev); */ +#define HAVE_NET_DEVICE_OPS struct net_device_ops { int (*ndo_init)(struct net_device *dev); void (*ndo_uninit)(struct net_device *dev); -- cgit v1.2.3 From 7a6b6f515f77d1c62a2f383b6dce18cb0af0cf4f Mon Sep 17 00:00:00 2001 From: Jeff Kirsher Date: Tue, 25 Nov 2008 01:02:08 -0800 Subject: DCB: fix kconfig option Since the netlink option for DCB is necessary to actually be useful, simplified the Kconfig option. In addition, added useful help text for the Kconfig option. Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/Kconfig | 4 ++-- drivers/net/ixgbe/Makefile | 2 +- drivers/net/ixgbe/ixgbe.h | 2 +- drivers/net/ixgbe/ixgbe_main.c | 10 +++++----- include/linux/netdevice.h | 4 ++-- net/Makefile | 4 ++-- net/dcb/Kconfig | 24 +++++++++++++++++------- net/dcb/dcbnl.c | 2 +- 8 files changed, 31 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index efd461d7c2bb..75f9f7f2fbed 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2451,10 +2451,10 @@ config IXGBE_DCA driver. DCA is a method for warming the CPU cache before data is used, with the intent of lessening the impact of cache misses. -config IXGBE_DCBNL +config IXGBE_DCB bool "Data Center Bridging (DCB) Support" default n - depends on IXGBE && DCBNL + depends on IXGBE && DCB ---help--- Say Y here if you want to use Data Center Bridging (DCB) in the driver. diff --git a/drivers/net/ixgbe/Makefile b/drivers/net/ixgbe/Makefile index 3228e508e628..6e7ef765bcd8 100644 --- a/drivers/net/ixgbe/Makefile +++ b/drivers/net/ixgbe/Makefile @@ -35,4 +35,4 @@ obj-$(CONFIG_IXGBE) += ixgbe.o ixgbe-objs := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o \ ixgbe_82598.o ixgbe_phy.o -ixgbe-$(CONFIG_IXGBE_DCBNL) += ixgbe_dcb.o ixgbe_dcb_82598.o ixgbe_dcb_nl.o +ixgbe-$(CONFIG_IXGBE_DCB) += ixgbe_dcb.o ixgbe_dcb_82598.o ixgbe_dcb_nl.o diff --git a/drivers/net/ixgbe/ixgbe.h b/drivers/net/ixgbe/ixgbe.h index 9509aee2d361..6cbf26e3db80 100644 --- a/drivers/net/ixgbe/ixgbe.h +++ b/drivers/net/ixgbe/ixgbe.h @@ -327,7 +327,7 @@ enum ixgbe_boards { }; extern struct ixgbe_info ixgbe_82598_info; -#ifdef CONFIG_IXGBE_DCBNL +#ifdef CONFIG_IXGBE_DCB extern struct dcbnl_rtnl_ops dcbnl_ops; extern int ixgbe_copy_dcb_cfg(struct ixgbe_dcb_config *src_dcb_cfg, struct ixgbe_dcb_config *dst_dcb_cfg, diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 6620397a1fb4..667a6463193d 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -1914,7 +1914,7 @@ static void ixgbe_napi_disable_all(struct ixgbe_adapter *adapter) } } -#ifdef CONFIG_IXGBE_DCBNL +#ifdef CONFIG_IXGBE_DCB /* * ixgbe_configure_dcb - Configure DCB hardware * @adapter: ixgbe adapter struct @@ -1960,7 +1960,7 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter) ixgbe_set_rx_mode(netdev); ixgbe_restore_vlan(adapter); -#ifdef CONFIG_IXGBE_DCBNL +#ifdef CONFIG_IXGBE_DCB if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { netif_set_gso_max_size(netdev, 32768); ixgbe_configure_dcb(adapter); @@ -2749,7 +2749,7 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; unsigned int rss; -#ifdef CONFIG_IXGBE_DCBNL +#ifdef CONFIG_IXGBE_DCB int j; struct tc_configuration *tc; #endif @@ -2768,7 +2768,7 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) adapter->flags |= IXGBE_FLAG_RSS_ENABLED; adapter->ring_feature[RING_F_DCB].indices = IXGBE_MAX_DCB_INDICES; -#ifdef CONFIG_IXGBE_DCBNL +#ifdef CONFIG_IXGBE_DCB /* Configure DCB traffic classes */ for (j = 0; j < MAX_TRAFFIC_CLASS; j++) { tc = &adapter->dcb_cfg.tc_config[j]; @@ -4120,7 +4120,7 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; -#ifdef CONFIG_IXGBE_DCBNL +#ifdef CONFIG_IXGBE_DCB netdev->dcbnl_ops = &dcbnl_ops; #endif diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 76a89f8e6a19..0df0db068ac3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -43,7 +43,7 @@ #include #include -#ifdef CONFIG_DCBNL +#ifdef CONFIG_DCB #include #endif @@ -847,7 +847,7 @@ struct net_device #define GSO_MAX_SIZE 65536 unsigned int gso_max_size; -#ifdef CONFIG_DCBNL +#ifdef CONFIG_DCB /* Data Center Bridging netlink ops */ struct dcbnl_rtnl_ops *dcbnl_ops; #endif diff --git a/net/Makefile b/net/Makefile index e5af3dc3a037..ba4460432b7c 100644 --- a/net/Makefile +++ b/net/Makefile @@ -56,8 +56,8 @@ obj-$(CONFIG_NETLABEL) += netlabel/ obj-$(CONFIG_IUCV) += iucv/ obj-$(CONFIG_RFKILL) += rfkill/ obj-$(CONFIG_NET_9P) += 9p/ -ifeq ($(CONFIG_DCBNL),y) -obj-$(CONFIG_DCB) += dcb/ +ifneq ($(CONFIG_DCB),) +obj-y += dcb/ endif ifeq ($(CONFIG_NET),y) diff --git a/net/dcb/Kconfig b/net/dcb/Kconfig index bdf38802d339..4066d59c8de5 100644 --- a/net/dcb/Kconfig +++ b/net/dcb/Kconfig @@ -1,12 +1,22 @@ config DCB - tristate "Data Center Bridging support" - -config DCBNL - bool "Data Center Bridging netlink interface support" - depends on DCB + bool "Data Center Bridging support" default n ---help--- - This option turns on the netlink interface - (dcbnl) for Data Center Bridging capable devices. + This enables support for configuring Data Center Bridging (DCB) + features on DCB capable Ethernet adapters via rtnetlink. Say 'Y' + if you have a DCB capable Ethernet adapter which supports this + interface and you are connected to a DCB capable switch. + + DCB is a collection of Ethernet enhancements which allow DCB capable + NICs and switches to support network traffic with differing + requirements (highly reliable, no drops vs. best effort vs. low + latency) to co-exist on Ethernet. + + DCB features include: + Enhanced Transmission Selection (aka Priority Grouping) - provides a + framework for assigning bandwidth guarantees to traffic classes. + Priority-based Flow Control (PFC) - a MAC control pause frame which + works at the granularity of the 802.1p priority instead of the + link (802.3x). If unsure, say N. diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index b2bda3f610df..79a351d323af 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -48,7 +48,7 @@ */ MODULE_AUTHOR("Lucy Liu, "); -MODULE_DESCRIPTION("Data Center Bridging generic netlink interface"); +MODULE_DESCRIPTION("Data Center Bridging netlink interface"); MODULE_LICENSE("GPL"); /**************** DCB attribute policies *************************************/ -- cgit v1.2.3 From ca109491f612aab5c8152207631c0444f63da97f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Nov 2008 12:43:51 +0100 Subject: hrtimer: removing all ur callback modes Impact: cleanup, move all hrtimer processing into hardirq context This is an attempt at removing some of the hrtimer complexity by reducing the number of callback modes to 1. This means that all hrtimer callback functions will be ran from HARD-irq context. I went through all the 30 odd hrtimer callback functions in the kernel and saw only one that I'm not quite sure of, which is the one in net/can/bcm.c - hence I'm CC-ing the folks responsible for that code. Furthermore, the hrtimer core now calls callbacks directly with IRQs disabled in case you try to enqueue an expired timer. If this timer is a periodic timer (which should use hrtimer_forward() to advance its time) then it might be possible to end up in an inf. recursive loop due to the fact that hrtimer_forward() doesn't round up to the next timer granularity, and therefore keeps on calling the callback - obviously this needs a fix. Aside from that, this seems to compile and actually boot on my dual core test box - although I'm sure there are some bugs in, me not hitting any makes me certain :-) Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- drivers/input/touchscreen/ads7846.c | 4 +- include/linux/hrtimer.h | 34 +---- include/linux/interrupt.h | 3 - kernel/hrtimer.c | 280 ++++-------------------------------- kernel/sched.c | 2 - kernel/time/ntp.c | 4 +- kernel/time/tick-sched.c | 1 - kernel/trace/trace_sysprof.c | 1 - sound/drivers/pcsp/pcsp.c | 1 - 9 files changed, 37 insertions(+), 293 deletions(-) (limited to 'include') diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index b9b7fc6ff1eb..e1ece89fe922 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -697,7 +697,7 @@ static enum hrtimer_restart ads7846_timer(struct hrtimer *handle) struct ads7846 *ts = container_of(handle, struct ads7846, timer); int status = 0; - spin_lock_irq(&ts->lock); + spin_lock(&ts->lock); if (unlikely(!get_pendown_state(ts) || device_suspended(&ts->spi->dev))) { @@ -728,7 +728,7 @@ static enum hrtimer_restart ads7846_timer(struct hrtimer *handle) dev_err(&ts->spi->dev, "spi_async --> %d\n", status); } - spin_unlock_irq(&ts->lock); + spin_unlock(&ts->lock); return HRTIMER_NORESTART; } diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 3eba43878dcb..bd37078c2d7d 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -42,26 +42,6 @@ enum hrtimer_restart { HRTIMER_RESTART, /* Timer must be restarted */ }; -/* - * hrtimer callback modes: - * - * HRTIMER_CB_SOFTIRQ: Callback must run in softirq context - * HRTIMER_CB_IRQSAFE_PERCPU: Callback must run in hardirq context - * Special mode for tick emulation and - * scheduler timer. Such timers are per - * cpu and not allowed to be migrated on - * cpu unplug. - * HRTIMER_CB_IRQSAFE_UNLOCKED: Callback should run in hardirq context - * with timer->base lock unlocked - * used for timers which call wakeup to - * avoid lock order problems with rq->lock - */ -enum hrtimer_cb_mode { - HRTIMER_CB_SOFTIRQ, - HRTIMER_CB_IRQSAFE_PERCPU, - HRTIMER_CB_IRQSAFE_UNLOCKED, -}; - /* * Values to track state of the timer * @@ -70,7 +50,6 @@ enum hrtimer_cb_mode { * 0x00 inactive * 0x01 enqueued into rbtree * 0x02 callback function running - * 0x04 callback pending (high resolution mode) * * Special cases: * 0x03 callback function running and enqueued @@ -92,8 +71,7 @@ enum hrtimer_cb_mode { #define HRTIMER_STATE_INACTIVE 0x00 #define HRTIMER_STATE_ENQUEUED 0x01 #define HRTIMER_STATE_CALLBACK 0x02 -#define HRTIMER_STATE_PENDING 0x04 -#define HRTIMER_STATE_MIGRATE 0x08 +#define HRTIMER_STATE_MIGRATE 0x04 /** * struct hrtimer - the basic hrtimer structure @@ -109,8 +87,6 @@ enum hrtimer_cb_mode { * @function: timer expiry callback function * @base: pointer to the timer base (per cpu and per clock) * @state: state information (See bit values above) - * @cb_mode: high resolution timer feature to select the callback execution - * mode * @cb_entry: list head to enqueue an expired timer into the callback list * @start_site: timer statistics field to store the site where the timer * was started @@ -129,7 +105,6 @@ struct hrtimer { struct hrtimer_clock_base *base; unsigned long state; struct list_head cb_entry; - enum hrtimer_cb_mode cb_mode; #ifdef CONFIG_TIMER_STATS int start_pid; void *start_site; @@ -188,15 +163,11 @@ struct hrtimer_clock_base { * @check_clocks: Indictator, when set evaluate time source and clock * event devices whether high resolution mode can be * activated. - * @cb_pending: Expired timers are moved from the rbtree to this - * list in the timer interrupt. The list is processed - * in the softirq. * @nr_events: Total number of timer interrupt events */ struct hrtimer_cpu_base { spinlock_t lock; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; - struct list_head cb_pending; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; int hres_active; @@ -404,8 +375,7 @@ static inline int hrtimer_active(const struct hrtimer *timer) */ static inline int hrtimer_is_queued(struct hrtimer *timer) { - return timer->state & - (HRTIMER_STATE_ENQUEUED | HRTIMER_STATE_PENDING); + return timer->state & HRTIMER_STATE_ENQUEUED; } /* diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index f58a0cf8929a..d6210a97a8ca 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -251,9 +251,6 @@ enum BLOCK_SOFTIRQ, TASKLET_SOFTIRQ, SCHED_SOFTIRQ, -#ifdef CONFIG_HIGH_RES_TIMERS - HRTIMER_SOFTIRQ, -#endif RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ NR_SOFTIRQS diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 47e63349d1b2..efd6f41e1c16 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -442,22 +442,6 @@ static inline void debug_hrtimer_activate(struct hrtimer *timer) { } static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } #endif -/* - * Check, whether the timer is on the callback pending list - */ -static inline int hrtimer_cb_pending(const struct hrtimer *timer) -{ - return timer->state & HRTIMER_STATE_PENDING; -} - -/* - * Remove a timer from the callback pending list - */ -static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) -{ - list_del_init(&timer->cb_entry); -} - /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS @@ -651,6 +635,8 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } +static void __run_hrtimer(struct hrtimer *timer); + /* * When High resolution timers are active, try to reprogram. Note, that in case * the state has HRTIMER_STATE_CALLBACK set, no reprogramming and no expiry @@ -661,31 +647,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, struct hrtimer_clock_base *base) { if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) { - - /* Timer is expired, act upon the callback mode */ - switch(timer->cb_mode) { - case HRTIMER_CB_IRQSAFE_PERCPU: - case HRTIMER_CB_IRQSAFE_UNLOCKED: - /* - * This is solely for the sched tick emulation with - * dynamic tick support to ensure that we do not - * restart the tick right on the edge and end up with - * the tick timer in the softirq ! The calling site - * takes care of this. Also used for hrtimer sleeper ! - */ - debug_hrtimer_deactivate(timer); - return 1; - case HRTIMER_CB_SOFTIRQ: - /* - * Move everything else into the softirq pending list ! - */ - list_add_tail(&timer->cb_entry, - &base->cpu_base->cb_pending); - timer->state = HRTIMER_STATE_PENDING; - return 1; - default: - BUG(); - } + /* + * XXX: recursion check? + * hrtimer_forward() should round up with timer granularity + * so that we never get into inf recursion here, + * it doesn't do that though + */ + __run_hrtimer(timer); + return 1; } return 0; } @@ -724,11 +693,6 @@ static int hrtimer_switch_to_hres(void) return 1; } -static inline void hrtimer_raise_softirq(void) -{ - raise_softirq(HRTIMER_SOFTIRQ); -} - #else static inline int hrtimer_hres_active(void) { return 0; } @@ -747,7 +711,6 @@ static inline int hrtimer_reprogram(struct hrtimer *timer, { return 0; } -static inline void hrtimer_raise_softirq(void) { } #endif /* CONFIG_HIGH_RES_TIMERS */ @@ -890,10 +853,7 @@ static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, unsigned long newstate, int reprogram) { - /* High res. callback list. NOP for !HIGHRES */ - if (hrtimer_cb_pending(timer)) - hrtimer_remove_cb_pending(timer); - else { + if (timer->state & HRTIMER_STATE_ENQUEUED) { /* * Remove the timer from the rbtree and replace the * first entry pointer if necessary. @@ -953,7 +913,7 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n { struct hrtimer_clock_base *base, *new_base; unsigned long flags; - int ret, raise; + int ret; base = lock_hrtimer_base(timer, &flags); @@ -988,26 +948,8 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n enqueue_hrtimer(timer, new_base, new_base->cpu_base == &__get_cpu_var(hrtimer_bases)); - /* - * The timer may be expired and moved to the cb_pending - * list. We can not raise the softirq with base lock held due - * to a possible deadlock with runqueue lock. - */ - raise = timer->state == HRTIMER_STATE_PENDING; - - /* - * We use preempt_disable to prevent this task from migrating after - * setting up the softirq and raising it. Otherwise, if me migrate - * we will raise the softirq on the wrong CPU. - */ - preempt_disable(); - unlock_hrtimer_base(timer, &flags); - if (raise) - hrtimer_raise_softirq(); - preempt_enable(); - return ret; } EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); @@ -1192,75 +1134,6 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) } EXPORT_SYMBOL_GPL(hrtimer_get_res); -static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base) -{ - spin_lock_irq(&cpu_base->lock); - - while (!list_empty(&cpu_base->cb_pending)) { - enum hrtimer_restart (*fn)(struct hrtimer *); - struct hrtimer *timer; - int restart; - int emulate_hardirq_ctx = 0; - - timer = list_entry(cpu_base->cb_pending.next, - struct hrtimer, cb_entry); - - debug_hrtimer_deactivate(timer); - timer_stats_account_hrtimer(timer); - - fn = timer->function; - /* - * A timer might have been added to the cb_pending list - * when it was migrated during a cpu-offline operation. - * Emulate hardirq context for such timers. - */ - if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU || - timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) - emulate_hardirq_ctx = 1; - - __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0); - spin_unlock_irq(&cpu_base->lock); - - if (unlikely(emulate_hardirq_ctx)) { - local_irq_disable(); - restart = fn(timer); - local_irq_enable(); - } else - restart = fn(timer); - - spin_lock_irq(&cpu_base->lock); - - timer->state &= ~HRTIMER_STATE_CALLBACK; - if (restart == HRTIMER_RESTART) { - BUG_ON(hrtimer_active(timer)); - /* - * Enqueue the timer, allow reprogramming of the event - * device - */ - enqueue_hrtimer(timer, timer->base, 1); - } else if (hrtimer_active(timer)) { - /* - * If the timer was rearmed on another CPU, reprogram - * the event device. - */ - struct hrtimer_clock_base *base = timer->base; - - if (base->first == &timer->node && - hrtimer_reprogram(timer, base)) { - /* - * Timer is expired. Thus move it from tree to - * pending list again. - */ - __remove_hrtimer(timer, base, - HRTIMER_STATE_PENDING, 0); - list_add_tail(&timer->cb_entry, - &base->cpu_base->cb_pending); - } - } - } - spin_unlock_irq(&cpu_base->lock); -} - static void __run_hrtimer(struct hrtimer *timer) { struct hrtimer_clock_base *base = timer->base; @@ -1268,25 +1141,21 @@ static void __run_hrtimer(struct hrtimer *timer) enum hrtimer_restart (*fn)(struct hrtimer *); int restart; + WARN_ON(!irqs_disabled()); + debug_hrtimer_deactivate(timer); __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); timer_stats_account_hrtimer(timer); - fn = timer->function; - if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU || - timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) { - /* - * Used for scheduler timers, avoid lock inversion with - * rq->lock and tasklist_lock. - * - * These timers are required to deal with enqueue expiry - * themselves and are not allowed to migrate. - */ - spin_unlock(&cpu_base->lock); - restart = fn(timer); - spin_lock(&cpu_base->lock); - } else - restart = fn(timer); + + /* + * Because we run timers from hardirq context, there is no chance + * they get migrated to another cpu, therefore its safe to unlock + * the timer base. + */ + spin_unlock(&cpu_base->lock); + restart = fn(timer); + spin_lock(&cpu_base->lock); /* * Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid @@ -1311,7 +1180,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); struct hrtimer_clock_base *base; ktime_t expires_next, now; - int i, raise = 0; + int i; BUG_ON(!cpu_base->hres_active); cpu_base->nr_events++; @@ -1360,16 +1229,6 @@ void hrtimer_interrupt(struct clock_event_device *dev) break; } - /* Move softirq callbacks to the pending list */ - if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) { - __remove_hrtimer(timer, base, - HRTIMER_STATE_PENDING, 0); - list_add_tail(&timer->cb_entry, - &base->cpu_base->cb_pending); - raise = 1; - continue; - } - __run_hrtimer(timer); } spin_unlock(&cpu_base->lock); @@ -1383,10 +1242,6 @@ void hrtimer_interrupt(struct clock_event_device *dev) if (tick_program_event(expires_next, 0)) goto retry; } - - /* Raise softirq ? */ - if (raise) - raise_softirq(HRTIMER_SOFTIRQ); } /** @@ -1413,11 +1268,6 @@ void hrtimer_peek_ahead_timers(void) local_irq_restore(flags); } -static void run_hrtimer_softirq(struct softirq_action *h) -{ - run_hrtimer_pending(&__get_cpu_var(hrtimer_bases)); -} - #endif /* CONFIG_HIGH_RES_TIMERS */ /* @@ -1429,8 +1279,6 @@ static void run_hrtimer_softirq(struct softirq_action *h) */ void hrtimer_run_pending(void) { - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - if (hrtimer_hres_active()) return; @@ -1444,8 +1292,6 @@ void hrtimer_run_pending(void) */ if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) hrtimer_switch_to_hres(); - - run_hrtimer_pending(cpu_base); } /* @@ -1482,14 +1328,6 @@ void hrtimer_run_queues(void) hrtimer_get_expires_tv64(timer)) break; - if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) { - __remove_hrtimer(timer, base, - HRTIMER_STATE_PENDING, 0); - list_add_tail(&timer->cb_entry, - &base->cpu_base->cb_pending); - continue; - } - __run_hrtimer(timer); } spin_unlock(&cpu_base->lock); @@ -1516,9 +1354,6 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) { sl->timer.function = hrtimer_wakeup; sl->task = task; -#ifdef CONFIG_HIGH_RES_TIMERS - sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED; -#endif } static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) @@ -1655,36 +1490,22 @@ static void __cpuinit init_hrtimers_cpu(int cpu) for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) cpu_base->clock_base[i].cpu_base = cpu_base; - INIT_LIST_HEAD(&cpu_base->cb_pending); hrtimer_init_hres(cpu_base); } #ifdef CONFIG_HOTPLUG_CPU -static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base, - struct hrtimer_clock_base *new_base, int dcpu) +static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, + struct hrtimer_clock_base *new_base, int dcpu) { struct hrtimer *timer; struct rb_node *node; - int raise = 0; while ((node = rb_first(&old_base->active))) { timer = rb_entry(node, struct hrtimer, node); BUG_ON(hrtimer_callback_running(timer)); debug_hrtimer_deactivate(timer); - /* - * Should not happen. Per CPU timers should be - * canceled _before_ the migration code is called - */ - if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU) { - __remove_hrtimer(timer, old_base, - HRTIMER_STATE_INACTIVE, 0); - WARN(1, "hrtimer (%p %p)active but cpu %d dead\n", - timer, timer->function, dcpu); - continue; - } - /* * Mark it as STATE_MIGRATE not INACTIVE otherwise the * timer could be seen as !active and just vanish away @@ -1708,48 +1529,19 @@ static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base, * otherwise we end up with a stale timer. */ if (timer->state == HRTIMER_STATE_MIGRATE) { - timer->state = HRTIMER_STATE_PENDING; - list_add_tail(&timer->cb_entry, - &new_base->cpu_base->cb_pending); - raise = 1; + /* XXX: running on offline cpu */ + __run_hrtimer(timer); } #endif /* Clear the migration state bit */ timer->state &= ~HRTIMER_STATE_MIGRATE; } - return raise; } -#ifdef CONFIG_HIGH_RES_TIMERS -static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base, - struct hrtimer_cpu_base *new_base) -{ - struct hrtimer *timer; - int raise = 0; - - while (!list_empty(&old_base->cb_pending)) { - timer = list_entry(old_base->cb_pending.next, - struct hrtimer, cb_entry); - - __remove_hrtimer(timer, timer->base, HRTIMER_STATE_PENDING, 0); - timer->base = &new_base->clock_base[timer->base->index]; - list_add_tail(&timer->cb_entry, &new_base->cb_pending); - raise = 1; - } - return raise; -} -#else -static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base, - struct hrtimer_cpu_base *new_base) -{ - return 0; -} -#endif - static void migrate_hrtimers(int cpu) { struct hrtimer_cpu_base *old_base, *new_base; - int i, raise = 0; + int i; BUG_ON(cpu_online(cpu)); old_base = &per_cpu(hrtimer_bases, cpu); @@ -1764,20 +1556,13 @@ static void migrate_hrtimers(int cpu) spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { - if (migrate_hrtimer_list(&old_base->clock_base[i], - &new_base->clock_base[i], cpu)) - raise = 1; + migrate_hrtimer_list(&old_base->clock_base[i], + &new_base->clock_base[i], cpu); } - if (migrate_hrtimer_pending(old_base, new_base)) - raise = 1; - spin_unlock(&old_base->lock); spin_unlock_irq(&new_base->lock); put_cpu_var(hrtimer_bases); - - if (raise) - hrtimer_raise_softirq(); } #endif /* CONFIG_HOTPLUG_CPU */ @@ -1817,9 +1602,6 @@ void __init hrtimers_init(void) hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, (void *)(long)smp_processor_id()); register_cpu_notifier(&hrtimers_nb); -#ifdef CONFIG_HIGH_RES_TIMERS - open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq); -#endif } /** diff --git a/kernel/sched.c b/kernel/sched.c index 9b1e79371c20..5ac5e9536168 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -203,7 +203,6 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); rt_b->rt_period_timer.function = sched_rt_period_timer; - rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED; } static inline int rt_bandwidth_enabled(void) @@ -1139,7 +1138,6 @@ static void init_rq_hrtick(struct rq *rq) hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); rq->hrtick_timer.function = hrtick; - rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU; } #else /* CONFIG_SCHED_HRTICK */ static inline void hrtick_clear(struct rq *rq) diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 8ff15e5d486b..f5f793d92415 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -131,7 +131,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) { enum hrtimer_restart res = HRTIMER_NORESTART; - write_seqlock_irq(&xtime_lock); + write_seqlock(&xtime_lock); switch (time_state) { case TIME_OK: @@ -164,7 +164,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) } update_vsyscall(&xtime, clock); - write_sequnlock_irq(&xtime_lock); + write_sequnlock(&xtime_lock); return res; } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 342fc9ccab46..502a81e2639b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -681,7 +681,6 @@ void tick_setup_sched_timer(void) */ hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); ts->sched_timer.function = tick_sched_timer; - ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU; /* Get the next period (per cpu) */ hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index 9587d3bcba55..ae542e2e38d5 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -202,7 +202,6 @@ static void start_stack_timer(int cpu) hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer->function = stack_trace_timer_fn; - hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_PERCPU; hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL); } diff --git a/sound/drivers/pcsp/pcsp.c b/sound/drivers/pcsp/pcsp.c index 1899cf0685bc..8e52b2a8a13a 100644 --- a/sound/drivers/pcsp/pcsp.c +++ b/sound/drivers/pcsp/pcsp.c @@ -96,7 +96,6 @@ static int __devinit snd_card_pcsp_probe(int devnum, struct device *dev) return -EINVAL; hrtimer_init(&pcsp_chip.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - pcsp_chip.timer.cb_mode = HRTIMER_CB_SOFTIRQ; pcsp_chip.timer.function = pcsp_do_timer; card = snd_card_new(index, id, THIS_MODULE, 0); -- cgit v1.2.3 From ca0002a179bfa532d009a9272d619732872c49bd Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 25 Nov 2008 09:01:25 +0100 Subject: x86, bts: base in-kernel ds interface on handles Impact: generalize the DS code to shared buffers Change the in-kernel ds.h interface to identify the tracer via a handle returned on ds_request_~(). Tracers used to be identified via their task_struct. The changes are required to allow DS to be shared between different tasks, which is needed for perfmon2 and for ftrace. For ptrace, the handle is stored in the traced task's task_struct. This should probably go into a (arch-specific) ptrace context some time. Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ds.h | 124 ++++----- arch/x86/kernel/ds.c | 679 +++++++++++++++++++++++----------------------- arch/x86/kernel/ptrace.c | 73 ++--- include/linux/sched.h | 9 + 4 files changed, 446 insertions(+), 439 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index a95008457ea4..0af997de5f01 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h @@ -26,11 +26,18 @@ #include #include +#include #ifdef CONFIG_X86_DS struct task_struct; +struct ds_tracer; +struct bts_tracer; +struct pebs_tracer; + +typedef void (*bts_ovfl_callback_t)(struct bts_tracer *); +typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *); /* * Request BTS or PEBS @@ -38,21 +45,29 @@ struct task_struct; * Due to alignement constraints, the actual buffer may be slightly * smaller than the requested or provided buffer. * - * Returns 0 on success; -Eerrno otherwise + * Returns a pointer to a tracer structure on success, or + * ERR_PTR(errcode) on failure. + * + * The interrupt threshold is independent from the overflow callback + * to allow users to use their own overflow interrupt handling mechanism. * * task: the task to request recording for; * NULL for per-cpu recording on the current cpu * base: the base pointer for the (non-pageable) buffer; * NULL if buffer allocation requested - * size: the size of the requested or provided buffer + * size: the size of the requested or provided buffer in bytes * ovfl: pointer to a function to be called on buffer overflow; * NULL if cyclic buffer requested + * th: the interrupt threshold in records from the end of the buffer; + * -1 if no interrupt threshold is requested. */ -typedef void (*ds_ovfl_callback_t)(struct task_struct *); -extern int ds_request_bts(struct task_struct *task, void *base, size_t size, - ds_ovfl_callback_t ovfl); -extern int ds_request_pebs(struct task_struct *task, void *base, size_t size, - ds_ovfl_callback_t ovfl); +extern struct bts_tracer *ds_request_bts(struct task_struct *task, + void *base, size_t size, + bts_ovfl_callback_t ovfl, size_t th); +extern struct pebs_tracer *ds_request_pebs(struct task_struct *task, + void *base, size_t size, + pebs_ovfl_callback_t ovfl, + size_t th); /* * Release BTS or PEBS resources @@ -61,37 +76,34 @@ extern int ds_request_pebs(struct task_struct *task, void *base, size_t size, * * Returns 0 on success; -Eerrno otherwise * - * task: the task to release resources for; - * NULL to release resources for the current cpu + * tracer: the tracer handle returned from ds_request_~() */ -extern int ds_release_bts(struct task_struct *task); -extern int ds_release_pebs(struct task_struct *task); +extern int ds_release_bts(struct bts_tracer *tracer); +extern int ds_release_pebs(struct pebs_tracer *tracer); /* - * Return the (array) index of the write pointer. + * Get the (array) index of the write pointer. * (assuming an array of BTS/PEBS records) * - * Returns -Eerrno on error + * Returns 0 on success; -Eerrno on error * - * task: the task to access; - * NULL to access the current cpu - * pos (out): if not NULL, will hold the result + * tracer: the tracer handle returned from ds_request_~() + * pos (out): will hold the result */ -extern int ds_get_bts_index(struct task_struct *task, size_t *pos); -extern int ds_get_pebs_index(struct task_struct *task, size_t *pos); +extern int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos); +extern int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos); /* - * Return the (array) index one record beyond the end of the array. + * Get the (array) index one record beyond the end of the array. * (assuming an array of BTS/PEBS records) * - * Returns -Eerrno on error + * Returns 0 on success; -Eerrno on error * - * task: the task to access; - * NULL to access the current cpu - * pos (out): if not NULL, will hold the result + * tracer: the tracer handle returned from ds_request_~() + * pos (out): will hold the result */ -extern int ds_get_bts_end(struct task_struct *task, size_t *pos); -extern int ds_get_pebs_end(struct task_struct *task, size_t *pos); +extern int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos); +extern int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos); /* * Provide a pointer to the BTS/PEBS record at parameter index. @@ -102,14 +114,13 @@ extern int ds_get_pebs_end(struct task_struct *task, size_t *pos); * * Returns the size of a single record on success; -Eerrno on error * - * task: the task to access; - * NULL to access the current cpu + * tracer: the tracer handle returned from ds_request_~() * index: the index of the requested record * record (out): pointer to the requested record */ -extern int ds_access_bts(struct task_struct *task, +extern int ds_access_bts(struct bts_tracer *tracer, size_t index, const void **record); -extern int ds_access_pebs(struct task_struct *task, +extern int ds_access_pebs(struct pebs_tracer *tracer, size_t index, const void **record); /* @@ -129,38 +140,24 @@ extern int ds_access_pebs(struct task_struct *task, * * Returns the number of bytes written or -Eerrno. * - * task: the task to access; - * NULL to access the current cpu + * tracer: the tracer handle returned from ds_request_~() * buffer: the buffer to write * size: the size of the buffer */ -extern int ds_write_bts(struct task_struct *task, +extern int ds_write_bts(struct bts_tracer *tracer, const void *buffer, size_t size); -extern int ds_write_pebs(struct task_struct *task, +extern int ds_write_pebs(struct pebs_tracer *tracer, const void *buffer, size_t size); -/* - * Same as ds_write_bts/pebs, but omit ownership checks. - * - * This is needed to have some other task than the owner of the - * BTS/PEBS buffer or the parameter task itself write into the - * respective buffer. - */ -extern int ds_unchecked_write_bts(struct task_struct *task, - const void *buffer, size_t size); -extern int ds_unchecked_write_pebs(struct task_struct *task, - const void *buffer, size_t size); - /* * Reset the write pointer of the BTS/PEBS buffer. * * Returns 0 on success; -Eerrno on error * - * task: the task to access; - * NULL to access the current cpu + * tracer: the tracer handle returned from ds_request_~() */ -extern int ds_reset_bts(struct task_struct *task); -extern int ds_reset_pebs(struct task_struct *task); +extern int ds_reset_bts(struct bts_tracer *tracer); +extern int ds_reset_pebs(struct pebs_tracer *tracer); /* * Clear the BTS/PEBS buffer and reset the write pointer. @@ -168,33 +165,30 @@ extern int ds_reset_pebs(struct task_struct *task); * * Returns 0 on success; -Eerrno on error * - * task: the task to access; - * NULL to access the current cpu + * tracer: the tracer handle returned from ds_request_~() */ -extern int ds_clear_bts(struct task_struct *task); -extern int ds_clear_pebs(struct task_struct *task); +extern int ds_clear_bts(struct bts_tracer *tracer); +extern int ds_clear_pebs(struct pebs_tracer *tracer); /* * Provide the PEBS counter reset value. * * Returns 0 on success; -Eerrno on error * - * task: the task to access; - * NULL to access the current cpu + * tracer: the tracer handle returned from ds_request_pebs() * value (out): the counter reset value */ -extern int ds_get_pebs_reset(struct task_struct *task, u64 *value); +extern int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value); /* * Set the PEBS counter reset value. * * Returns 0 on success; -Eerrno on error * - * task: the task to access; - * NULL to access the current cpu + * tracer: the tracer handle returned from ds_request_pebs() * value: the new counter reset value */ -extern int ds_set_pebs_reset(struct task_struct *task, u64 value); +extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value); /* * Initialization @@ -207,17 +201,13 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); /* * The DS context - part of struct thread_struct. */ +#define MAX_SIZEOF_DS (12 * 8) + struct ds_context { /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ - unsigned char *ds; + unsigned char ds[MAX_SIZEOF_DS]; /* the owner of the BTS and PEBS configuration, respectively */ - struct task_struct *owner[2]; - /* buffer overflow notification function for BTS and PEBS */ - ds_ovfl_callback_t callback[2]; - /* the original buffer address */ - void *buffer[2]; - /* the number of allocated pages for on-request allocated buffers */ - unsigned int pages[2]; + struct ds_tracer *owner[2]; /* use count */ unsigned long count; /* a pointer to the context location inside the thread_struct diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index d6938d9351cf..96768e9cce99 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -28,6 +28,7 @@ #include #include #include +#include /* @@ -44,6 +45,35 @@ struct ds_configuration { }; static struct ds_configuration ds_cfg; +/* + * A BTS or PEBS tracer. + * + * This holds the configuration of the tracer and serves as a handle + * to identify tracers. + */ +struct ds_tracer { + /* the DS context (partially) owned by this tracer */ + struct ds_context *context; + /* the buffer provided on ds_request() and its size in bytes */ + void *buffer; + size_t size; + /* the number of allocated pages for on-request allocated buffers */ + unsigned int pages; +}; + +struct bts_tracer { + /* the common DS part */ + struct ds_tracer ds; + /* buffer overflow notification function */ + bts_ovfl_callback_t ovfl; +}; + +struct pebs_tracer { + /* the common DS part */ + struct ds_tracer ds; + /* buffer overflow notification function */ + pebs_ovfl_callback_t ovfl; +}; /* * Debug Store (DS) save area configuration (see Intel64 and IA32 @@ -107,35 +137,15 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual, (*(unsigned long *)base) = value; } +#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */ + /* * Locking is done only for allocating BTS or PEBS resources and for * guarding context and buffer memory allocation. - * - * Most functions require the current task to own the ds context part - * they are going to access. All the locking is done when validating - * access to the context. */ static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); -/* - * Validate that the current task is allowed to access the BTS/PEBS - * buffer of the parameter task. - * - * Returns 0, if access is granted; -Eerrno, otherwise. - */ -static inline int ds_validate_access(struct ds_context *context, - enum ds_qualifier qual) -{ - if (!context) - return -EPERM; - - if (context->owner[qual] == current) - return 0; - - return -EPERM; -} - /* * We either support (system-wide) per-cpu or per-thread allocation. @@ -183,50 +193,12 @@ static inline int check_tracer(struct task_struct *task) * * Contexts are use-counted. They are allocated on first access and * deallocated when the last user puts the context. - * - * We distinguish between an allocating and a non-allocating get of a - * context: - * - the allocating get is used for requesting BTS/PEBS resources. It - * requires the caller to hold the global ds_lock. - * - the non-allocating get is used for all other cases. A - * non-existing context indicates an error. It acquires and releases - * the ds_lock itself for obtaining the context. - * - * A context and its DS configuration are allocated and deallocated - * together. A context always has a DS configuration of the - * appropriate size. */ static DEFINE_PER_CPU(struct ds_context *, system_context); #define this_system_context per_cpu(system_context, smp_processor_id()) -/* - * Returns the pointer to the parameter task's context or to the - * system-wide context, if task is NULL. - * - * Increases the use count of the returned context, if not NULL. - */ static inline struct ds_context *ds_get_context(struct task_struct *task) -{ - struct ds_context *context; - unsigned long irq; - - spin_lock_irqsave(&ds_lock, irq); - - context = (task ? task->thread.ds_ctx : this_system_context); - if (context) - context->count++; - - spin_unlock_irqrestore(&ds_lock, irq); - - return context; -} - -/* - * Same as ds_get_context, but allocates the context and it's DS - * structure, if necessary; returns NULL; if out of memory. - */ -static inline struct ds_context *ds_alloc_context(struct task_struct *task) { struct ds_context **p_context = (task ? &task->thread.ds_ctx : &this_system_context); @@ -238,16 +210,9 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task) if (!context) return NULL; - context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); - if (!context->ds) { - kfree(context); - return NULL; - } - spin_lock_irqsave(&ds_lock, irq); if (*p_context) { - kfree(context->ds); kfree(context); context = *p_context; @@ -272,10 +237,6 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task) return context; } -/* - * Decreases the use count of the parameter context, if not NULL. - * Deallocates the context, if the use count reaches zero. - */ static inline void ds_put_context(struct ds_context *context) { unsigned long irq; @@ -296,13 +257,6 @@ static inline void ds_put_context(struct ds_context *context) if (!context->task || (context->task == current)) wrmsrl(MSR_IA32_DS_AREA, 0); - put_tracer(context->task); - - /* free any leftover buffers from tracers that did not - * deallocate them properly. */ - kfree(context->buffer[ds_bts]); - kfree(context->buffer[ds_pebs]); - kfree(context->ds); kfree(context); out: spin_unlock_irqrestore(&ds_lock, irq); @@ -312,21 +266,29 @@ static inline void ds_put_context(struct ds_context *context) /* * Handle a buffer overflow * - * task: the task whose buffers are overflowing; - * NULL for a buffer overflow on the current cpu * context: the ds context * qual: the buffer type */ -static void ds_overflow(struct task_struct *task, struct ds_context *context, - enum ds_qualifier qual) -{ - if (!context) - return; - - if (context->callback[qual]) - (*context->callback[qual])(task); - - /* todo: do some more overflow handling */ +static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) +{ + switch (qual) { + case ds_bts: { + struct bts_tracer *tracer = + container_of(context->owner[qual], + struct bts_tracer, ds); + if (tracer->ovfl) + tracer->ovfl(tracer); + } + break; + case ds_pebs: { + struct pebs_tracer *tracer = + container_of(context->owner[qual], + struct pebs_tracer, ds); + if (tracer->ovfl) + tracer->ovfl(tracer); + } + break; + } } @@ -343,23 +305,25 @@ static void ds_overflow(struct task_struct *task, struct ds_context *context, static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) { unsigned long rlim, vm, pgsz; - void *buffer; + void *buffer = NULL; pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; + down_write(¤t->mm->mmap_sem); + rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; vm = current->mm->total_vm + pgsz; if (rlim < vm) - return NULL; + goto out; rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; vm = current->mm->locked_vm + pgsz; if (rlim < vm) - return NULL; + goto out; buffer = kzalloc(size, GFP_KERNEL); if (!buffer) - return NULL; + goto out; current->mm->total_vm += pgsz; current->mm->locked_vm += pgsz; @@ -367,290 +331,337 @@ static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) if (pages) *pages = pgsz; + out: + up_write(¤t->mm->mmap_sem); return buffer; } -static int ds_request(struct task_struct *task, void *base, size_t size, - ds_ovfl_callback_t ovfl, enum ds_qualifier qual) +static void ds_install_ds_config(struct ds_context *context, + enum ds_qualifier qual, + void *base, size_t size, size_t ith) { - struct ds_context *context; unsigned long buffer, adj; - const unsigned long alignment = (1 << 3); + + /* adjust the buffer address and size to meet alignment + * constraints: + * - buffer is double-word aligned + * - size is multiple of record size + * + * We checked the size at the very beginning; we have enough + * space to do the adjustment. + */ + buffer = (unsigned long)base; + + adj = ALIGN(buffer, DS_ALIGNMENT) - buffer; + buffer += adj; + size -= adj; + + size /= ds_cfg.sizeof_rec[qual]; + size *= ds_cfg.sizeof_rec[qual]; + + ds_set(context->ds, qual, ds_buffer_base, buffer); + ds_set(context->ds, qual, ds_index, buffer); + ds_set(context->ds, qual, ds_absolute_maximum, buffer + size); + + /* The value for 'no threshold' is -1, which will set the + * threshold outside of the buffer, just like we want it. + */ + ds_set(context->ds, qual, + ds_interrupt_threshold, buffer + size - ith); +} + +static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual, + struct task_struct *task, + void *base, size_t size, size_t th) +{ + struct ds_context *context; unsigned long irq; - int error = 0; + int error; + error = -EOPNOTSUPP; if (!ds_cfg.sizeof_ds) - return -EOPNOTSUPP; + goto out; /* we require some space to do alignment adjustments below */ - if (size < (alignment + ds_cfg.sizeof_rec[qual])) - return -EINVAL; + error = -EINVAL; + if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) + goto out; - /* buffer overflow notification is not yet implemented */ - if (ovfl) - return -EOPNOTSUPP; + if (th != (size_t)-1) { + th *= ds_cfg.sizeof_rec[qual]; + + error = -EINVAL; + if (size <= th) + goto out; + } + + error = -ENOMEM; + if (!base) { + base = ds_allocate_buffer(size, &tracer->pages); + if (!base) + goto out; + } + tracer->buffer = base; + tracer->size = size; - context = ds_alloc_context(task); + error = -ENOMEM; + context = ds_get_context(task); if (!context) - return -ENOMEM; + goto out; + tracer->context = context; + spin_lock_irqsave(&ds_lock, irq); error = -EPERM; if (!check_tracer(task)) goto out_unlock; - get_tracer(task); - error = -EALREADY; - if (context->owner[qual] == current) - goto out_put_tracer; error = -EPERM; - if (context->owner[qual] != NULL) + if (context->owner[qual]) goto out_put_tracer; - context->owner[qual] = current; + context->owner[qual] = tracer; spin_unlock_irqrestore(&ds_lock, irq); - error = -ENOMEM; - if (!base) { - base = ds_allocate_buffer(size, &context->pages[qual]); - if (!base) - goto out_release; - - context->buffer[qual] = base; - } - error = 0; + ds_install_ds_config(context, qual, base, size, th); - context->callback[qual] = ovfl; - - /* adjust the buffer address and size to meet alignment - * constraints: - * - buffer is double-word aligned - * - size is multiple of record size - * - * We checked the size at the very beginning; we have enough - * space to do the adjustment. - */ - buffer = (unsigned long)base; - - adj = ALIGN(buffer, alignment) - buffer; - buffer += adj; - size -= adj; - - size /= ds_cfg.sizeof_rec[qual]; - size *= ds_cfg.sizeof_rec[qual]; - - ds_set(context->ds, qual, ds_buffer_base, buffer); - ds_set(context->ds, qual, ds_index, buffer); - ds_set(context->ds, qual, ds_absolute_maximum, buffer + size); - - if (ovfl) { - /* todo: select a suitable interrupt threshold */ - } else - ds_set(context->ds, qual, - ds_interrupt_threshold, buffer + size + 1); - - /* we keep the context until ds_release */ - return error; - - out_release: - context->owner[qual] = NULL; - ds_put_context(context); - put_tracer(task); - return error; + return 0; out_put_tracer: - spin_unlock_irqrestore(&ds_lock, irq); - ds_put_context(context); put_tracer(task); - return error; - out_unlock: spin_unlock_irqrestore(&ds_lock, irq); ds_put_context(context); + tracer->context = NULL; + out: return error; } -int ds_request_bts(struct task_struct *task, void *base, size_t size, - ds_ovfl_callback_t ovfl) +struct bts_tracer *ds_request_bts(struct task_struct *task, + void *base, size_t size, + bts_ovfl_callback_t ovfl, size_t th) { - return ds_request(task, base, size, ovfl, ds_bts); -} + struct bts_tracer *tracer; + int error; -int ds_request_pebs(struct task_struct *task, void *base, size_t size, - ds_ovfl_callback_t ovfl) -{ - return ds_request(task, base, size, ovfl, ds_pebs); + /* buffer overflow notification is not yet implemented */ + error = -EOPNOTSUPP; + if (ovfl) + goto out; + + error = -ENOMEM; + tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); + if (!tracer) + goto out; + tracer->ovfl = ovfl; + + error = ds_request(&tracer->ds, ds_bts, task, base, size, th); + if (error < 0) + goto out_tracer; + + return tracer; + + out_tracer: + (void)ds_release_bts(tracer); + out: + return ERR_PTR(error); } -static int ds_release(struct task_struct *task, enum ds_qualifier qual) +struct pebs_tracer *ds_request_pebs(struct task_struct *task, + void *base, size_t size, + pebs_ovfl_callback_t ovfl, size_t th) { - struct ds_context *context; + struct pebs_tracer *tracer; int error; - context = ds_get_context(task); - error = ds_validate_access(context, qual); - if (error < 0) + /* buffer overflow notification is not yet implemented */ + error = -EOPNOTSUPP; + if (ovfl) goto out; - kfree(context->buffer[qual]); - context->buffer[qual] = NULL; + error = -ENOMEM; + tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); + if (!tracer) + goto out; + tracer->ovfl = ovfl; - current->mm->total_vm -= context->pages[qual]; - current->mm->locked_vm -= context->pages[qual]; - context->pages[qual] = 0; - context->owner[qual] = NULL; + error = ds_request(&tracer->ds, ds_pebs, task, base, size, th); + if (error < 0) + goto out_tracer; - /* - * we put the context twice: - * once for the ds_get_context - * once for the corresponding ds_request - */ - ds_put_context(context); + return tracer; + + out_tracer: + (void)ds_release_pebs(tracer); out: - ds_put_context(context); - return error; + return ERR_PTR(error); +} + +static void ds_release(struct ds_tracer *tracer, enum ds_qualifier qual) +{ + if (tracer->context) { + BUG_ON(tracer->context->owner[qual] != tracer); + tracer->context->owner[qual] = NULL; + + put_tracer(tracer->context->task); + ds_put_context(tracer->context); + } + + if (tracer->pages) { + kfree(tracer->buffer); + + down_write(¤t->mm->mmap_sem); + + current->mm->total_vm -= tracer->pages; + current->mm->locked_vm -= tracer->pages; + + up_write(¤t->mm->mmap_sem); + } } -int ds_release_bts(struct task_struct *task) +int ds_release_bts(struct bts_tracer *tracer) { - return ds_release(task, ds_bts); + if (!tracer) + return -EINVAL; + + ds_release(&tracer->ds, ds_bts); + kfree(tracer); + + return 0; } -int ds_release_pebs(struct task_struct *task) +int ds_release_pebs(struct pebs_tracer *tracer) { - return ds_release(task, ds_pebs); + if (!tracer) + return -EINVAL; + + ds_release(&tracer->ds, ds_pebs); + kfree(tracer); + + return 0; } -static int ds_get_index(struct task_struct *task, size_t *pos, - enum ds_qualifier qual) +static size_t ds_get_index(struct ds_context *context, enum ds_qualifier qual) { - struct ds_context *context; unsigned long base, index; - int error; - - context = ds_get_context(task); - error = ds_validate_access(context, qual); - if (error < 0) - goto out; base = ds_get(context->ds, qual, ds_buffer_base); index = ds_get(context->ds, qual, ds_index); - error = ((index - base) / ds_cfg.sizeof_rec[qual]); - if (pos) - *pos = error; - out: - ds_put_context(context); - return error; + return (index - base) / ds_cfg.sizeof_rec[qual]; } -int ds_get_bts_index(struct task_struct *task, size_t *pos) +int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos) { - return ds_get_index(task, pos, ds_bts); + if (!tracer) + return -EINVAL; + + if (!pos) + return -EINVAL; + + *pos = ds_get_index(tracer->ds.context, ds_bts); + + return 0; } -int ds_get_pebs_index(struct task_struct *task, size_t *pos) +int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos) { - return ds_get_index(task, pos, ds_pebs); + if (!tracer) + return -EINVAL; + + if (!pos) + return -EINVAL; + + *pos = ds_get_index(tracer->ds.context, ds_pebs); + + return 0; } -static int ds_get_end(struct task_struct *task, size_t *pos, - enum ds_qualifier qual) +static size_t ds_get_end(struct ds_context *context, enum ds_qualifier qual) { - struct ds_context *context; - unsigned long base, end; - int error; - - context = ds_get_context(task); - error = ds_validate_access(context, qual); - if (error < 0) - goto out; + unsigned long base, max; base = ds_get(context->ds, qual, ds_buffer_base); - end = ds_get(context->ds, qual, ds_absolute_maximum); + max = ds_get(context->ds, qual, ds_absolute_maximum); - error = ((end - base) / ds_cfg.sizeof_rec[qual]); - if (pos) - *pos = error; - out: - ds_put_context(context); - return error; + return (max - base) / ds_cfg.sizeof_rec[qual]; } -int ds_get_bts_end(struct task_struct *task, size_t *pos) +int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos) { - return ds_get_end(task, pos, ds_bts); + if (!tracer) + return -EINVAL; + + if (!pos) + return -EINVAL; + + *pos = ds_get_end(tracer->ds.context, ds_bts); + + return 0; } -int ds_get_pebs_end(struct task_struct *task, size_t *pos) +int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos) { - return ds_get_end(task, pos, ds_pebs); + if (!tracer) + return -EINVAL; + + if (!pos) + return -EINVAL; + + *pos = ds_get_end(tracer->ds.context, ds_pebs); + + return 0; } -static int ds_access(struct task_struct *task, size_t index, - const void **record, enum ds_qualifier qual) +static int ds_access(struct ds_context *context, enum ds_qualifier qual, + size_t index, const void **record) { - struct ds_context *context; unsigned long base, idx; - int error; if (!record) return -EINVAL; - context = ds_get_context(task); - error = ds_validate_access(context, qual); - if (error < 0) - goto out; - base = ds_get(context->ds, qual, ds_buffer_base); idx = base + (index * ds_cfg.sizeof_rec[qual]); - error = -EINVAL; if (idx > ds_get(context->ds, qual, ds_absolute_maximum)) - goto out; + return -EINVAL; *record = (const void *)idx; - error = ds_cfg.sizeof_rec[qual]; - out: - ds_put_context(context); - return error; + + return ds_cfg.sizeof_rec[qual]; } -int ds_access_bts(struct task_struct *task, size_t index, const void **record) +int ds_access_bts(struct bts_tracer *tracer, size_t index, + const void **record) { - return ds_access(task, index, record, ds_bts); + if (!tracer) + return -EINVAL; + + return ds_access(tracer->ds.context, ds_bts, index, record); } -int ds_access_pebs(struct task_struct *task, size_t index, const void **record) +int ds_access_pebs(struct pebs_tracer *tracer, size_t index, + const void **record) { - return ds_access(task, index, record, ds_pebs); + if (!tracer) + return -EINVAL; + + return ds_access(tracer->ds.context, ds_pebs, index, record); } -static int ds_write(struct task_struct *task, const void *record, size_t size, - enum ds_qualifier qual, int force) +static int ds_write(struct ds_context *context, enum ds_qualifier qual, + const void *record, size_t size) { - struct ds_context *context; - int error; + int bytes_written = 0; if (!record) return -EINVAL; - error = -EPERM; - context = ds_get_context(task); - if (!context) - goto out; - - if (!force) { - error = ds_validate_access(context, qual); - if (error < 0) - goto out; - } - - error = 0; while (size) { unsigned long base, index, end, write_end, int_th; unsigned long write_size, adj_write_size; @@ -678,14 +689,14 @@ static int ds_write(struct task_struct *task, const void *record, size_t size, write_end = end; if (write_end <= index) - goto out; + break; write_size = min((unsigned long) size, write_end - index); memcpy((void *)index, record, write_size); record = (const char *)record + write_size; - size -= write_size; - error += write_size; + size -= write_size; + bytes_written += write_size; adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; adj_write_size *= ds_cfg.sizeof_rec[qual]; @@ -700,47 +711,32 @@ static int ds_write(struct task_struct *task, const void *record, size_t size, ds_set(context->ds, qual, ds_index, index); if (index >= int_th) - ds_overflow(task, context, qual); + ds_overflow(context, qual); } - out: - ds_put_context(context); - return error; + return bytes_written; } -int ds_write_bts(struct task_struct *task, const void *record, size_t size) +int ds_write_bts(struct bts_tracer *tracer, const void *record, size_t size) { - return ds_write(task, record, size, ds_bts, /* force = */ 0); -} + if (!tracer) + return -EINVAL; -int ds_write_pebs(struct task_struct *task, const void *record, size_t size) -{ - return ds_write(task, record, size, ds_pebs, /* force = */ 0); + return ds_write(tracer->ds.context, ds_bts, record, size); } -int ds_unchecked_write_bts(struct task_struct *task, - const void *record, size_t size) +int ds_write_pebs(struct pebs_tracer *tracer, const void *record, size_t size) { - return ds_write(task, record, size, ds_bts, /* force = */ 1); -} + if (!tracer) + return -EINVAL; -int ds_unchecked_write_pebs(struct task_struct *task, - const void *record, size_t size) -{ - return ds_write(task, record, size, ds_pebs, /* force = */ 1); + return ds_write(tracer->ds.context, ds_pebs, record, size); } -static int ds_reset_or_clear(struct task_struct *task, - enum ds_qualifier qual, int clear) +static void ds_reset_or_clear(struct ds_context *context, + enum ds_qualifier qual, int clear) { - struct ds_context *context; unsigned long base, end; - int error; - - context = ds_get_context(task); - error = ds_validate_access(context, qual); - if (error < 0) - goto out; base = ds_get(context->ds, qual, ds_buffer_base); end = ds_get(context->ds, qual, ds_absolute_maximum); @@ -749,70 +745,69 @@ static int ds_reset_or_clear(struct task_struct *task, memset((void *)base, 0, end - base); ds_set(context->ds, qual, ds_index, base); - - error = 0; - out: - ds_put_context(context); - return error; } -int ds_reset_bts(struct task_struct *task) +int ds_reset_bts(struct bts_tracer *tracer) { - return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); + if (!tracer) + return -EINVAL; + + ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 0); + + return 0; } -int ds_reset_pebs(struct task_struct *task) +int ds_reset_pebs(struct pebs_tracer *tracer) { - return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); + if (!tracer) + return -EINVAL; + + ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 0); + + return 0; } -int ds_clear_bts(struct task_struct *task) +int ds_clear_bts(struct bts_tracer *tracer) { - return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); + if (!tracer) + return -EINVAL; + + ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 1); + + return 0; } -int ds_clear_pebs(struct task_struct *task) +int ds_clear_pebs(struct pebs_tracer *tracer) { - return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); + if (!tracer) + return -EINVAL; + + ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 1); + + return 0; } -int ds_get_pebs_reset(struct task_struct *task, u64 *value) +int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value) { - struct ds_context *context; - int error; + if (!tracer) + return -EINVAL; if (!value) return -EINVAL; - context = ds_get_context(task); - error = ds_validate_access(context, ds_pebs); - if (error < 0) - goto out; - - *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)); + *value = *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)); - error = 0; - out: - ds_put_context(context); - return error; + return 0; } -int ds_set_pebs_reset(struct task_struct *task, u64 value) +int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) { - struct ds_context *context; - int error; - - context = ds_get_context(task); - error = ds_validate_access(context, ds_pebs); - if (error < 0) - goto out; + if (!tracer) + return -EINVAL; - *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; + *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value; - error = 0; - out: - ds_put_context(context); - return error; + return 0; } static const struct ds_configuration ds_cfg_var = { @@ -840,6 +835,10 @@ static inline void ds_configure(const struct ds_configuration *cfg) { ds_cfg = *cfg; + + printk(KERN_INFO "DS available\n"); + + BUG_ON(MAX_SIZEOF_DS < ds_cfg.sizeof_ds); } void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) @@ -883,6 +882,8 @@ void ds_free(struct ds_context *context) * is dying. There should not be any user of that context left * to disturb us, anymore. */ unsigned long leftovers = context->count; - while (leftovers--) + while (leftovers--) { + put_tracer(context->task); ds_put_context(context); + } } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 06180dff5b2e..76adf5b640ff 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -668,14 +668,14 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index, size_t bts_index, bts_end; int error; - error = ds_get_bts_end(child, &bts_end); + error = ds_get_bts_end(child->bts, &bts_end); if (error < 0) return error; if (bts_end <= index) return -EINVAL; - error = ds_get_bts_index(child, &bts_index); + error = ds_get_bts_index(child->bts, &bts_index); if (error < 0) return error; @@ -684,7 +684,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index, if (bts_end <= bts_index) bts_index -= bts_end; - error = ds_access_bts(child, bts_index, &bts_record); + error = ds_access_bts(child->bts, bts_index, &bts_record); if (error < 0) return error; @@ -705,14 +705,14 @@ static int ptrace_bts_drain(struct task_struct *child, size_t end, i; int error; - error = ds_get_bts_index(child, &end); + error = ds_get_bts_index(child->bts, &end); if (error < 0) return error; if (size < (end * sizeof(struct bts_struct))) return -EIO; - error = ds_access_bts(child, 0, (const void **)&raw); + error = ds_access_bts(child->bts, 0, (const void **)&raw); if (error < 0) return error; @@ -723,18 +723,13 @@ static int ptrace_bts_drain(struct task_struct *child, return -EFAULT; } - error = ds_clear_bts(child); + error = ds_clear_bts(child->bts); if (error < 0) return error; return end; } -static void ptrace_bts_ovfl(struct task_struct *child) -{ - send_sig(child->thread.bts_ovfl_signal, child, 0); -} - static int ptrace_bts_config(struct task_struct *child, long cfg_size, const struct ptrace_bts_config __user *ucfg) @@ -760,23 +755,29 @@ static int ptrace_bts_config(struct task_struct *child, goto errout; if (cfg.flags & PTRACE_BTS_O_ALLOC) { - ds_ovfl_callback_t ovfl = NULL; + bts_ovfl_callback_t ovfl = NULL; unsigned int sig = 0; - /* we ignore the error in case we were not tracing child */ - (void)ds_release_bts(child); - if (cfg.flags & PTRACE_BTS_O_SIGNAL) { if (!cfg.signal) goto errout; + error = -EOPNOTSUPP; + goto errout; + sig = cfg.signal; - ovfl = ptrace_bts_ovfl; } - error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl); - if (error < 0) + if (child->bts) + (void)ds_release_bts(child->bts); + + child->bts = ds_request_bts(child, /* base = */ NULL, cfg.size, + ovfl, /* th = */ (size_t)-1); + if (IS_ERR(child->bts)) { + error = PTR_ERR(child->bts); + child->bts = NULL; goto errout; + } child->thread.bts_ovfl_signal = sig; } @@ -823,15 +824,15 @@ static int ptrace_bts_status(struct task_struct *child, if (cfg_size < sizeof(cfg)) return -EIO; - error = ds_get_bts_end(child, &end); + error = ds_get_bts_end(child->bts, &end); if (error < 0) return error; - error = ds_access_bts(child, /* index = */ 0, &base); + error = ds_access_bts(child->bts, /* index = */ 0, &base); if (error < 0) return error; - error = ds_access_bts(child, /* index = */ end, &max); + error = ds_access_bts(child->bts, /* index = */ end, &max); if (error < 0) return error; @@ -884,10 +885,7 @@ static int ptrace_bts_write_record(struct task_struct *child, return -EINVAL; } - /* The writing task will be the switched-to task on a context - * switch. It needs to write into the switched-from task's BTS - * buffer. */ - return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts); + return ds_write_bts(child->bts, bts_record, bts_cfg.sizeof_bts); } void ptrace_bts_take_timestamp(struct task_struct *tsk, @@ -972,13 +970,15 @@ void ptrace_disable(struct task_struct *child) clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); #endif #ifdef CONFIG_X86_PTRACE_BTS - (void)ds_release_bts(child); + if (child->bts) { + (void)ds_release_bts(child->bts); - child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; - if (!child->thread.debugctlmsr) - clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); + child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; + if (!child->thread.debugctlmsr) + clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); - clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); + clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); + } #endif /* CONFIG_X86_PTRACE_BTS */ } @@ -1110,9 +1110,16 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) (child, data, (struct ptrace_bts_config __user *)addr); break; - case PTRACE_BTS_SIZE: - ret = ds_get_bts_index(child, /* pos = */ NULL); + case PTRACE_BTS_SIZE: { + size_t size; + + ret = ds_get_bts_index(child->bts, &size); + if (ret == 0) { + BUG_ON(size != (int) size); + ret = (int) size; + } break; + } case PTRACE_BTS_GET: ret = ptrace_bts_read_record @@ -1120,7 +1127,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; case PTRACE_BTS_CLEAR: - ret = ds_clear_bts(child); + ret = ds_clear_bts(child->bts); break; case PTRACE_BTS_DRAIN: diff --git a/include/linux/sched.h b/include/linux/sched.h index bee1e93c95ad..a9780eaa6737 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -96,6 +96,7 @@ struct exec_domain; struct futex_pi_state; struct robust_list_head; struct bio; +struct bts_tracer; /* * List of flags we want to share for kernel threads, @@ -1161,6 +1162,14 @@ struct task_struct { struct list_head ptraced; struct list_head ptrace_entry; +#ifdef CONFIG_X86_PTRACE_BTS + /* + * This is the tracer handle for the ptrace BTS extension. + * This field actually belongs to the ptracer task. + */ + struct bts_tracer *bts; +#endif /* CONFIG_X86_PTRACE_BTS */ + /* PID/PID hash table linkage. */ struct pid_link pids[PIDTYPE_MAX]; struct list_head thread_group; -- cgit v1.2.3 From 6abb11aecd888d1da6276399380b7355f127c006 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Tue, 25 Nov 2008 09:05:27 +0100 Subject: x86, bts, ptrace: move BTS buffer allocation from ds.c into ptrace.c Impact: restructure DS memory allocation to be done by the usage site of DS Require pre-allocated buffers in ds.h. Move the BTS buffer allocation for ptrace into ptrace.c. The pointer to the allocated buffer is stored in the traced task's task_struct together with the handle returned by ds_request_bts(). Removes memory accounting code. Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ds.h | 12 +++---- arch/x86/kernel/ds.c | 92 ++++++++--------------------------------------- arch/x86/kernel/ptrace.c | 22 ++++++++++-- include/linux/sched.h | 4 +++ 4 files changed, 42 insertions(+), 88 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index 0af997de5f01..99b6c39774a4 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h @@ -7,13 +7,12 @@ * * It manages: * - per-thread and per-cpu allocation of BTS and PEBS - * - buffer memory allocation (optional) - * - buffer overflow handling + * - buffer overflow handling (to be done) * - buffer access * * It assumes: - * - get_task_struct on all parameter tasks - * - current is allowed to trace parameter tasks + * - get_task_struct on all traced tasks + * - current is allowed to trace tasks * * * Copyright (C) 2007-2008 Intel Corporation. @@ -54,8 +53,7 @@ typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *); * task: the task to request recording for; * NULL for per-cpu recording on the current cpu * base: the base pointer for the (non-pageable) buffer; - * NULL if buffer allocation requested - * size: the size of the requested or provided buffer in bytes + * size: the size of the provided buffer in bytes * ovfl: pointer to a function to be called on buffer overflow; * NULL if cyclic buffer requested * th: the interrupt threshold in records from the end of the buffer; @@ -72,8 +70,6 @@ extern struct pebs_tracer *ds_request_pebs(struct task_struct *task, /* * Release BTS or PEBS resources * - * Frees buffers allocated on ds_request. - * * Returns 0 on success; -Eerrno otherwise * * tracer: the tracer handle returned from ds_request_~() diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 96768e9cce99..19a8c2c0389f 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -7,13 +7,12 @@ * * It manages: * - per-thread and per-cpu allocation of BTS and PEBS - * - buffer memory allocation (optional) - * - buffer overflow handling + * - buffer overflow handling (to be done) * - buffer access * * It assumes: - * - get_task_struct on all parameter tasks - * - current is allowed to trace parameter tasks + * - get_task_struct on all traced tasks + * - current is allowed to trace tasks * * * Copyright (C) 2007-2008 Intel Corporation. @@ -57,8 +56,6 @@ struct ds_tracer { /* the buffer provided on ds_request() and its size in bytes */ void *buffer; size_t size; - /* the number of allocated pages for on-request allocated buffers */ - unsigned int pages; }; struct bts_tracer { @@ -141,8 +138,7 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual, /* - * Locking is done only for allocating BTS or PEBS resources and for - * guarding context and buffer memory allocation. + * Locking is done only for allocating BTS or PEBS resources. */ static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); @@ -292,50 +288,6 @@ static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) } -/* - * Allocate a non-pageable buffer of the parameter size. - * Checks the memory and the locked memory rlimit. - * - * Returns the buffer, if successful; - * NULL, if out of memory or rlimit exceeded. - * - * size: the requested buffer size in bytes - * pages (out): if not NULL, contains the number of pages reserved - */ -static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) -{ - unsigned long rlim, vm, pgsz; - void *buffer = NULL; - - pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; - - down_write(¤t->mm->mmap_sem); - - rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; - vm = current->mm->total_vm + pgsz; - if (rlim < vm) - goto out; - - rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; - vm = current->mm->locked_vm + pgsz; - if (rlim < vm) - goto out; - - buffer = kzalloc(size, GFP_KERNEL); - if (!buffer) - goto out; - - current->mm->total_vm += pgsz; - current->mm->locked_vm += pgsz; - - if (pages) - *pages = pgsz; - - out: - up_write(¤t->mm->mmap_sem); - return buffer; -} - static void ds_install_ds_config(struct ds_context *context, enum ds_qualifier qual, void *base, size_t size, size_t ith) @@ -382,6 +334,10 @@ static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual, if (!ds_cfg.sizeof_ds) goto out; + error = -EINVAL; + if (!base) + goto out; + /* we require some space to do alignment adjustments below */ error = -EINVAL; if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) @@ -395,13 +351,6 @@ static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual, goto out; } - error = -ENOMEM; - if (!base) { - base = ds_allocate_buffer(size, &tracer->pages); - if (!base) - goto out; - } - tracer->buffer = base; tracer->size = size; @@ -466,7 +415,7 @@ struct bts_tracer *ds_request_bts(struct task_struct *task, return tracer; out_tracer: - (void)ds_release_bts(tracer); + kfree(tracer); out: return ERR_PTR(error); } @@ -496,31 +445,18 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task, return tracer; out_tracer: - (void)ds_release_pebs(tracer); + kfree(tracer); out: return ERR_PTR(error); } static void ds_release(struct ds_tracer *tracer, enum ds_qualifier qual) { - if (tracer->context) { - BUG_ON(tracer->context->owner[qual] != tracer); - tracer->context->owner[qual] = NULL; - - put_tracer(tracer->context->task); - ds_put_context(tracer->context); - } + BUG_ON(tracer->context->owner[qual] != tracer); + tracer->context->owner[qual] = NULL; - if (tracer->pages) { - kfree(tracer->buffer); - - down_write(¤t->mm->mmap_sem); - - current->mm->total_vm -= tracer->pages; - current->mm->locked_vm -= tracer->pages; - - up_write(¤t->mm->mmap_sem); - } + put_tracer(tracer->context->task); + ds_put_context(tracer->context); } int ds_release_bts(struct bts_tracer *tracer) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 76adf5b640ff..2c8ec1ba75e6 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -758,6 +758,10 @@ static int ptrace_bts_config(struct task_struct *child, bts_ovfl_callback_t ovfl = NULL; unsigned int sig = 0; + error = -EINVAL; + if (cfg.size < (10 * bts_cfg.sizeof_bts)) + goto errout; + if (cfg.flags & PTRACE_BTS_O_SIGNAL) { if (!cfg.signal) goto errout; @@ -768,14 +772,26 @@ static int ptrace_bts_config(struct task_struct *child, sig = cfg.signal; } - if (child->bts) + if (child->bts) { (void)ds_release_bts(child->bts); + kfree(child->bts_buffer); + + child->bts = NULL; + child->bts_buffer = NULL; + } + + error = -ENOMEM; + child->bts_buffer = kzalloc(cfg.size, GFP_KERNEL); + if (!child->bts_buffer) + goto errout; - child->bts = ds_request_bts(child, /* base = */ NULL, cfg.size, + child->bts = ds_request_bts(child, child->bts_buffer, cfg.size, ovfl, /* th = */ (size_t)-1); if (IS_ERR(child->bts)) { error = PTR_ERR(child->bts); + kfree(child->bts_buffer); child->bts = NULL; + child->bts_buffer = NULL; goto errout; } @@ -972,6 +988,8 @@ void ptrace_disable(struct task_struct *child) #ifdef CONFIG_X86_PTRACE_BTS if (child->bts) { (void)ds_release_bts(child->bts); + kfree(child->bts_buffer); + child->bts_buffer = NULL; child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; if (!child->thread.debugctlmsr) diff --git a/include/linux/sched.h b/include/linux/sched.h index a9780eaa6737..d02a0ca70ee9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1168,6 +1168,10 @@ struct task_struct { * This field actually belongs to the ptracer task. */ struct bts_tracer *bts; + /* + * The buffer to hold the BTS data. + */ + void *bts_buffer; #endif /* CONFIG_X86_PTRACE_BTS */ /* PID/PID hash table linkage. */ -- cgit v1.2.3 From 65f233fb1669e6c990cd1d7fd308ac7dc66dc207 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 25 Nov 2008 18:20:13 +0100 Subject: netfilter: fix warning in net/netfilter/nf_conntrack_proto_tcp.c fix this warning: net/netfilter/nf_conntrack_proto_tcp.c: In function \u2018tcp_in_window\u2019: net/netfilter/nf_conntrack_proto_tcp.c:491: warning: unused variable \u2018net\u2019 net/netfilter/nf_conntrack_proto_tcp.c: In function \u2018tcp_packet\u2019: net/netfilter/nf_conntrack_proto_tcp.c:812: warning: unused variable \u2018net\u2019 Signed-off-by: Ingo Molnar Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_l4proto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 7f2f43c77284..debdaf75cecf 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -129,7 +129,7 @@ extern const struct nla_policy nf_ct_port_nla_policy[]; && net_ratelimit()) #endif #else -#define LOG_INVALID(net, proto) 0 +static inline int LOG_INVALID(struct net *net, int proto) { return 0; } #endif /* CONFIG_SYSCTL */ #endif /*_NF_CONNTRACK_PROTOCOL_H*/ -- cgit v1.2.3 From 3f2355cb9111ac04e7ae06a4d7044da2ae813863 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 12 Nov 2008 14:22:02 -0800 Subject: cfg80211/mac80211: Add 802.11d support This adds country IE parsing to mac80211 and enables its usage within the new regulatory infrastructure in cfg80211. We parse the country IEs only on management beacons for the BSSID you are associated to and disregard the IEs when the country and environment (indoor, outdoor, any) matches the already processed country IE. To avoid following misinformed or outdated APs we build and use a regulatory domain out of the intersection between what the AP provides us on the country IE and what CRDA is aware is allowed on the same country. A secondary device is allowed to follow only the same country IE as it make no sense for two devices on a system to be in two different countries. In the case the AP is using country IEs for an incorrect country the user may help compliance further by setting the regulatory domain before or after the IE is parsed and in that case another intersection will be performed. CONFIG_WIRELESS_OLD_REGULATORY is supported but requires CRDA present. Signed-off-by: Luis R. Rodriguez Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 62 ++++++ include/net/wireless.h | 15 ++ net/mac80211/mlme.c | 7 + net/wireless/Kconfig | 11 ++ net/wireless/core.c | 5 +- net/wireless/core.h | 13 ++ net/wireless/nl80211.c | 2 +- net/wireless/reg.c | 479 ++++++++++++++++++++++++++++++++++++++++++++-- net/wireless/reg.h | 21 +- 9 files changed, 591 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 56b0eb25d927..a6ec928186ad 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1042,6 +1042,68 @@ enum ieee80211_spectrum_mgmt_actioncode { WLAN_ACTION_SPCT_CHL_SWITCH = 4, }; +/* + * IEEE 802.11-2007 7.3.2.9 Country information element + * + * Minimum length is 8 octets, ie len must be evenly + * divisible by 2 + */ + +/* Although the spec says 8 I'm seeing 6 in practice */ +#define IEEE80211_COUNTRY_IE_MIN_LEN 6 + +/* + * For regulatory extension stuff see IEEE 802.11-2007 + * Annex I (page 1141) and Annex J (page 1147). Also + * review 7.3.2.9. + * + * When dot11RegulatoryClassesRequired is true and the + * first_channel/reg_extension_id is >= 201 then the IE + * compromises of the 'ext' struct represented below: + * + * - Regulatory extension ID - when generating IE this just needs + * to be monotonically increasing for each triplet passed in + * the IE + * - Regulatory class - index into set of rules + * - Coverage class - index into air propagation time (Table 7-27), + * in microseconds, you can compute the air propagation time from + * the index by multiplying by 3, so index 10 yields a propagation + * of 10 us. Valid values are 0-31, values 32-255 are not defined + * yet. A value of 0 inicates air propagation of <= 1 us. + * + * See also Table I.2 for Emission limit sets and table + * I.3 for Behavior limit sets. Table J.1 indicates how to map + * a reg_class to an emission limit set and behavior limit set. + */ +#define IEEE80211_COUNTRY_EXTENSION_ID 201 + +/* + * Channels numbers in the IE must be monotonically increasing + * if dot11RegulatoryClassesRequired is not true. + * + * If dot11RegulatoryClassesRequired is true consecutive + * subband triplets following a regulatory triplet shall + * have monotonically increasing first_channel number fields. + * + * Channel numbers shall not overlap. + * + * Note that max_power is signed. + */ +struct ieee80211_country_ie_triplet { + union { + struct { + u8 first_channel; + u8 num_channels; + s8 max_power; + } __attribute__ ((packed)) chans; + struct { + u8 reg_extension_id; + u8 reg_class; + u8 coverage_class; + } __attribute__ ((packed)) ext; + }; +} __attribute__ ((packed)); + /* BACK action code */ enum ieee80211_back_actioncode { WLAN_ACTION_ADDBA_REQ = 0, diff --git a/include/net/wireless.h b/include/net/wireless.h index 412351560b76..c85fa0ea5c51 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -373,4 +373,19 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband, * for a regulatory domain structure for the respective country. */ extern void regulatory_hint(struct wiphy *wiphy, const char *alpha2); + +/** + * regulatory_hint_11d - hints a country IE as a regulatory domain + * @wiphy: the wireless device giving the hint (used only for reporting + * conflicts) + * @country_ie: pointer to the country IE + * @country_ie_len: length of the country IE + * + * We will intersect the rd with the what CRDA tells us should apply + * for the alpha2 this country IE belongs to, this prevents APs from + * sending us incorrect or outdated information against a country. + */ +extern void regulatory_hint_11d(struct wiphy *wiphy, + u8 *country_ie, + u8 country_ie_len); #endif /* __NET_WIRELESS_H */ diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d81a4d2cd3aa..30adaddeed27 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1736,6 +1736,13 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ap_ht_cap_flags); } + if (elems.country_elem) { + /* Note we are only reviewing this on beacons + * for the BSSID we are associated to */ + regulatory_hint_11d(local->hw.wiphy, + elems.country_elem, elems.country_elem_len); + } + ieee80211_bss_info_change_notify(sdata, changed); } diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index f7c64dbe86cc..e28e2b8fa436 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -1,6 +1,15 @@ config CFG80211 tristate "Improved wireless configuration API" +config CFG80211_REG_DEBUG + bool "cfg80211 regulatory debugging" + depends on CFG80211 + default n + ---help--- + You can enable this if you want to debug regulatory changes. + + If unsure, say N. + config NL80211 bool "nl80211 new netlink interface support" depends on CFG80211 @@ -40,6 +49,8 @@ config WIRELESS_OLD_REGULATORY ieee80211_regdom module parameter. This is being phased out and you should stop using them ASAP. + Note: You will need CRDA if you want 802.11d support + Say Y unless you have installed a new userspace application. Also say Y if have one currently depending on the ieee80211_regdom module parameter and cannot port it to use the new userspace diff --git a/net/wireless/core.c b/net/wireless/core.c index 39e3d10fccde..b96fc0c3f1c4 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -19,7 +19,6 @@ #include "nl80211.h" #include "core.h" #include "sysfs.h" -#include "reg.h" /* name for sysfs, %d is appended */ #define PHY_NAME "phy" @@ -348,6 +347,10 @@ void wiphy_unregister(struct wiphy *wiphy) /* unlock again before freeing */ mutex_unlock(&drv->mtx); + /* If this device got a regulatory hint tell core its + * free to listen now to a new shiny device regulatory hint */ + reg_device_remove(wiphy); + list_del(&drv->list); device_del(&drv->wiphy.dev); debugfs_remove(drv->wiphy.debugfsdir); diff --git a/net/wireless/core.h b/net/wireless/core.h index 771cc5cc7658..f7fb9f413028 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -11,6 +11,7 @@ #include #include #include +#include "reg.h" struct cfg80211_registered_device { struct cfg80211_ops *ops; @@ -21,6 +22,18 @@ struct cfg80211_registered_device { * any call is in progress */ struct mutex mtx; + /* ISO / IEC 3166 alpha2 for which this device is receiving + * country IEs on, this can help disregard country IEs from APs + * on the same alpha2 quickly. The alpha2 may differ from + * cfg80211_regdomain's alpha2 when an intersection has occurred. + * If the AP is reconfigured this can also be used to tell us if + * the country on the country IE changed. */ + char country_ie_alpha2[2]; + + /* If a Country IE has been received this tells us the environment + * which its telling us its in. This defaults to ENVIRON_ANY */ + enum environment_cap env; + /* wiphy index, internal only */ int idx; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e3e1494e769a..00121ceddb14 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1760,7 +1760,7 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) return -EINVAL; #endif mutex_lock(&cfg80211_drv_mutex); - r = __regulatory_hint(NULL, REGDOM_SET_BY_USER, data); + r = __regulatory_hint(NULL, REGDOM_SET_BY_USER, data, 0, ENVIRON_ANY); mutex_unlock(&cfg80211_drv_mutex); return r; } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index f0ff3d1779da..4dab993ea488 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -60,12 +60,18 @@ * @intersect: indicates whether the wireless core should intersect * the requested regulatory domain with the presently set regulatory * domain. + * @country_ie_checksum: checksum of the last processed and accepted + * country IE + * @country_ie_env: lets us know if the AP is telling us we are outdoor, + * indoor, or if it doesn't matter */ struct regulatory_request { struct wiphy *wiphy; enum reg_set_by initiator; char alpha2[2]; bool intersect; + u32 country_ie_checksum; + enum environment_cap country_ie_env; }; /* Receipt of information from last regulatory request */ @@ -85,6 +91,11 @@ static u32 supported_bandwidths[] = { * information to give us an alpha2 */ static const struct ieee80211_regdomain *cfg80211_regdomain; +/* We use this as a place for the rd structure built from the + * last parsed country IE to rest until CRDA gets back to us with + * what it thinks should apply for the same country */ +static const struct ieee80211_regdomain *country_ie_regdomain; + /* We keep a static world regulatory domain in case of the absence of CRDA */ static const struct ieee80211_regdomain world_regdom = { .n_reg_rules = 1, @@ -264,6 +275,18 @@ static bool is_unknown_alpha2(const char *alpha2) return false; } +static bool is_intersected_alpha2(const char *alpha2) +{ + if (!alpha2) + return false; + /* Special case where regulatory domain is the + * result of an intersection between two regulatory domain + * structures */ + if (alpha2[0] == '9' && alpha2[1] == '8') + return true; + return false; +} + static bool is_an_alpha2(const char *alpha2) { if (!alpha2) @@ -292,6 +315,25 @@ static bool regdom_changed(const char *alpha2) return true; } +/** + * country_ie_integrity_changes - tells us if the country IE has changed + * @checksum: checksum of country IE of fields we are interested in + * + * If the country IE has not changed you can ignore it safely. This is + * useful to determine if two devices are seeing two different country IEs + * even on the same alpha2. Note that this will return false if no IE has + * been set on the wireless core yet. + */ +static bool country_ie_integrity_changes(u32 checksum) +{ + /* If no IE has been set then the checksum doesn't change */ + if (unlikely(!last_request->country_ie_checksum)) + return false; + if (unlikely(last_request->country_ie_checksum != checksum)) + return true; + return false; +} + /* This lets us keep regulatory code which is updated on a regulatory * basis in userspace. */ static int call_crda(const char *alpha2) @@ -379,6 +421,174 @@ static u32 freq_max_bandwidth(const struct ieee80211_freq_range *freq_range, return 0; } +/* Converts a country IE to a regulatory domain. A regulatory domain + * structure has a lot of information which the IE doesn't yet have, + * so for the other values we use upper max values as we will intersect + * with our userspace regulatory agent to get lower bounds. */ +static struct ieee80211_regdomain *country_ie_2_rd( + u8 *country_ie, + u8 country_ie_len, + u32 *checksum) +{ + struct ieee80211_regdomain *rd = NULL; + unsigned int i = 0; + char alpha2[2]; + u32 flags = 0; + u32 num_rules = 0, size_of_regd = 0; + u8 *triplets_start = NULL; + u8 len_at_triplet = 0; + /* the last channel we have registered in a subband (triplet) */ + int last_sub_max_channel = 0; + + *checksum = 0xDEADBEEF; + + /* Country IE requirements */ + BUG_ON(country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN || + country_ie_len & 0x01); + + alpha2[0] = country_ie[0]; + alpha2[1] = country_ie[1]; + + /* + * Third octet can be: + * 'I' - Indoor + * 'O' - Outdoor + * + * anything else we assume is no restrictions + */ + if (country_ie[2] == 'I') + flags = NL80211_RRF_NO_OUTDOOR; + else if (country_ie[2] == 'O') + flags = NL80211_RRF_NO_INDOOR; + + country_ie += 3; + country_ie_len -= 3; + + triplets_start = country_ie; + len_at_triplet = country_ie_len; + + *checksum ^= ((flags ^ alpha2[0] ^ alpha2[1]) << 8); + + /* We need to build a reg rule for each triplet, but first we must + * calculate the number of reg rules we will need. We will need one + * for each channel subband */ + while (country_ie_len >= 3) { + struct ieee80211_country_ie_triplet *triplet = + (struct ieee80211_country_ie_triplet *) country_ie; + int cur_sub_max_channel = 0, cur_channel = 0; + + if (triplet->ext.reg_extension_id >= + IEEE80211_COUNTRY_EXTENSION_ID) { + country_ie += 3; + country_ie_len -= 3; + continue; + } + + cur_channel = triplet->chans.first_channel; + cur_sub_max_channel = ieee80211_channel_to_frequency( + cur_channel + triplet->chans.num_channels); + + /* Basic sanity check */ + if (cur_sub_max_channel < cur_channel) + return NULL; + + /* Do not allow overlapping channels. Also channels + * passed in each subband must be monotonically + * increasing */ + if (last_sub_max_channel) { + if (cur_channel <= last_sub_max_channel) + return NULL; + if (cur_sub_max_channel <= last_sub_max_channel) + return NULL; + } + + /* When dot11RegulatoryClassesRequired is supported + * we can throw ext triplets as part of this soup, + * for now we don't care when those change as we + * don't support them */ + *checksum ^= ((cur_channel ^ cur_sub_max_channel) << 8) | + ((cur_sub_max_channel ^ cur_sub_max_channel) << 16) | + ((triplet->chans.max_power ^ cur_sub_max_channel) << 24); + + last_sub_max_channel = cur_sub_max_channel; + + country_ie += 3; + country_ie_len -= 3; + num_rules++; + + /* Note: this is not a IEEE requirement but + * simply a memory requirement */ + if (num_rules > NL80211_MAX_SUPP_REG_RULES) + return NULL; + } + + country_ie = triplets_start; + country_ie_len = len_at_triplet; + + size_of_regd = sizeof(struct ieee80211_regdomain) + + (num_rules * sizeof(struct ieee80211_reg_rule)); + + rd = kzalloc(size_of_regd, GFP_KERNEL); + if (!rd) + return NULL; + + rd->n_reg_rules = num_rules; + rd->alpha2[0] = alpha2[0]; + rd->alpha2[1] = alpha2[1]; + + /* This time around we fill in the rd */ + while (country_ie_len >= 3) { + struct ieee80211_country_ie_triplet *triplet = + (struct ieee80211_country_ie_triplet *) country_ie; + struct ieee80211_reg_rule *reg_rule = NULL; + struct ieee80211_freq_range *freq_range = NULL; + struct ieee80211_power_rule *power_rule = NULL; + + /* Must parse if dot11RegulatoryClassesRequired is true, + * we don't support this yet */ + if (triplet->ext.reg_extension_id >= + IEEE80211_COUNTRY_EXTENSION_ID) { + country_ie += 3; + country_ie_len -= 3; + continue; + } + + reg_rule = &rd->reg_rules[i]; + freq_range = ®_rule->freq_range; + power_rule = ®_rule->power_rule; + + reg_rule->flags = flags; + + /* The +10 is since the regulatory domain expects + * the actual band edge, not the center of freq for + * its start and end freqs, assuming 20 MHz bandwidth on + * the channels passed */ + freq_range->start_freq_khz = + MHZ_TO_KHZ(ieee80211_channel_to_frequency( + triplet->chans.first_channel) - 10); + freq_range->end_freq_khz = + MHZ_TO_KHZ(ieee80211_channel_to_frequency( + triplet->chans.first_channel + + triplet->chans.num_channels) + 10); + + /* Large arbitrary values, we intersect later */ + /* Increment this if we ever support >= 40 MHz channels + * in IEEE 802.11 */ + freq_range->max_bandwidth_khz = MHZ_TO_KHZ(40); + power_rule->max_antenna_gain = DBI_TO_MBI(100); + power_rule->max_eirp = DBM_TO_MBM(100); + + country_ie += 3; + country_ie_len -= 3; + i++; + + BUG_ON(i > NL80211_MAX_SUPP_REG_RULES); + } + + return rd; +} + + /* Helper for regdom_intersect(), this does the real * mathematical intersection fun */ static int reg_rules_intersect( @@ -663,16 +873,14 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, return -EOPNOTSUPP; return -EALREADY; } - /* Two consecutive Country IE hints on the same wiphy */ - if (!alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) + /* Two consecutive Country IE hints on the same wiphy. + * This should be picked up early by the driver/stack */ + if (WARN_ON(!alpha2_equal(cfg80211_regdomain->alpha2, + alpha2))) return 0; return -EALREADY; } - /* - * Ignore Country IE hints for now, need to think about - * what we need to do to support multi-domain operation. - */ - return -EOPNOTSUPP; + return REG_INTERSECT; case REGDOM_SET_BY_DRIVER: if (last_request->initiator == REGDOM_SET_BY_DRIVER) return -EALREADY; @@ -680,6 +888,11 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, case REGDOM_SET_BY_USER: if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) return REG_INTERSECT; + /* If the user knows better the user should set the regdom + * to their country before the IE is picked up */ + if (last_request->initiator == REGDOM_SET_BY_USER && + last_request->intersect) + return -EOPNOTSUPP; return 0; } @@ -688,7 +901,9 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, /* Caller must hold &cfg80211_drv_mutex */ int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, - const char *alpha2) + const char *alpha2, + u32 country_ie_checksum, + enum environment_cap env) { struct regulatory_request *request; bool intersect = false; @@ -711,9 +926,21 @@ int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, request->initiator = set_by; request->wiphy = wiphy; request->intersect = intersect; + request->country_ie_checksum = country_ie_checksum; + request->country_ie_env = env; kfree(last_request); last_request = request; + /* + * Note: When CONFIG_WIRELESS_OLD_REGULATORY is enabled + * AND if CRDA is NOT present nothing will happen, if someone + * wants to bother with 11d with OLD_REG you can add a timer. + * If after x amount of time nothing happens you can call: + * + * return set_regdom(country_ie_regdomain); + * + * to intersect with the static rd + */ return call_crda(alpha2); } @@ -722,11 +949,120 @@ void regulatory_hint(struct wiphy *wiphy, const char *alpha2) BUG_ON(!alpha2); mutex_lock(&cfg80211_drv_mutex); - __regulatory_hint(wiphy, REGDOM_SET_BY_DRIVER, alpha2); + __regulatory_hint(wiphy, REGDOM_SET_BY_DRIVER, alpha2, 0, ENVIRON_ANY); mutex_unlock(&cfg80211_drv_mutex); } EXPORT_SYMBOL(regulatory_hint); +static bool reg_same_country_ie_hint(struct wiphy *wiphy, + u32 country_ie_checksum) +{ + if (!last_request->wiphy) + return false; + if (likely(last_request->wiphy != wiphy)) + return !country_ie_integrity_changes(country_ie_checksum); + /* We should not have let these through at this point, they + * should have been picked up earlier by the first alpha2 check + * on the device */ + if (WARN_ON(!country_ie_integrity_changes(country_ie_checksum))) + return true; + return false; +} + +void regulatory_hint_11d(struct wiphy *wiphy, + u8 *country_ie, + u8 country_ie_len) +{ + struct ieee80211_regdomain *rd = NULL; + char alpha2[2]; + u32 checksum = 0; + enum environment_cap env = ENVIRON_ANY; + + mutex_lock(&cfg80211_drv_mutex); + + /* IE len must be evenly divisible by 2 */ + if (country_ie_len & 0x01) + goto out; + + if (country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN) + goto out; + + /* Pending country IE processing, this can happen after we + * call CRDA and wait for a response if a beacon was received before + * we were able to process the last regulatory_hint_11d() call */ + if (country_ie_regdomain) + goto out; + + alpha2[0] = country_ie[0]; + alpha2[1] = country_ie[1]; + + if (country_ie[2] == 'I') + env = ENVIRON_INDOOR; + else if (country_ie[2] == 'O') + env = ENVIRON_OUTDOOR; + + /* We will run this for *every* beacon processed for the BSSID, so + * we optimize an early check to exit out early if we don't have to + * do anything */ + if (likely(last_request->wiphy)) { + struct cfg80211_registered_device *drv_last_ie; + + drv_last_ie = wiphy_to_dev(last_request->wiphy); + + /* Lets keep this simple -- we trust the first AP + * after we intersect with CRDA */ + if (likely(last_request->wiphy == wiphy)) { + /* Ignore IEs coming in on this wiphy with + * the same alpha2 and environment cap */ + if (likely(alpha2_equal(drv_last_ie->country_ie_alpha2, + alpha2) && + env == drv_last_ie->env)) { + goto out; + } + /* the wiphy moved on to another BSSID or the AP + * was reconfigured. XXX: We need to deal with the + * case where the user suspends and goes to goes + * to another country, and then gets IEs from an + * AP with different settings */ + goto out; + } else { + /* Ignore IEs coming in on two separate wiphys with + * the same alpha2 and environment cap */ + if (likely(alpha2_equal(drv_last_ie->country_ie_alpha2, + alpha2) && + env == drv_last_ie->env)) { + goto out; + } + /* We could potentially intersect though */ + goto out; + } + } + + rd = country_ie_2_rd(country_ie, country_ie_len, &checksum); + if (!rd) + goto out; + + /* This will not happen right now but we leave it here for the + * the future when we want to add suspend/resume support and having + * the user move to another country after doing so, or having the user + * move to another AP. Right now we just trust the first AP. This is why + * this is marked as likley(). If we hit this before we add this support + * we want to be informed of it as it would indicate a mistake in the + * current design */ + if (likely(WARN_ON(reg_same_country_ie_hint(wiphy, checksum)))) + goto out; + + /* We keep this around for when CRDA comes back with a response so + * we can intersect with that */ + country_ie_regdomain = rd; + + __regulatory_hint(wiphy, REGDOM_SET_BY_COUNTRY_IE, + country_ie_regdomain->alpha2, checksum, env); + +out: + mutex_unlock(&cfg80211_drv_mutex); +} +EXPORT_SYMBOL(regulatory_hint_11d); static void print_rd_rules(const struct ieee80211_regdomain *rd) { @@ -766,7 +1102,25 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd) static void print_regdomain(const struct ieee80211_regdomain *rd) { - if (is_world_regdom(rd->alpha2)) + if (is_intersected_alpha2(rd->alpha2)) { + struct wiphy *wiphy = NULL; + struct cfg80211_registered_device *drv; + + if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) { + if (last_request->wiphy) { + wiphy = last_request->wiphy; + drv = wiphy_to_dev(wiphy); + printk(KERN_INFO "cfg80211: Current regulatory " + "domain updated by AP to: %c%c\n", + drv->country_ie_alpha2[0], + drv->country_ie_alpha2[1]); + } else + printk(KERN_INFO "cfg80211: Current regulatory " + "domain intersected: \n"); + } else + printk(KERN_INFO "cfg80211: Current regulatory " + "intersected: \n"); + } else if (is_world_regdom(rd->alpha2)) printk(KERN_INFO "cfg80211: World regulatory " "domain updated:\n"); else { @@ -789,10 +1143,39 @@ static void print_regdomain_info(const struct ieee80211_regdomain *rd) print_rd_rules(rd); } +#ifdef CONFIG_CFG80211_REG_DEBUG +static void reg_country_ie_process_debug( + const struct ieee80211_regdomain *rd, + const struct ieee80211_regdomain *country_ie_regdomain, + const struct ieee80211_regdomain *intersected_rd) +{ + printk(KERN_DEBUG "cfg80211: Received country IE:\n"); + print_regdomain_info(country_ie_regdomain); + printk(KERN_DEBUG "cfg80211: CRDA thinks this should applied:\n"); + print_regdomain_info(rd); + if (intersected_rd) { + printk(KERN_DEBUG "cfg80211: We intersect both of these " + "and get:\n"); + print_regdomain_info(rd); + return; + } + printk(KERN_DEBUG "cfg80211: Intersection between both failed\n"); +} +#else +static inline void reg_country_ie_process_debug( + const struct ieee80211_regdomain *rd, + const struct ieee80211_regdomain *country_ie_regdomain, + const struct ieee80211_regdomain *intersected_rd) +{ +} +#endif + /* Takes ownership of rd only if it doesn't fail */ static int __set_regdom(const struct ieee80211_regdomain *rd) { const struct ieee80211_regdomain *intersected_rd = NULL; + struct cfg80211_registered_device *drv = NULL; + struct wiphy *wiphy = NULL; /* Some basic sanity checks first */ if (is_world_regdom(rd->alpha2)) { @@ -809,10 +1192,18 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) if (!last_request) return -EINVAL; - /* allow overriding the static definitions if CRDA is present */ - if (!is_old_static_regdom(cfg80211_regdomain) && - !regdom_changed(rd->alpha2)) - return -EINVAL; + /* Lets only bother proceeding on the same alpha2 if the current + * rd is non static (it means CRDA was present and was used last) + * and the pending request came in from a country IE */ + if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE) { + /* If someone else asked us to change the rd lets only bother + * checking if the alpha2 changes if CRDA was already called */ + if (!is_old_static_regdom(cfg80211_regdomain) && + !regdom_changed(rd->alpha2)) + return -EINVAL; + } + + wiphy = last_request->wiphy; /* Now lets set the regulatory domain, update all driver channels * and finally inform them of what we have done, in case they want @@ -853,9 +1244,47 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) return 0; } - /* Country IE parsing coming soon */ + /* + * Country IE requests are handled a bit differently, we intersect + * the country IE rd with what CRDA believes that country should have + */ + + BUG_ON(!country_ie_regdomain); + + if (rd != country_ie_regdomain) { + /* Intersect what CRDA returned and our what we + * had built from the Country IE received */ + + intersected_rd = regdom_intersect(rd, country_ie_regdomain); + + reg_country_ie_process_debug(rd, country_ie_regdomain, + intersected_rd); + + kfree(country_ie_regdomain); + country_ie_regdomain = NULL; + } else { + /* This would happen when CRDA was not present and + * OLD_REGULATORY was enabled. We intersect our Country + * IE rd and what was set on cfg80211 originally */ + intersected_rd = regdom_intersect(rd, cfg80211_regdomain); + } + + if (!intersected_rd) + return -EINVAL; + + drv = wiphy_to_dev(wiphy); + + drv->country_ie_alpha2[0] = rd->alpha2[0]; + drv->country_ie_alpha2[1] = rd->alpha2[1]; + drv->env = last_request->country_ie_env; + + BUG_ON(intersected_rd == rd); + + kfree(rd); + rd = NULL; + reset_regdomains(); - WARN_ON(1); + cfg80211_regdomain = intersected_rd; return 0; } @@ -887,6 +1316,17 @@ int set_regdom(const struct ieee80211_regdomain *rd) return r; } +/* Caller must hold cfg80211_drv_mutex */ +void reg_device_remove(struct wiphy *wiphy) +{ + if (!last_request->wiphy) + return; + if (last_request->wiphy != wiphy) + return; + last_request->wiphy = NULL; + last_request->country_ie_env = ENVIRON_ANY; +} + int regulatory_init(void) { int err; @@ -906,11 +1346,11 @@ int regulatory_init(void) * that is not a valid ISO / IEC 3166 alpha2 */ if (ieee80211_regdom[0] != 'E' || ieee80211_regdom[1] != 'U') err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, - ieee80211_regdom); + ieee80211_regdom, 0, ENVIRON_ANY); #else cfg80211_regdomain = cfg80211_world_regdom; - err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00"); + err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00", 0, ENVIRON_ANY); if (err) printk(KERN_ERR "cfg80211: calling CRDA failed - " "unable to update world regulatory domain, " @@ -926,6 +1366,9 @@ void regulatory_exit(void) reset_regdomains(); + kfree(country_ie_regdomain); + country_ie_regdomain = NULL; + kfree(last_request); platform_device_unregister(reg_pdev); diff --git a/net/wireless/reg.h b/net/wireless/reg.h index c9b6b6358bbe..a76ea3ff7cd6 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -4,28 +4,41 @@ bool is_world_regdom(const char *alpha2); bool reg_is_valid_request(const char *alpha2); +void reg_device_remove(struct wiphy *wiphy); + int regulatory_init(void); void regulatory_exit(void); int set_regdom(const struct ieee80211_regdomain *rd); +enum environment_cap { + ENVIRON_ANY, + ENVIRON_INDOOR, + ENVIRON_OUTDOOR, +}; + + /** * __regulatory_hint - hint to the wireless core a regulatory domain * @wiphy: if the hint comes from country information from an AP, this * is required to be set to the wiphy that received the information * @alpha2: the ISO/IEC 3166 alpha2 being claimed the regulatory domain * should be in. + * @country_ie_checksum: checksum of processed country IE, set this to 0 + * if the hint did not come from a country IE + * @country_ie_env: the environment the IE told us we are in, %ENVIRON_* * * The Wireless subsystem can use this function to hint to the wireless core - * what it believes should be the current regulatory domain by - * giving it an ISO/IEC 3166 alpha2 country code it knows its regulatory - * domain should be in. + * what it believes should be the current regulatory domain by giving it an + * ISO/IEC 3166 alpha2 country code it knows its regulatory domain should be + * in. * * Returns zero if all went fine, %-EALREADY if a regulatory domain had * already been set or other standard error codes. * */ extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, - const char *alpha2); + const char *alpha2, u32 country_ie_checksum, + enum environment_cap country_ie_env); #endif /* __NET_WIRELESS_REG_H */ -- cgit v1.2.3 From 14b9815af3f4fe0e171ee0c4325c31d2a2c1570b Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 12 Nov 2008 14:22:03 -0800 Subject: cfg80211: add support for custom firmware regulatory solutions This adds API to cfg80211 to allow wireless drivers to inform us if their firmware can handle regulatory considerations *and* they cannot map these regulatory domains to an ISO / IEC 3166 alpha2. In these cases we skip the first regulatory hint instead of expecting the driver to build their own regulatory structure, providing us with an alpha2, or using the reg_notifier(). Signed-off-by: Luis R. Rodriguez Acked-by: Zhu Yi Signed-off-by: John W. Linville --- include/net/wireless.h | 7 +++++++ net/wireless/reg.c | 13 ++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/wireless.h b/include/net/wireless.h index c85fa0ea5c51..21c5d966142d 100644 --- a/include/net/wireless.h +++ b/include/net/wireless.h @@ -181,6 +181,11 @@ struct ieee80211_supported_band { * struct wiphy - wireless hardware description * @idx: the wiphy index assigned to this item * @class_dev: the class device representing /sys/class/ieee80211/ + * @fw_handles_regulatory: tells us the firmware for this device + * has its own regulatory solution and cannot identify the + * ISO / IEC 3166 alpha2 it belongs to. When this is enabled + * we will disregard the first regulatory hint (when the + * initiator is %REGDOM_SET_BY_CORE). * @reg_notifier: the driver's regulatory notification callback */ struct wiphy { @@ -192,6 +197,8 @@ struct wiphy { /* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */ u16 interface_modes; + bool fw_handles_regulatory; + /* If multiple wiphys are registered and you're handed e.g. * a regular netdev with assigned ieee80211_ptr, you won't * know whether it points to a wiphy your driver has registered diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 4dab993ea488..0990059f7e48 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -816,12 +816,23 @@ static void handle_band(struct ieee80211_supported_band *sband) handle_channel(&sband->channels[i]); } +static bool ignore_reg_update(struct wiphy *wiphy, enum reg_set_by setby) +{ + if (!last_request) + return true; + if (setby == REGDOM_SET_BY_CORE && + wiphy->fw_handles_regulatory) + return true; + return false; +} + static void update_all_wiphy_regulatory(enum reg_set_by setby) { struct cfg80211_registered_device *drv; list_for_each_entry(drv, &cfg80211_drv_list, list) - wiphy_update_regulatory(&drv->wiphy, setby); + if (!ignore_reg_update(&drv->wiphy, setby)) + wiphy_update_regulatory(&drv->wiphy, setby); } void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby) -- cgit v1.2.3 From 8eecaba900e89643029fd2c253ad8ebb60761165 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Tue, 25 Nov 2008 13:45:29 -0800 Subject: tcp: tcp_limit_reno_sacked can become static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 2 -- net/ipv4/tcp_input.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 265392470b26..e8ae90a8c35e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -761,8 +761,6 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) return tp->packets_out - tcp_left_out(tp) + tp->retrans_out; } -extern int tcp_limit_reno_sacked(struct tcp_sock *tp); - /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd. * The exception is rate halving phase, when cwnd is decreasing towards * ssthresh. diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9f8a80ba17bd..d67b6e9cc540 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1940,7 +1940,7 @@ out: /* Limits sacked_out so that sum with lost_out isn't ever larger than * packets_out. Returns zero if sacked_out adjustement wasn't necessary. */ -int tcp_limit_reno_sacked(struct tcp_sock *tp) +static int tcp_limit_reno_sacked(struct tcp_sock *tp) { u32 holes; -- cgit v1.2.3 From fb52607afcd0629776f1dc9e657647ceae81dd50 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 25 Nov 2008 21:07:04 +0100 Subject: tracing/function-return-tracer: change the name into function-graph-tracer Impact: cleanup This patch changes the name of the "return function tracer" into function-graph-tracer which is a more suitable name for a tracing which makes one able to retrieve the ordered call stack during the code flow. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- arch/x86/include/asm/ftrace.h | 4 +- arch/x86/kernel/Makefile | 4 +- arch/x86/kernel/entry_32.S | 12 ++--- arch/x86/kernel/ftrace.c | 12 ++--- include/linux/ftrace.h | 24 ++++----- include/linux/ftrace_irq.h | 2 +- include/linux/sched.h | 2 +- kernel/Makefile | 2 +- kernel/fork.c | 4 +- kernel/sched.c | 2 +- kernel/trace/Kconfig | 19 ++++--- kernel/trace/Makefile | 2 +- kernel/trace/ftrace.c | 26 +++++----- kernel/trace/trace.c | 18 +++---- kernel/trace/trace.h | 12 ++--- kernel/trace/trace_functions_graph.c | 98 ++++++++++++++++++++++++++++++++++++ 17 files changed, 173 insertions(+), 72 deletions(-) create mode 100644 kernel/trace/trace_functions_graph.c (limited to 'include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e49a4fd718fe..0842b1127684 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -29,7 +29,7 @@ config X86 select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_TRACER - select HAVE_FUNCTION_RET_TRACER if X86_32 + select HAVE_FUNCTION_GRAPH_TRACER if X86_32 select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 754a3e082f94..7e61b4ceb9a4 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -28,7 +28,7 @@ struct dyn_arch_ftrace { #endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ -#ifdef CONFIG_FUNCTION_RET_TRACER +#ifdef CONFIG_FUNCTION_GRAPH_TRACER #ifndef __ASSEMBLY__ @@ -51,6 +51,6 @@ struct ftrace_ret_stack { extern void return_to_handler(void); #endif /* __ASSEMBLY__ */ -#endif /* CONFIG_FUNCTION_RET_TRACER */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ #endif /* _ASM_X86_FTRACE_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index af2bc36ca1c4..64939a0c3986 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -14,7 +14,7 @@ CFLAGS_REMOVE_paravirt-spinlocks.o = -pg CFLAGS_REMOVE_ftrace.o = -pg endif -ifdef CONFIG_FUNCTION_RET_TRACER +ifdef CONFIG_FUNCTION_GRAPH_TRACER # Don't trace __switch_to() but let it for function tracer CFLAGS_REMOVE_process_32.o = -pg endif @@ -70,7 +70,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_FUNCTION_RET_TRACER) += ftrace.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 74defe21ba42..2b1f0f081a6b 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1188,9 +1188,9 @@ ENTRY(mcount) cmpl $ftrace_stub, ftrace_trace_function jnz trace -#ifdef CONFIG_FUNCTION_RET_TRACER - cmpl $ftrace_stub, ftrace_function_return - jnz ftrace_return_caller +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + cmpl $ftrace_stub, ftrace_graph_function + jnz ftrace_graph_caller #endif .globl ftrace_stub ftrace_stub: @@ -1215,8 +1215,8 @@ END(mcount) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_TRACER */ -#ifdef CONFIG_FUNCTION_RET_TRACER -ENTRY(ftrace_return_caller) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller) cmpl $0, function_trace_stop jne ftrace_stub @@ -1230,7 +1230,7 @@ ENTRY(ftrace_return_caller) popl %ecx popl %eax ret -END(ftrace_return_caller) +END(ftrace_graph_caller) .globl return_to_handler return_to_handler: diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index bb137f7297ed..3595a4c14aba 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -323,7 +323,7 @@ int __init ftrace_dyn_arch_init(void *data) } #endif -#ifdef CONFIG_FUNCTION_RET_TRACER +#ifdef CONFIG_FUNCTION_GRAPH_TRACER #ifndef CONFIG_DYNAMIC_FTRACE @@ -389,11 +389,11 @@ static void pop_return_trace(unsigned long *ret, unsigned long long *time, */ unsigned long ftrace_return_to_handler(void) { - struct ftrace_retfunc trace; + struct ftrace_graph_ret trace; pop_return_trace(&trace.ret, &trace.calltime, &trace.func, &trace.overrun); trace.rettime = cpu_clock(raw_smp_processor_id()); - ftrace_function_return(&trace); + ftrace_graph_function(&trace); return trace.ret; } @@ -440,12 +440,12 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) ); if (WARN_ON(faulted)) { - unregister_ftrace_return(); + unregister_ftrace_graph(); return; } if (WARN_ON(!__kernel_text_address(old))) { - unregister_ftrace_return(); + unregister_ftrace_graph(); *parent = old; return; } @@ -456,4 +456,4 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) *parent = old; } -#endif /* CONFIG_FUNCTION_RET_TRACER */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 7854d87b97b2..b4ac734ad8d6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -115,8 +115,8 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); -#ifdef CONFIG_FUNCTION_RET_TRACER -extern void ftrace_return_caller(void); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +extern void ftrace_graph_caller(void); #endif /** @@ -315,7 +315,7 @@ ftrace_init_module(struct module *mod, /* * Structure that defines a return function trace. */ -struct ftrace_retfunc { +struct ftrace_graph_ret { unsigned long ret; /* Return address */ unsigned long func; /* Current function */ unsigned long long calltime; @@ -324,22 +324,22 @@ struct ftrace_retfunc { unsigned long overrun; }; -#ifdef CONFIG_FUNCTION_RET_TRACER +#ifdef CONFIG_FUNCTION_GRAPH_TRACER #define FTRACE_RETFUNC_DEPTH 50 #define FTRACE_RETSTACK_ALLOC_SIZE 32 /* Type of a callback handler of tracing return function */ -typedef void (*trace_function_return_t)(struct ftrace_retfunc *); +typedef void (*trace_function_graph_t)(struct ftrace_graph_ret *); -extern int register_ftrace_return(trace_function_return_t func); +extern int register_ftrace_graph(trace_function_graph_t func); /* The current handler in use */ -extern trace_function_return_t ftrace_function_return; -extern void unregister_ftrace_return(void); +extern trace_function_graph_t ftrace_graph_function; +extern void unregister_ftrace_graph(void); -extern void ftrace_retfunc_init_task(struct task_struct *t); -extern void ftrace_retfunc_exit_task(struct task_struct *t); +extern void ftrace_graph_init_task(struct task_struct *t); +extern void ftrace_graph_exit_task(struct task_struct *t); #else -static inline void ftrace_retfunc_init_task(struct task_struct *t) { } -static inline void ftrace_retfunc_exit_task(struct task_struct *t) { } +static inline void ftrace_graph_init_task(struct task_struct *t) { } +static inline void ftrace_graph_exit_task(struct task_struct *t) { } #endif #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h index 0b4df55d7a74..366a054d0b05 100644 --- a/include/linux/ftrace_irq.h +++ b/include/linux/ftrace_irq.h @@ -2,7 +2,7 @@ #define _LINUX_FTRACE_IRQ_H -#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_RET_TRACER) +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_GRAPH_TRACER) extern void ftrace_nmi_enter(void); extern void ftrace_nmi_exit(void); #else diff --git a/include/linux/sched.h b/include/linux/sched.h index d02a0ca70ee9..7ad48f2a2758 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1365,7 +1365,7 @@ struct task_struct { unsigned long default_timer_slack_ns; struct list_head *scm_work_list; -#ifdef CONFIG_FUNCTION_RET_TRACER +#ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Index of current stored adress in ret_stack */ int curr_ret_stack; /* Stack of return addresses for return function tracing */ diff --git a/kernel/Makefile b/kernel/Makefile index 03a45e7e87b7..703cf3b7389c 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -21,7 +21,7 @@ CFLAGS_REMOVE_cgroup-debug.o = -pg CFLAGS_REMOVE_sched_clock.o = -pg CFLAGS_REMOVE_sched.o = -pg endif -ifdef CONFIG_FUNCTION_RET_TRACER +ifdef CONFIG_FUNCTION_GRAPH_TRACER CFLAGS_REMOVE_extable.o = -pg # For __kernel_text_address() CFLAGS_REMOVE_module.o = -pg # For __module_text_address() endif diff --git a/kernel/fork.c b/kernel/fork.c index d6e1a3205f62..5f82a999c032 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -140,7 +140,7 @@ void free_task(struct task_struct *tsk) prop_local_destroy_single(&tsk->dirties); free_thread_info(tsk->stack); rt_mutex_debug_task_free(tsk); - ftrace_retfunc_exit_task(tsk); + ftrace_graph_exit_task(tsk); free_task_struct(tsk); } EXPORT_SYMBOL(free_task); @@ -1271,7 +1271,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, total_forks++; spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); - ftrace_retfunc_init_task(p); + ftrace_graph_init_task(p); proc_fork_connector(p); cgroup_post_fork(p); return p; diff --git a/kernel/sched.c b/kernel/sched.c index 388d9db044ab..52490bf6b884 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5901,7 +5901,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) * The idle tasks have their own, simple scheduling class: */ idle->sched_class = &idle_sched_class; - ftrace_retfunc_init_task(idle); + ftrace_graph_init_task(idle); } /* diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 620feadff67a..eb9b901e0777 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -12,7 +12,7 @@ config NOP_TRACER config HAVE_FUNCTION_TRACER bool -config HAVE_FUNCTION_RET_TRACER +config HAVE_FUNCTION_GRAPH_TRACER bool config HAVE_FUNCTION_TRACE_MCOUNT_TEST @@ -63,15 +63,18 @@ config FUNCTION_TRACER (the bootup default), then the overhead of the instructions is very small and not measurable even in micro-benchmarks. -config FUNCTION_RET_TRACER - bool "Kernel Function return Tracer" - depends on HAVE_FUNCTION_RET_TRACER +config FUNCTION_GRAPH_TRACER + bool "Kernel Function Graph Tracer" + depends on HAVE_FUNCTION_GRAPH_TRACER depends on FUNCTION_TRACER help - Enable the kernel to trace a function at its return. - It's first purpose is to trace the duration of functions. - This is done by setting the current return address on the thread - info structure of the current task. + Enable the kernel to trace a function at both its return + and its entry. + It's first purpose is to trace the duration of functions and + draw a call graph for each thread with some informations like + the return value. + This is done by setting the current return address on the current + task structure into a stack of calls. config IRQSOFF_TRACER bool "Interrupts-off Latency Tracer" diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index cef4bcb4e822..08c5fe6ddc09 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -29,7 +29,7 @@ obj-$(CONFIG_NOP_TRACER) += trace_nop.o obj-$(CONFIG_STACK_TRACER) += trace_stack.o obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o obj-$(CONFIG_BOOT_TRACER) += trace_boot.o -obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o obj-$(CONFIG_BTS_TRACER) += trace_bts.o diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 53042f118f23..9e19976af727 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -395,11 +395,11 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) unsigned long ip, fl; unsigned long ftrace_addr; -#ifdef CONFIG_FUNCTION_RET_TRACER +#ifdef CONFIG_FUNCTION_GRAPH_TRACER if (ftrace_tracing_type == FTRACE_TYPE_ENTER) ftrace_addr = (unsigned long)ftrace_caller; else - ftrace_addr = (unsigned long)ftrace_return_caller; + ftrace_addr = (unsigned long)ftrace_graph_caller; #else ftrace_addr = (unsigned long)ftrace_caller; #endif @@ -1496,13 +1496,13 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, return ret; } -#ifdef CONFIG_FUNCTION_RET_TRACER +#ifdef CONFIG_FUNCTION_GRAPH_TRACER static atomic_t ftrace_retfunc_active; /* The callback that hooks the return of a function */ -trace_function_return_t ftrace_function_return = - (trace_function_return_t)ftrace_stub; +trace_function_graph_t ftrace_graph_function = + (trace_function_graph_t)ftrace_stub; /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */ @@ -1549,7 +1549,7 @@ free: } /* Allocate a return stack for each task */ -static int start_return_tracing(void) +static int start_graph_tracing(void) { struct ftrace_ret_stack **ret_stack_list; int ret; @@ -1569,7 +1569,7 @@ static int start_return_tracing(void) return ret; } -int register_ftrace_return(trace_function_return_t func) +int register_ftrace_graph(trace_function_graph_t func) { int ret = 0; @@ -1584,13 +1584,13 @@ int register_ftrace_return(trace_function_return_t func) goto out; } atomic_inc(&ftrace_retfunc_active); - ret = start_return_tracing(); + ret = start_graph_tracing(); if (ret) { atomic_dec(&ftrace_retfunc_active); goto out; } ftrace_tracing_type = FTRACE_TYPE_RETURN; - ftrace_function_return = func; + ftrace_graph_function = func; ftrace_startup(); out: @@ -1598,12 +1598,12 @@ out: return ret; } -void unregister_ftrace_return(void) +void unregister_ftrace_graph(void) { mutex_lock(&ftrace_sysctl_lock); atomic_dec(&ftrace_retfunc_active); - ftrace_function_return = (trace_function_return_t)ftrace_stub; + ftrace_graph_function = (trace_function_graph_t)ftrace_stub; ftrace_shutdown(); /* Restore normal tracing type */ ftrace_tracing_type = FTRACE_TYPE_ENTER; @@ -1612,7 +1612,7 @@ void unregister_ftrace_return(void) } /* Allocate a return stack for newly created task */ -void ftrace_retfunc_init_task(struct task_struct *t) +void ftrace_graph_init_task(struct task_struct *t) { if (atomic_read(&ftrace_retfunc_active)) { t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH @@ -1626,7 +1626,7 @@ void ftrace_retfunc_init_task(struct task_struct *t) t->ret_stack = NULL; } -void ftrace_retfunc_exit_task(struct task_struct *t) +void ftrace_graph_exit_task(struct task_struct *t) { struct ftrace_ret_stack *ret_stack = t->ret_stack; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8df8fdd69c95..f21ab2c68fd4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -878,15 +878,15 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, ring_buffer_unlock_commit(tr->buffer, event, irq_flags); } -#ifdef CONFIG_FUNCTION_RET_TRACER -static void __trace_function_return(struct trace_array *tr, +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static void __trace_function_graph(struct trace_array *tr, struct trace_array_cpu *data, - struct ftrace_retfunc *trace, + struct ftrace_graph_ret *trace, unsigned long flags, int pc) { struct ring_buffer_event *event; - struct ftrace_ret_entry *entry; + struct ftrace_graph_entry *entry; unsigned long irq_flags; if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) @@ -1177,8 +1177,8 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) local_irq_restore(flags); } -#ifdef CONFIG_FUNCTION_RET_TRACER -void trace_function_return(struct ftrace_retfunc *trace) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +void trace_function_graph(struct ftrace_graph_ret *trace) { struct trace_array *tr = &global_trace; struct trace_array_cpu *data; @@ -1193,12 +1193,12 @@ void trace_function_return(struct ftrace_retfunc *trace) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); - __trace_function_return(tr, data, trace, flags, pc); + __trace_function_graph(tr, data, trace, flags, pc); } atomic_dec(&data->disabled); raw_local_irq_restore(flags); } -#endif /* CONFIG_FUNCTION_RET_TRACER */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ static struct ftrace_ops trace_ops __read_mostly = { @@ -2001,7 +2001,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) break; } case TRACE_FN_RET: { - return print_return_function(iter); + return print_graph_function(iter); break; } case TRACE_BRANCH: { diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3abd645e8af2..72b5ef868765 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -57,7 +57,7 @@ struct ftrace_entry { }; /* Function return entry */ -struct ftrace_ret_entry { +struct ftrace_graph_entry { struct trace_entry ent; unsigned long ip; unsigned long parent_ip; @@ -264,7 +264,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\ IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\ IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ - IF_ASSIGN(var, ent, struct ftrace_ret_entry, TRACE_FN_RET);\ + IF_ASSIGN(var, ent, struct ftrace_graph_entry, TRACE_FN_RET);\ IF_ASSIGN(var, ent, struct bts_entry, TRACE_BTS);\ __ftrace_bad_type(); \ } while (0) @@ -398,7 +398,7 @@ void trace_function(struct trace_array *tr, unsigned long parent_ip, unsigned long flags, int pc); void -trace_function_return(struct ftrace_retfunc *trace); +trace_function_graph(struct ftrace_graph_ret *trace); void trace_bts(struct trace_array *tr, unsigned long from, @@ -489,11 +489,11 @@ extern int trace_vprintk(unsigned long ip, const char *fmt, va_list args); extern unsigned long trace_flags; /* Standard output formatting function used for function return traces */ -#ifdef CONFIG_FUNCTION_RET_TRACER -extern enum print_line_t print_return_function(struct trace_iterator *iter); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +extern enum print_line_t print_graph_function(struct trace_iterator *iter); #else static inline enum print_line_t -print_return_function(struct trace_iterator *iter) +print_graph_function(struct trace_iterator *iter) { return TRACE_TYPE_UNHANDLED; } diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c new file mode 100644 index 000000000000..f5bad4624d2b --- /dev/null +++ b/kernel/trace/trace_functions_graph.c @@ -0,0 +1,98 @@ +/* + * + * Function graph tracer. + * Copyright (c) 2008 Frederic Weisbecker + * Mostly borrowed from function tracer which + * is Copyright (c) Steven Rostedt + * + */ +#include +#include +#include +#include + +#include "trace.h" + + +#define TRACE_GRAPH_PRINT_OVERRUN 0x1 +static struct tracer_opt trace_opts[] = { + /* Display overruns or not */ + { TRACER_OPT(overrun, TRACE_GRAPH_PRINT_OVERRUN) }, + { } /* Empty entry */ +}; + +static struct tracer_flags tracer_flags = { + .val = 0, /* Don't display overruns by default */ + .opts = trace_opts +}; + + +static int graph_trace_init(struct trace_array *tr) +{ + int cpu; + for_each_online_cpu(cpu) + tracing_reset(tr, cpu); + + return register_ftrace_graph(&trace_function_graph); +} + +static void graph_trace_reset(struct trace_array *tr) +{ + unregister_ftrace_graph(); +} + + +enum print_line_t +print_graph_function(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *entry = iter->ent; + struct ftrace_graph_entry *field; + int ret; + + if (entry->type == TRACE_FN_RET) { + trace_assign_type(field, entry); + ret = trace_seq_printf(s, "%pF -> ", (void *)field->parent_ip); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + ret = seq_print_ip_sym(s, field->ip, + trace_flags & TRACE_ITER_SYM_MASK); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, " (%llu ns)", + field->rettime - field->calltime); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { + ret = trace_seq_printf(s, " (Overruns: %lu)", + field->overrun); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + ret = trace_seq_printf(s, "\n"); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + return TRACE_TYPE_HANDLED; + } + return TRACE_TYPE_UNHANDLED; +} + +static struct tracer graph_trace __read_mostly = { + .name = "function-graph", + .init = graph_trace_init, + .reset = graph_trace_reset, + .print_line = print_graph_function, + .flags = &tracer_flags, +}; + +static __init int init_graph_trace(void) +{ + return register_tracer(&graph_trace); +} + +device_initcall(init_graph_trace); -- cgit v1.2.3 From 287b6e68ca7209caec40b2f44f837c580a413bae Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 26 Nov 2008 00:57:25 +0100 Subject: tracing/function-return-tracer: set a more human readable output Impact: feature This patch sets a C-like output for the function graph tracing. For this aim, we now call two handler for each function: one on the entry and one other on return. This way we can draw a well-ordered call stack. The pid of the previous trace is loosely stored to be compared against the one of the current trace to see if there were a context switch. Without this little feature, the call tree would seem broken at some locations. We could use the sched_tracer to capture these sched_events but this way of processing is much more simpler. 2 spaces have been chosen for indentation to fit the screen while deep calls. The time of execution in nanosecs is printed just after closed braces, it seems more easy this way to find the corresponding function. If the time was printed as a first column, it would be not so easy to find the corresponding function if it is called on a deep depth. I plan to output the return value but on 32 bits CPU, the return value can be 32 or 64, and its difficult to guess on which case we are. I don't know what would be the better solution on X86-32: only print eax (low-part) or even edx (high-part). Actually it's thee same problem when a function return a 8 bits value, the high part of eax could contain junk values... Here is an example of trace: sys_read() { fget_light() { } 526 vfs_read() { rw_verify_area() { security_file_permission() { cap_file_permission() { } 519 } 1564 } 2640 do_sync_read() { pipe_read() { __might_sleep() { } 511 pipe_wait() { prepare_to_wait() { } 760 deactivate_task() { dequeue_task() { dequeue_task_fair() { dequeue_entity() { update_curr() { update_min_vruntime() { } 504 } 1587 clear_buddies() { } 512 add_cfs_task_weight() { } 519 update_min_vruntime() { } 511 } 5602 dequeue_entity() { update_curr() { update_min_vruntime() { } 496 } 1631 clear_buddies() { } 496 update_min_vruntime() { } 527 } 4580 hrtick_update() { hrtick_start_fair() { } 488 } 1489 } 13700 } 14949 } 16016 msecs_to_jiffies() { } 496 put_prev_task_fair() { } 504 pick_next_task_fair() { } 489 pick_next_task_rt() { } 496 pick_next_task_fair() { } 489 pick_next_task_idle() { } 489 ------------8<---------- thread 4 ------------8<---------- finish_task_switch() { } 1203 do_softirq() { __do_softirq() { __local_bh_disable() { } 669 rcu_process_callbacks() { __rcu_process_callbacks() { cpu_quiet() { rcu_start_batch() { } 503 } 1647 } 3128 __rcu_process_callbacks() { } 542 } 5362 _local_bh_enable() { } 587 } 8880 } 9986 kthread_should_stop() { } 669 deactivate_task() { dequeue_task() { dequeue_task_fair() { dequeue_entity() { update_curr() { calc_delta_mine() { } 511 update_min_vruntime() { } 511 } 2813 Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 32 +++++++---- include/linux/ftrace.h | 25 ++++++-- kernel/trace/ftrace.c | 30 +++++----- kernel/trace/trace.c | 67 ++++++++++++++++++---- kernel/trace/trace.h | 28 +++++---- kernel/trace/trace_functions_graph.c | 104 ++++++++++++++++++++++++++-------- kernel/trace/trace_functions_return.c | 98 -------------------------------- 7 files changed, 208 insertions(+), 176 deletions(-) delete mode 100644 kernel/trace/trace_functions_return.c (limited to 'include') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 3595a4c14aba..26b2d92d48b3 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -347,7 +347,7 @@ void ftrace_nmi_exit(void) /* Add a function return address to the trace stack on thread info.*/ static int push_return_trace(unsigned long ret, unsigned long long time, - unsigned long func) + unsigned long func, int *depth) { int index; @@ -365,21 +365,22 @@ static int push_return_trace(unsigned long ret, unsigned long long time, current->ret_stack[index].ret = ret; current->ret_stack[index].func = func; current->ret_stack[index].calltime = time; + *depth = index; return 0; } /* Retrieve a function return address to the trace stack on thread info.*/ -static void pop_return_trace(unsigned long *ret, unsigned long long *time, - unsigned long *func, unsigned long *overrun) +static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) { int index; index = current->curr_ret_stack; *ret = current->ret_stack[index].ret; - *func = current->ret_stack[index].func; - *time = current->ret_stack[index].calltime; - *overrun = atomic_read(¤t->trace_overrun); + trace->func = current->ret_stack[index].func; + trace->calltime = current->ret_stack[index].calltime; + trace->overrun = atomic_read(¤t->trace_overrun); + trace->depth = index; current->curr_ret_stack--; } @@ -390,12 +391,13 @@ static void pop_return_trace(unsigned long *ret, unsigned long long *time, unsigned long ftrace_return_to_handler(void) { struct ftrace_graph_ret trace; - pop_return_trace(&trace.ret, &trace.calltime, &trace.func, - &trace.overrun); + unsigned long ret; + + pop_return_trace(&trace, &ret); trace.rettime = cpu_clock(raw_smp_processor_id()); - ftrace_graph_function(&trace); + ftrace_graph_return(&trace); - return trace.ret; + return ret; } /* @@ -407,6 +409,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) unsigned long old; unsigned long long calltime; int faulted; + struct ftrace_graph_ent trace; unsigned long return_hooker = (unsigned long) &return_to_handler; @@ -452,8 +455,15 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) calltime = cpu_clock(raw_smp_processor_id()); - if (push_return_trace(old, calltime, self_addr) == -EBUSY) + if (push_return_trace(old, calltime, + self_addr, &trace.depth) == -EBUSY) { *parent = old; + return; + } + + trace.func = self_addr; + ftrace_graph_entry(&trace); + } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b4ac734ad8d6..fc2d54987198 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -312,27 +312,40 @@ ftrace_init_module(struct module *mod, #endif +/* + * Structure that defines an entry function trace. + */ +struct ftrace_graph_ent { + unsigned long func; /* Current function */ + int depth; +}; + /* * Structure that defines a return function trace. */ struct ftrace_graph_ret { - unsigned long ret; /* Return address */ unsigned long func; /* Current function */ unsigned long long calltime; unsigned long long rettime; /* Number of functions that overran the depth limit for current task */ unsigned long overrun; + int depth; }; #ifdef CONFIG_FUNCTION_GRAPH_TRACER #define FTRACE_RETFUNC_DEPTH 50 #define FTRACE_RETSTACK_ALLOC_SIZE 32 -/* Type of a callback handler of tracing return function */ -typedef void (*trace_function_graph_t)(struct ftrace_graph_ret *); +/* Type of the callback handlers for tracing function graph*/ +typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */ +typedef void (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ + +extern int register_ftrace_graph(trace_func_graph_ret_t retfunc, + trace_func_graph_ent_t entryfunc); + +/* The current handlers in use */ +extern trace_func_graph_ret_t ftrace_graph_return; +extern trace_func_graph_ent_t ftrace_graph_entry; -extern int register_ftrace_graph(trace_function_graph_t func); -/* The current handler in use */ -extern trace_function_graph_t ftrace_graph_function; extern void unregister_ftrace_graph(void); extern void ftrace_graph_init_task(struct task_struct *t); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 9e19976af727..7e2d3b91692d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1498,12 +1498,13 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, #ifdef CONFIG_FUNCTION_GRAPH_TRACER -static atomic_t ftrace_retfunc_active; - -/* The callback that hooks the return of a function */ -trace_function_graph_t ftrace_graph_function = - (trace_function_graph_t)ftrace_stub; +static atomic_t ftrace_graph_active; +/* The callbacks that hook a function */ +trace_func_graph_ret_t ftrace_graph_return = + (trace_func_graph_ret_t)ftrace_stub; +trace_func_graph_ent_t ftrace_graph_entry = + (trace_func_graph_ent_t)ftrace_stub; /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) @@ -1569,7 +1570,8 @@ static int start_graph_tracing(void) return ret; } -int register_ftrace_graph(trace_function_graph_t func) +int register_ftrace_graph(trace_func_graph_ret_t retfunc, + trace_func_graph_ent_t entryfunc) { int ret = 0; @@ -1583,14 +1585,15 @@ int register_ftrace_graph(trace_function_graph_t func) ret = -EBUSY; goto out; } - atomic_inc(&ftrace_retfunc_active); + atomic_inc(&ftrace_graph_active); ret = start_graph_tracing(); if (ret) { - atomic_dec(&ftrace_retfunc_active); + atomic_dec(&ftrace_graph_active); goto out; } ftrace_tracing_type = FTRACE_TYPE_RETURN; - ftrace_graph_function = func; + ftrace_graph_return = retfunc; + ftrace_graph_entry = entryfunc; ftrace_startup(); out: @@ -1602,8 +1605,9 @@ void unregister_ftrace_graph(void) { mutex_lock(&ftrace_sysctl_lock); - atomic_dec(&ftrace_retfunc_active); - ftrace_graph_function = (trace_function_graph_t)ftrace_stub; + atomic_dec(&ftrace_graph_active); + ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; + ftrace_graph_entry = (trace_func_graph_ent_t)ftrace_stub; ftrace_shutdown(); /* Restore normal tracing type */ ftrace_tracing_type = FTRACE_TYPE_ENTER; @@ -1614,7 +1618,7 @@ void unregister_ftrace_graph(void) /* Allocate a return stack for newly created task */ void ftrace_graph_init_task(struct task_struct *t) { - if (atomic_read(&ftrace_retfunc_active)) { + if (atomic_read(&ftrace_graph_active)) { t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH * sizeof(struct ftrace_ret_stack), GFP_KERNEL); @@ -1638,5 +1642,3 @@ void ftrace_graph_exit_task(struct task_struct *t) } #endif - - diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f21ab2c68fd4..9d5f7c94f251 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -879,14 +879,38 @@ trace_function(struct trace_array *tr, struct trace_array_cpu *data, } #ifdef CONFIG_FUNCTION_GRAPH_TRACER -static void __trace_function_graph(struct trace_array *tr, +static void __trace_graph_entry(struct trace_array *tr, + struct trace_array_cpu *data, + struct ftrace_graph_ent *trace, + unsigned long flags, + int pc) +{ + struct ring_buffer_event *event; + struct ftrace_graph_ent_entry *entry; + unsigned long irq_flags; + + if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) + return; + + event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), + &irq_flags); + if (!event) + return; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, flags, pc); + entry->ent.type = TRACE_GRAPH_ENT; + entry->graph_ent = *trace; + ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); +} + +static void __trace_graph_return(struct trace_array *tr, struct trace_array_cpu *data, struct ftrace_graph_ret *trace, unsigned long flags, int pc) { struct ring_buffer_event *event; - struct ftrace_graph_entry *entry; + struct ftrace_graph_ret_entry *entry; unsigned long irq_flags; if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) @@ -898,12 +922,8 @@ static void __trace_function_graph(struct trace_array *tr, return; entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_FN_RET; - entry->ip = trace->func; - entry->parent_ip = trace->ret; - entry->rettime = trace->rettime; - entry->calltime = trace->calltime; - entry->overrun = trace->overrun; + entry->ent.type = TRACE_GRAPH_RET; + entry->ret = *trace; ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); } #endif @@ -1178,7 +1198,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) } #ifdef CONFIG_FUNCTION_GRAPH_TRACER -void trace_function_graph(struct ftrace_graph_ret *trace) +void trace_graph_entry(struct ftrace_graph_ent *trace) { struct trace_array *tr = &global_trace; struct trace_array_cpu *data; @@ -1193,7 +1213,28 @@ void trace_function_graph(struct ftrace_graph_ret *trace) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); - __trace_function_graph(tr, data, trace, flags, pc); + __trace_graph_entry(tr, data, trace, flags, pc); + } + atomic_dec(&data->disabled); + raw_local_irq_restore(flags); +} + +void trace_graph_return(struct ftrace_graph_ret *trace) +{ + struct trace_array *tr = &global_trace; + struct trace_array_cpu *data; + unsigned long flags; + long disabled; + int cpu; + int pc; + + raw_local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + if (likely(disabled == 1)) { + pc = preempt_count(); + __trace_graph_return(tr, data, trace, flags, pc); } atomic_dec(&data->disabled); raw_local_irq_restore(flags); @@ -2000,9 +2041,11 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) trace_seq_print_cont(s, iter); break; } - case TRACE_FN_RET: { + case TRACE_GRAPH_RET: { + return print_graph_function(iter); + } + case TRACE_GRAPH_ENT: { return print_graph_function(iter); - break; } case TRACE_BRANCH: { struct trace_branch *field; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 72b5ef868765..ffe1bb1eb620 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -25,7 +25,8 @@ enum trace_type { TRACE_BRANCH, TRACE_BOOT_CALL, TRACE_BOOT_RET, - TRACE_FN_RET, + TRACE_GRAPH_RET, + TRACE_GRAPH_ENT, TRACE_USER_STACK, TRACE_BTS, @@ -56,14 +57,16 @@ struct ftrace_entry { unsigned long parent_ip; }; +/* Function call entry */ +struct ftrace_graph_ent_entry { + struct trace_entry ent; + struct ftrace_graph_ent graph_ent; +}; + /* Function return entry */ -struct ftrace_graph_entry { - struct trace_entry ent; - unsigned long ip; - unsigned long parent_ip; - unsigned long long calltime; - unsigned long long rettime; - unsigned long overrun; +struct ftrace_graph_ret_entry { + struct trace_entry ent; + struct ftrace_graph_ret ret; }; extern struct tracer boot_tracer; @@ -264,7 +267,10 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\ IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\ IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ - IF_ASSIGN(var, ent, struct ftrace_graph_entry, TRACE_FN_RET);\ + IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \ + TRACE_GRAPH_ENT); \ + IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ + TRACE_GRAPH_RET); \ IF_ASSIGN(var, ent, struct bts_entry, TRACE_BTS);\ __ftrace_bad_type(); \ } while (0) @@ -397,9 +403,9 @@ void trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc); -void -trace_function_graph(struct ftrace_graph_ret *trace); +void trace_graph_return(struct ftrace_graph_ret *trace); +void trace_graph_entry(struct ftrace_graph_ent *trace); void trace_bts(struct trace_array *tr, unsigned long from, unsigned long to); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index f5bad4624d2b..b6f0cc2a00cb 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -13,6 +13,7 @@ #include "trace.h" +#define TRACE_GRAPH_INDENT 2 #define TRACE_GRAPH_PRINT_OVERRUN 0x1 static struct tracer_opt trace_opts[] = { @@ -26,6 +27,8 @@ static struct tracer_flags tracer_flags = { .opts = trace_opts }; +/* pid on the last trace processed */ +static pid_t last_pid = -1; static int graph_trace_init(struct trace_array *tr) { @@ -33,7 +36,8 @@ static int graph_trace_init(struct trace_array *tr) for_each_online_cpu(cpu) tracing_reset(tr, cpu); - return register_ftrace_graph(&trace_function_graph); + return register_ftrace_graph(&trace_graph_return, + &trace_graph_entry); } static void graph_trace_reset(struct trace_array *tr) @@ -41,45 +45,97 @@ static void graph_trace_reset(struct trace_array *tr) unregister_ftrace_graph(); } +/* If the pid changed since the last trace, output this event */ +static int verif_pid(struct trace_seq *s, pid_t pid) +{ + if (last_pid != -1 && last_pid == pid) + return 1; -enum print_line_t -print_graph_function(struct trace_iterator *iter) + last_pid = pid; + return trace_seq_printf(s, "\n------------8<---------- thread %d" + " ------------8<----------\n\n", + pid); +} + +static enum print_line_t +print_graph_entry(struct ftrace_graph_ent *call, struct trace_seq *s, + struct trace_entry *ent) { - struct trace_seq *s = &iter->seq; - struct trace_entry *entry = iter->ent; - struct ftrace_graph_entry *field; + int i; int ret; - if (entry->type == TRACE_FN_RET) { - trace_assign_type(field, entry); - ret = trace_seq_printf(s, "%pF -> ", (void *)field->parent_ip); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; + if (!verif_pid(s, ent->pid)) + return TRACE_TYPE_PARTIAL_LINE; - ret = seq_print_ip_sym(s, field->ip, - trace_flags & TRACE_ITER_SYM_MASK); + for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { + ret = trace_seq_printf(s, " "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; + } + + ret = seq_print_ip_sym(s, call->func, 0); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, "() {\n"); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + return TRACE_TYPE_HANDLED; +} + +static enum print_line_t +print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, + struct trace_entry *ent) +{ + int i; + int ret; + + if (!verif_pid(s, ent->pid)) + return TRACE_TYPE_PARTIAL_LINE; - ret = trace_seq_printf(s, " (%llu ns)", - field->rettime - field->calltime); + for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { + ret = trace_seq_printf(s, " "); if (!ret) return TRACE_TYPE_PARTIAL_LINE; + } + + ret = trace_seq_printf(s, "} "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; - if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { - ret = trace_seq_printf(s, " (Overruns: %lu)", - field->overrun); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } + ret = trace_seq_printf(s, "%llu\n", trace->rettime - trace->calltime); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; - ret = trace_seq_printf(s, "\n"); + if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) { + ret = trace_seq_printf(s, " (Overruns: %lu)\n", + trace->overrun); if (!ret) return TRACE_TYPE_PARTIAL_LINE; + } + return TRACE_TYPE_HANDLED; +} + +enum print_line_t +print_graph_function(struct trace_iterator *iter) +{ + struct trace_seq *s = &iter->seq; + struct trace_entry *entry = iter->ent; - return TRACE_TYPE_HANDLED; + switch (entry->type) { + case TRACE_GRAPH_ENT: { + struct ftrace_graph_ent_entry *field; + trace_assign_type(field, entry); + return print_graph_entry(&field->graph_ent, s, entry); + } + case TRACE_GRAPH_RET: { + struct ftrace_graph_ret_entry *field; + trace_assign_type(field, entry); + return print_graph_return(&field->ret, s, entry); + } + default: + return TRACE_TYPE_UNHANDLED; } - return TRACE_TYPE_UNHANDLED; } static struct tracer graph_trace __read_mostly = { diff --git a/kernel/trace/trace_functions_return.c b/kernel/trace/trace_functions_return.c deleted file mode 100644 index e00d64509c9c..000000000000 --- a/kernel/trace/trace_functions_return.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * - * Function return tracer. - * Copyright (c) 2008 Frederic Weisbecker - * Mostly borrowed from function tracer which - * is Copyright (c) Steven Rostedt - * - */ -#include -#include -#include -#include - -#include "trace.h" - - -#define TRACE_RETURN_PRINT_OVERRUN 0x1 -static struct tracer_opt trace_opts[] = { - /* Display overruns or not */ - { TRACER_OPT(overrun, TRACE_RETURN_PRINT_OVERRUN) }, - { } /* Empty entry */ -}; - -static struct tracer_flags tracer_flags = { - .val = 0, /* Don't display overruns by default */ - .opts = trace_opts -}; - - -static int return_trace_init(struct trace_array *tr) -{ - int cpu; - for_each_online_cpu(cpu) - tracing_reset(tr, cpu); - - return register_ftrace_return(&trace_function_return); -} - -static void return_trace_reset(struct trace_array *tr) -{ - unregister_ftrace_return(); -} - - -enum print_line_t -print_return_function(struct trace_iterator *iter) -{ - struct trace_seq *s = &iter->seq; - struct trace_entry *entry = iter->ent; - struct ftrace_ret_entry *field; - int ret; - - if (entry->type == TRACE_FN_RET) { - trace_assign_type(field, entry); - ret = trace_seq_printf(s, "%pF -> ", (void *)field->parent_ip); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - ret = seq_print_ip_sym(s, field->ip, - trace_flags & TRACE_ITER_SYM_MASK); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - ret = trace_seq_printf(s, " (%llu ns)", - field->rettime - field->calltime); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - if (tracer_flags.val & TRACE_RETURN_PRINT_OVERRUN) { - ret = trace_seq_printf(s, " (Overruns: %lu)", - field->overrun); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - } - - ret = trace_seq_printf(s, "\n"); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - return TRACE_TYPE_HANDLED; - } - return TRACE_TYPE_UNHANDLED; -} - -static struct tracer return_trace __read_mostly = { - .name = "return", - .init = return_trace_init, - .reset = return_trace_reset, - .print_line = print_return_function, - .flags = &tracer_flags, -}; - -static __init int init_return_trace(void) -{ - return register_tracer(&return_trace); -} - -device_initcall(init_return_trace); -- cgit v1.2.3 From d62ddc21b674b5ac1466091ff3fbf7baa53bc92c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:14:31 -0800 Subject: netns xfrm: add netns boilerplate Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/net_namespace.h | 4 ++++ include/net/netns/xfrm.h | 7 +++++++ include/net/xfrm.h | 3 ++- net/xfrm/xfrm_policy.c | 45 ++++++++++++++++++++++++++++++++++++++++----- net/xfrm/xfrm_state.c | 7 ++++++- 5 files changed, 59 insertions(+), 7 deletions(-) create mode 100644 include/net/netns/xfrm.h (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 319557789a40..6fc13d905c5f 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -19,6 +19,7 @@ #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include #endif +#include struct proc_dir_entry; struct net_device; @@ -73,6 +74,9 @@ struct net { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct netns_ct ct; #endif +#endif +#ifdef CONFIG_XFRM + struct netns_xfrm xfrm; #endif struct net_generic *gen; }; diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h new file mode 100644 index 000000000000..1cb0024a3b47 --- /dev/null +++ b/include/net/netns/xfrm.h @@ -0,0 +1,7 @@ +#ifndef __NETNS_XFRM_H +#define __NETNS_XFRM_H + +struct netns_xfrm { +}; + +#endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 45e11b3631e4..9107d6f5c297 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1269,7 +1269,8 @@ struct xfrm6_tunnel { extern void xfrm_init(void); extern void xfrm4_init(void); -extern void xfrm_state_init(void); +extern int xfrm_state_init(struct net *net); +extern void xfrm_state_fini(struct net *net); extern void xfrm4_state_init(void); #ifdef CONFIG_XFRM extern int xfrm6_init(void); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ea3456daa9cb..8e7671b9e76e 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2394,12 +2394,13 @@ static int __init xfrm_statistics_init(void) } #endif -static void __init xfrm_policy_init(void) +static int __net_init xfrm_policy_init(struct net *net) { unsigned int hmask, sz; int dir; - xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", + if (net_eq(net, &init_net)) + xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", sizeof(struct xfrm_dst), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); @@ -2425,16 +2426,50 @@ static void __init xfrm_policy_init(void) } INIT_LIST_HEAD(&xfrm_policy_all); - register_netdevice_notifier(&xfrm_dev_notifier); + if (net_eq(net, &init_net)) + register_netdevice_notifier(&xfrm_dev_notifier); + return 0; +} + +static void xfrm_policy_fini(struct net *net) +{ } +static int __net_init xfrm_net_init(struct net *net) +{ + int rv; + + rv = xfrm_state_init(net); + if (rv < 0) + goto out_state; + rv = xfrm_policy_init(net); + if (rv < 0) + goto out_policy; + return 0; + +out_policy: + xfrm_state_fini(net); +out_state: + return rv; +} + +static void __net_exit xfrm_net_exit(struct net *net) +{ + xfrm_policy_fini(net); + xfrm_state_fini(net); +} + +static struct pernet_operations __net_initdata xfrm_net_ops = { + .init = xfrm_net_init, + .exit = xfrm_net_exit, +}; + void __init xfrm_init(void) { + register_pernet_subsys(&xfrm_net_ops); #ifdef CONFIG_XFRM_STATISTICS xfrm_statistics_init(); #endif - xfrm_state_init(); - xfrm_policy_init(); xfrm_input_init(); #ifdef CONFIG_XFRM_STATISTICS xfrm_proc_init(); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index cd9d9171ded7..268fe3f9e498 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2080,7 +2080,7 @@ error: EXPORT_SYMBOL(xfrm_init_state); -void __init xfrm_state_init(void) +int __net_init xfrm_state_init(struct net *net) { unsigned int sz; @@ -2094,6 +2094,11 @@ void __init xfrm_state_init(void) xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task); + return 0; +} + +void xfrm_state_fini(struct net *net) +{ } #ifdef CONFIG_AUDITSYSCALL -- cgit v1.2.3 From 673c09be457bb23aa0eaaa79804cbb342210d195 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:15:16 -0800 Subject: netns xfrm: add struct xfrm_state::xs_net To avoid unnecessary complications with passing netns around. * set once, very early after allocating * once set, never changes For a while create every xfrm_state in init_net. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 10 +++++++++- net/ipv4/ipcomp.c | 2 +- net/ipv6/ipcomp6.c | 2 +- net/key/af_key.c | 2 +- net/xfrm/xfrm_state.c | 9 +++++---- net/xfrm/xfrm_user.c | 4 ++-- 6 files changed, 19 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 9107d6f5c297..9da89039832c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -130,6 +130,9 @@ struct xfrm_state_walk { /* Full description of state of transformer. */ struct xfrm_state { +#ifdef CONFIG_NET_NS + struct net *xs_net; +#endif union { struct hlist_node gclist; struct hlist_node bydst; @@ -223,6 +226,11 @@ struct xfrm_state void *data; }; +static inline struct net *xs_net(struct xfrm_state *x) +{ + return read_pnet(&x->xs_net); +} + /* xflags - make enum if more show up */ #define XFRM_TIME_DEFER 1 @@ -1296,7 +1304,7 @@ extern void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto); extern int xfrm_state_walk(struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *); extern void xfrm_state_walk_done(struct xfrm_state_walk *walk); -extern struct xfrm_state *xfrm_state_alloc(void); +extern struct xfrm_state *xfrm_state_alloc(struct net *net); extern struct xfrm_state *xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol, int *err, diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index ec8264ae45c2..0a35f1b6f22c 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -49,7 +49,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) { struct xfrm_state *t; - t = xfrm_state_alloc(); + t = xfrm_state_alloc(&init_net); if (t == NULL) goto out; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index d4576a9c154f..c369638e208a 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -76,7 +76,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) { struct xfrm_state *t = NULL; - t = xfrm_state_alloc(); + t = xfrm_state_alloc(&init_net); if (!t) goto out; diff --git a/net/key/af_key.c b/net/key/af_key.c index 5b22e011653b..bde8aad4cc93 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1122,7 +1122,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, (key->sadb_key_bits+7) / 8 > key->sadb_key_len * sizeof(uint64_t))) return ERR_PTR(-EINVAL); - x = xfrm_state_alloc(); + x = xfrm_state_alloc(&init_net); if (x == NULL) return ERR_PTR(-ENOBUFS); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 268fe3f9e498..81bde76d049c 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -504,13 +504,14 @@ out: static void xfrm_replay_timer_handler(unsigned long data); -struct xfrm_state *xfrm_state_alloc(void) +struct xfrm_state *xfrm_state_alloc(struct net *net) { struct xfrm_state *x; x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC); if (x) { + write_pnet(&x->xs_net, net); atomic_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); INIT_LIST_HEAD(&x->km.all); @@ -835,7 +836,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, error = -EEXIST; goto out; } - x = xfrm_state_alloc(); + x = xfrm_state_alloc(&init_net); if (x == NULL) { error = -ENOMEM; goto out; @@ -1017,7 +1018,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re if (!create) return NULL; - x = xfrm_state_alloc(); + x = xfrm_state_alloc(&init_net); if (likely(x)) { switch (family) { case AF_INET: @@ -1125,7 +1126,7 @@ EXPORT_SYMBOL(xfrm_state_add); static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) { int err = -ENOMEM; - struct xfrm_state *x = xfrm_state_alloc(); + struct xfrm_state *x = xfrm_state_alloc(&init_net); if (!x) goto error; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ee15d5dd6544..65cdaa5c2280 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -320,7 +320,7 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, struct nlattr **attrs, int *errp) { - struct xfrm_state *x = xfrm_state_alloc(); + struct xfrm_state *x = xfrm_state_alloc(&init_net); int err = -ENOMEM; if (!x) @@ -1663,7 +1663,7 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr *rt = attrs[XFRMA_TMPL]; struct xfrm_user_acquire *ua = nlmsg_data(nlh); - struct xfrm_state *x = xfrm_state_alloc(); + struct xfrm_state *x = xfrm_state_alloc(&init_net); int err = -ENOMEM; if (!x) -- cgit v1.2.3 From 9d4139c76905833afcb77fe8ccc17f302a0eb9ab Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:16:11 -0800 Subject: netns xfrm: per-netns xfrm_state_all list This is done to get a) simple "something leaked" check b) cover possible DoSes when other netns puts many, many xfrm_states onto a list. c) not miss "alien xfrm_state" check in some of list iterators in future. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 3 +++ net/xfrm/xfrm_state.c | 14 ++++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 1cb0024a3b47..6ae234a16517 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -1,7 +1,10 @@ #ifndef __NETNS_XFRM_H #define __NETNS_XFRM_H +#include + struct netns_xfrm { + struct list_head state_all; }; #endif diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 81bde76d049c..85bb85484b70 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -50,7 +50,6 @@ static DEFINE_SPINLOCK(xfrm_state_lock); * Main use is finding SA after policy selected tunnel or transport mode. * Also, it can be used by ah/esp icmp error handler to find offending SA. */ -static LIST_HEAD(xfrm_state_all); static struct hlist_head *xfrm_state_bydst __read_mostly; static struct hlist_head *xfrm_state_bysrc __read_mostly; static struct hlist_head *xfrm_state_byspi __read_mostly; @@ -855,7 +854,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; - list_add(&x->km.all, &xfrm_state_all); + list_add(&x->km.all, &init_net.xfrm.state_all); hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(daddr, saddr, family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); @@ -924,7 +923,7 @@ static void __xfrm_state_insert(struct xfrm_state *x) x->genid = ++xfrm_state_genid; - list_add(&x->km.all, &xfrm_state_all); + list_add(&x->km.all, &init_net.xfrm.state_all); h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); @@ -1053,7 +1052,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re xfrm_state_hold(x); x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; add_timer(&x->timer); - list_add(&x->km.all, &xfrm_state_all); + list_add(&x->km.all, &init_net.xfrm.state_all); hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(daddr, saddr, family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); @@ -1559,10 +1558,10 @@ int xfrm_state_walk(struct xfrm_state_walk *walk, spin_lock_bh(&xfrm_state_lock); if (list_empty(&walk->all)) - x = list_first_entry(&xfrm_state_all, struct xfrm_state_walk, all); + x = list_first_entry(&init_net.xfrm.state_all, struct xfrm_state_walk, all); else x = list_entry(&walk->all, struct xfrm_state_walk, all); - list_for_each_entry_from(x, &xfrm_state_all, all) { + list_for_each_entry_from(x, &init_net.xfrm.state_all, all) { if (x->state == XFRM_STATE_DEAD) continue; state = container_of(x, struct xfrm_state, km); @@ -2085,6 +2084,8 @@ int __net_init xfrm_state_init(struct net *net) { unsigned int sz; + INIT_LIST_HEAD(&net->xfrm.state_all); + sz = sizeof(struct hlist_head) * 8; xfrm_state_bydst = xfrm_hash_alloc(sz); @@ -2100,6 +2101,7 @@ int __net_init xfrm_state_init(struct net *net) void xfrm_state_fini(struct net *net) { + WARN_ON(!list_empty(&net->xfrm.state_all)); } #ifdef CONFIG_AUDITSYSCALL -- cgit v1.2.3 From 73d189dce486cd6693fa29169b1aac0872efbcea Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:16:58 -0800 Subject: netns xfrm: per-netns xfrm_state_bydst hash Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 9 +++++++++ net/xfrm/xfrm_state.c | 50 +++++++++++++++++++++++++----------------------- 2 files changed, 35 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 6ae234a16517..02487b39ce53 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -5,6 +5,15 @@ struct netns_xfrm { struct list_head state_all; + /* + * Hash table to find appropriate SA towards given target (endpoint of + * tunnel or destination of transport mode) allowed by selector. + * + * Main use is finding SA after policy selected tunnel or transport + * mode. Also, it can be used by ah/esp icmp error handler to find + * offending SA. + */ + struct hlist_head *state_bydst; }; #endif diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 85bb85484b70..08b78895ffbc 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -44,13 +44,6 @@ u32 sysctl_xfrm_acq_expires __read_mostly = 30; static DEFINE_SPINLOCK(xfrm_state_lock); -/* Hash table to find appropriate SA towards given target (endpoint - * of tunnel or destination of transport mode) allowed by selector. - * - * Main use is finding SA after policy selected tunnel or transport mode. - * Also, it can be used by ah/esp icmp error handler to find offending SA. - */ -static struct hlist_head *xfrm_state_bydst __read_mostly; static struct hlist_head *xfrm_state_bysrc __read_mostly; static struct hlist_head *xfrm_state_byspi __read_mostly; static unsigned int xfrm_state_hmask __read_mostly; @@ -157,15 +150,15 @@ static void xfrm_hash_resize(struct work_struct *__unused) nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; for (i = xfrm_state_hmask; i >= 0; i--) - xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi, + xfrm_hash_transfer(init_net.xfrm.state_bydst+i, ndst, nsrc, nspi, nhashmask); - odst = xfrm_state_bydst; + odst = init_net.xfrm.state_bydst; osrc = xfrm_state_bysrc; ospi = xfrm_state_byspi; ohashmask = xfrm_state_hmask; - xfrm_state_bydst = ndst; + init_net.xfrm.state_bydst = ndst; xfrm_state_bysrc = nsrc; xfrm_state_byspi = nspi; xfrm_state_hmask = nhashmask; @@ -595,7 +588,7 @@ xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info) struct hlist_node *entry; struct xfrm_state *x; - hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto) && (err = security_xfrm_state_delete(x)) != 0) { xfrm_audit_state_delete(x, 0, @@ -630,7 +623,7 @@ int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info) struct hlist_node *entry; struct xfrm_state *x; restart: - hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+i, bydst) { if (!xfrm_state_kern(x) && xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); @@ -785,7 +778,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, spin_lock_bh(&xfrm_state_lock); h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family); - hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && !(x->props.flags & XFRM_STATE_WILDRECV) && @@ -855,7 +848,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; list_add(&x->km.all, &init_net.xfrm.state_all); - hlist_add_head(&x->bydst, xfrm_state_bydst+h); + hlist_add_head(&x->bydst, init_net.xfrm.state_bydst+h); h = xfrm_src_hash(daddr, saddr, family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); if (x->id.spi) { @@ -895,7 +888,7 @@ xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr, spin_lock(&xfrm_state_lock); h = xfrm_dst_hash(daddr, saddr, reqid, family); - hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && !(x->props.flags & XFRM_STATE_WILDRECV) && @@ -927,7 +920,7 @@ static void __xfrm_state_insert(struct xfrm_state *x) h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); - hlist_add_head(&x->bydst, xfrm_state_bydst+h); + hlist_add_head(&x->bydst, init_net.xfrm.state_bydst+h); h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); @@ -960,7 +953,7 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) unsigned int h; h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family); - hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) && @@ -985,7 +978,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re struct hlist_node *entry; struct xfrm_state *x; - hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) { if (x->props.reqid != reqid || x->props.mode != mode || x->props.family != family || @@ -1053,7 +1046,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; add_timer(&x->timer); list_add(&x->km.all, &init_net.xfrm.state_all); - hlist_add_head(&x->bydst, xfrm_state_bydst+h); + hlist_add_head(&x->bydst, init_net.xfrm.state_bydst+h); h = xfrm_src_hash(daddr, saddr, family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); @@ -1208,7 +1201,7 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) if (m->reqid) { h = xfrm_dst_hash(&m->old_daddr, &m->old_saddr, m->reqid, m->old_family); - hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -1457,7 +1450,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) struct hlist_node *entry; struct xfrm_state *x; - hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+i, bydst) { if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) { xfrm_state_hold(x); @@ -2088,20 +2081,29 @@ int __net_init xfrm_state_init(struct net *net) sz = sizeof(struct hlist_head) * 8; - xfrm_state_bydst = xfrm_hash_alloc(sz); + net->xfrm.state_bydst = xfrm_hash_alloc(sz); + if (!net->xfrm.state_bydst) + goto out_bydst; xfrm_state_bysrc = xfrm_hash_alloc(sz); xfrm_state_byspi = xfrm_hash_alloc(sz); - if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi) - panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes."); xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task); return 0; + +out_bydst: + return -ENOMEM; } void xfrm_state_fini(struct net *net) { + unsigned int sz; + WARN_ON(!list_empty(&net->xfrm.state_all)); + + sz = (xfrm_state_hmask + 1) * sizeof(struct hlist_head); + WARN_ON(!hlist_empty(net->xfrm.state_bydst)); + xfrm_hash_free(net->xfrm.state_bydst, sz); } #ifdef CONFIG_AUDITSYSCALL -- cgit v1.2.3 From d320bbb306f2085892bc958781e8fcaf5d491589 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:17:24 -0800 Subject: netns xfrm: per-netns xfrm_state_bysrc hash Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 1 + net/xfrm/xfrm_state.c | 23 ++++++++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 02487b39ce53..bfcd5a3e25b6 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -14,6 +14,7 @@ struct netns_xfrm { * offending SA. */ struct hlist_head *state_bydst; + struct hlist_head *state_bysrc; }; #endif diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 08b78895ffbc..52d828bdf3d7 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -44,7 +44,6 @@ u32 sysctl_xfrm_acq_expires __read_mostly = 30; static DEFINE_SPINLOCK(xfrm_state_lock); -static struct hlist_head *xfrm_state_bysrc __read_mostly; static struct hlist_head *xfrm_state_byspi __read_mostly; static unsigned int xfrm_state_hmask __read_mostly; static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; @@ -154,12 +153,12 @@ static void xfrm_hash_resize(struct work_struct *__unused) nhashmask); odst = init_net.xfrm.state_bydst; - osrc = xfrm_state_bysrc; + osrc = init_net.xfrm.state_bysrc; ospi = xfrm_state_byspi; ohashmask = xfrm_state_hmask; init_net.xfrm.state_bydst = ndst; - xfrm_state_bysrc = nsrc; + init_net.xfrm.state_bysrc = nsrc; xfrm_state_byspi = nspi; xfrm_state_hmask = nhashmask; @@ -712,7 +711,7 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm struct xfrm_state *x; struct hlist_node *entry; - hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) { + hlist_for_each_entry(x, entry, init_net.xfrm.state_bysrc+h, bysrc) { if (x->props.family != family || x->id.proto != proto) continue; @@ -850,7 +849,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, list_add(&x->km.all, &init_net.xfrm.state_all); hlist_add_head(&x->bydst, init_net.xfrm.state_bydst+h); h = xfrm_src_hash(daddr, saddr, family); - hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); + hlist_add_head(&x->bysrc, init_net.xfrm.state_bysrc+h); if (x->id.spi) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); hlist_add_head(&x->byspi, xfrm_state_byspi+h); @@ -923,7 +922,7 @@ static void __xfrm_state_insert(struct xfrm_state *x) hlist_add_head(&x->bydst, init_net.xfrm.state_bydst+h); h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family); - hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); + hlist_add_head(&x->bysrc, init_net.xfrm.state_bysrc+h); if (x->id.spi) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, @@ -1048,7 +1047,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re list_add(&x->km.all, &init_net.xfrm.state_all); hlist_add_head(&x->bydst, init_net.xfrm.state_bydst+h); h = xfrm_src_hash(daddr, saddr, family); - hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); + hlist_add_head(&x->bysrc, init_net.xfrm.state_bysrc+h); xfrm_state_num++; @@ -1218,7 +1217,7 @@ struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m) } else { h = xfrm_src_hash(&m->old_daddr, &m->old_saddr, m->old_family); - hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) { + hlist_for_each_entry(x, entry, init_net.xfrm.state_bysrc+h, bysrc) { if (x->props.mode != m->mode || x->id.proto != m->proto) continue; @@ -2084,13 +2083,17 @@ int __net_init xfrm_state_init(struct net *net) net->xfrm.state_bydst = xfrm_hash_alloc(sz); if (!net->xfrm.state_bydst) goto out_bydst; - xfrm_state_bysrc = xfrm_hash_alloc(sz); + net->xfrm.state_bysrc = xfrm_hash_alloc(sz); + if (!net->xfrm.state_bysrc) + goto out_bysrc; xfrm_state_byspi = xfrm_hash_alloc(sz); xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task); return 0; +out_bysrc: + xfrm_hash_free(net->xfrm.state_bydst, sz); out_bydst: return -ENOMEM; } @@ -2102,6 +2105,8 @@ void xfrm_state_fini(struct net *net) WARN_ON(!list_empty(&net->xfrm.state_all)); sz = (xfrm_state_hmask + 1) * sizeof(struct hlist_head); + WARN_ON(!hlist_empty(net->xfrm.state_bysrc)); + xfrm_hash_free(net->xfrm.state_bysrc, sz); WARN_ON(!hlist_empty(net->xfrm.state_bydst)); xfrm_hash_free(net->xfrm.state_bydst, sz); } -- cgit v1.2.3 From b754a4fd8f58d245c9b5e92914cce09c4309cb67 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:17:47 -0800 Subject: netns xfrm: per-netns xfrm_state_byspi hash Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 1 + net/xfrm/xfrm_state.c | 21 +++++++++++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index bfcd5a3e25b6..b05ca3f366aa 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -15,6 +15,7 @@ struct netns_xfrm { */ struct hlist_head *state_bydst; struct hlist_head *state_bysrc; + struct hlist_head *state_byspi; }; #endif diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 52d828bdf3d7..66ca1ef7f8eb 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -44,7 +44,6 @@ u32 sysctl_xfrm_acq_expires __read_mostly = 30; static DEFINE_SPINLOCK(xfrm_state_lock); -static struct hlist_head *xfrm_state_byspi __read_mostly; static unsigned int xfrm_state_hmask __read_mostly; static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int xfrm_state_num; @@ -154,12 +153,12 @@ static void xfrm_hash_resize(struct work_struct *__unused) odst = init_net.xfrm.state_bydst; osrc = init_net.xfrm.state_bysrc; - ospi = xfrm_state_byspi; + ospi = init_net.xfrm.state_byspi; ohashmask = xfrm_state_hmask; init_net.xfrm.state_bydst = ndst; init_net.xfrm.state_bysrc = nsrc; - xfrm_state_byspi = nspi; + init_net.xfrm.state_byspi = nspi; xfrm_state_hmask = nhashmask; spin_unlock_bh(&xfrm_state_lock); @@ -679,7 +678,7 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, struct xfrm_state *x; struct hlist_node *entry; - hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) { + hlist_for_each_entry(x, entry, init_net.xfrm.state_byspi+h, byspi) { if (x->props.family != family || x->id.spi != spi || x->id.proto != proto) @@ -852,7 +851,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, hlist_add_head(&x->bysrc, init_net.xfrm.state_bysrc+h); if (x->id.spi) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); - hlist_add_head(&x->byspi, xfrm_state_byspi+h); + hlist_add_head(&x->byspi, init_net.xfrm.state_byspi+h); } x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires; x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; @@ -928,7 +927,7 @@ static void __xfrm_state_insert(struct xfrm_state *x) h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); - hlist_add_head(&x->byspi, xfrm_state_byspi+h); + hlist_add_head(&x->byspi, init_net.xfrm.state_byspi+h); } mod_timer(&x->timer, jiffies + HZ); @@ -1524,7 +1523,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) if (x->id.spi) { spin_lock_bh(&xfrm_state_lock); h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); - hlist_add_head(&x->byspi, xfrm_state_byspi+h); + hlist_add_head(&x->byspi, init_net.xfrm.state_byspi+h); spin_unlock_bh(&xfrm_state_lock); err = 0; @@ -2086,12 +2085,16 @@ int __net_init xfrm_state_init(struct net *net) net->xfrm.state_bysrc = xfrm_hash_alloc(sz); if (!net->xfrm.state_bysrc) goto out_bysrc; - xfrm_state_byspi = xfrm_hash_alloc(sz); + net->xfrm.state_byspi = xfrm_hash_alloc(sz); + if (!net->xfrm.state_byspi) + goto out_byspi; xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task); return 0; +out_byspi: + xfrm_hash_free(net->xfrm.state_bysrc, sz); out_bysrc: xfrm_hash_free(net->xfrm.state_bydst, sz); out_bydst: @@ -2105,6 +2108,8 @@ void xfrm_state_fini(struct net *net) WARN_ON(!list_empty(&net->xfrm.state_all)); sz = (xfrm_state_hmask + 1) * sizeof(struct hlist_head); + WARN_ON(!hlist_empty(net->xfrm.state_byspi)); + xfrm_hash_free(net->xfrm.state_byspi, sz); WARN_ON(!hlist_empty(net->xfrm.state_bysrc)); xfrm_hash_free(net->xfrm.state_bysrc, sz); WARN_ON(!hlist_empty(net->xfrm.state_bydst)); -- cgit v1.2.3 From 529983ecabeae3d8e61c9e27079154b1b8544dcd Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:18:12 -0800 Subject: netns xfrm: per-netns xfrm_state_hmask Since hashtables are per-netns, they can be independently resized. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 1 + net/xfrm/xfrm_state.c | 31 +++++++++++++++---------------- 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index b05ca3f366aa..dbbc0e972273 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -16,6 +16,7 @@ struct netns_xfrm { struct hlist_head *state_bydst; struct hlist_head *state_bysrc; struct hlist_head *state_byspi; + unsigned int state_hmask; }; #endif diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 66ca1ef7f8eb..de08ed9a4775 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -44,7 +44,6 @@ u32 sysctl_xfrm_acq_expires __read_mostly = 30; static DEFINE_SPINLOCK(xfrm_state_lock); -static unsigned int xfrm_state_hmask __read_mostly; static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int xfrm_state_num; static unsigned int xfrm_state_genid; @@ -64,20 +63,20 @@ static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, u32 reqid, unsigned short family) { - return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask); + return __xfrm_dst_hash(daddr, saddr, reqid, family, init_net.xfrm.state_hmask); } static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family) { - return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask); + return __xfrm_src_hash(daddr, saddr, family, init_net.xfrm.state_hmask); } static inline unsigned int xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { - return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask); + return __xfrm_spi_hash(daddr, spi, proto, family, init_net.xfrm.state_hmask); } static void xfrm_hash_transfer(struct hlist_head *list, @@ -113,7 +112,7 @@ static void xfrm_hash_transfer(struct hlist_head *list, static unsigned long xfrm_hash_new_size(void) { - return ((xfrm_state_hmask + 1) << 1) * + return ((init_net.xfrm.state_hmask + 1) << 1) * sizeof(struct hlist_head); } @@ -147,19 +146,19 @@ static void xfrm_hash_resize(struct work_struct *__unused) spin_lock_bh(&xfrm_state_lock); nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; - for (i = xfrm_state_hmask; i >= 0; i--) + for (i = init_net.xfrm.state_hmask; i >= 0; i--) xfrm_hash_transfer(init_net.xfrm.state_bydst+i, ndst, nsrc, nspi, nhashmask); odst = init_net.xfrm.state_bydst; osrc = init_net.xfrm.state_bysrc; ospi = init_net.xfrm.state_byspi; - ohashmask = xfrm_state_hmask; + ohashmask = init_net.xfrm.state_hmask; init_net.xfrm.state_bydst = ndst; init_net.xfrm.state_bysrc = nsrc; init_net.xfrm.state_byspi = nspi; - xfrm_state_hmask = nhashmask; + init_net.xfrm.state_hmask = nhashmask; spin_unlock_bh(&xfrm_state_lock); @@ -582,7 +581,7 @@ xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info) { int i, err = 0; - for (i = 0; i <= xfrm_state_hmask; i++) { + for (i = 0; i <= init_net.xfrm.state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; @@ -617,7 +616,7 @@ int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info) if (err) goto out; - for (i = 0; i <= xfrm_state_hmask; i++) { + for (i = 0; i <= init_net.xfrm.state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; restart: @@ -652,7 +651,7 @@ void xfrm_sad_getinfo(struct xfrmk_sadinfo *si) { spin_lock_bh(&xfrm_state_lock); si->sadcnt = xfrm_state_num; - si->sadhcnt = xfrm_state_hmask; + si->sadhcnt = init_net.xfrm.state_hmask; si->sadhmcnt = xfrm_state_hashmax; spin_unlock_bh(&xfrm_state_lock); } @@ -754,8 +753,8 @@ __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family) static void xfrm_hash_grow_check(int have_hash_collision) { if (have_hash_collision && - (xfrm_state_hmask + 1) < xfrm_state_hashmax && - xfrm_state_num > xfrm_state_hmask) + (init_net.xfrm.state_hmask + 1) < xfrm_state_hashmax && + xfrm_state_num > init_net.xfrm.state_hmask) schedule_work(&xfrm_hash_work); } @@ -1444,7 +1443,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) { int i; - for (i = 0; i <= xfrm_state_hmask; i++) { + for (i = 0; i <= init_net.xfrm.state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; @@ -2088,7 +2087,7 @@ int __net_init xfrm_state_init(struct net *net) net->xfrm.state_byspi = xfrm_hash_alloc(sz); if (!net->xfrm.state_byspi) goto out_byspi; - xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); + net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1); INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task); return 0; @@ -2107,7 +2106,7 @@ void xfrm_state_fini(struct net *net) WARN_ON(!list_empty(&net->xfrm.state_all)); - sz = (xfrm_state_hmask + 1) * sizeof(struct hlist_head); + sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head); WARN_ON(!hlist_empty(net->xfrm.state_byspi)); xfrm_hash_free(net->xfrm.state_byspi, sz); WARN_ON(!hlist_empty(net->xfrm.state_bysrc)); -- cgit v1.2.3 From 0bf7c5b019518d3fe9cb96b9c97bf44d251472c3 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:18:39 -0800 Subject: netns xfrm: per-netns xfrm_state counts Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 1 + net/xfrm/xfrm_state.c | 14 +++++++------- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index dbbc0e972273..492b471d2a7f 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -17,6 +17,7 @@ struct netns_xfrm { struct hlist_head *state_bysrc; struct hlist_head *state_byspi; unsigned int state_hmask; + unsigned int state_num; }; #endif diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index de08ed9a4775..7ecf6eeff84a 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -45,7 +45,6 @@ u32 sysctl_xfrm_acq_expires __read_mostly = 30; static DEFINE_SPINLOCK(xfrm_state_lock); static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; -static unsigned int xfrm_state_num; static unsigned int xfrm_state_genid; static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family); @@ -548,7 +547,7 @@ int __xfrm_state_delete(struct xfrm_state *x) hlist_del(&x->bysrc); if (x->id.spi) hlist_del(&x->byspi); - xfrm_state_num--; + init_net.xfrm.state_num--; spin_unlock(&xfrm_state_lock); /* All xfrm_state objects are created by xfrm_state_alloc. @@ -650,7 +649,7 @@ EXPORT_SYMBOL(xfrm_state_flush); void xfrm_sad_getinfo(struct xfrmk_sadinfo *si) { spin_lock_bh(&xfrm_state_lock); - si->sadcnt = xfrm_state_num; + si->sadcnt = init_net.xfrm.state_num; si->sadhcnt = init_net.xfrm.state_hmask; si->sadhmcnt = xfrm_state_hashmax; spin_unlock_bh(&xfrm_state_lock); @@ -754,7 +753,7 @@ static void xfrm_hash_grow_check(int have_hash_collision) { if (have_hash_collision && (init_net.xfrm.state_hmask + 1) < xfrm_state_hashmax && - xfrm_state_num > init_net.xfrm.state_hmask) + init_net.xfrm.state_num > init_net.xfrm.state_hmask) schedule_work(&xfrm_hash_work); } @@ -855,7 +854,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires; x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; add_timer(&x->timer); - xfrm_state_num++; + init_net.xfrm.state_num++; xfrm_hash_grow_check(x->bydst.next != NULL); } else { x->km.state = XFRM_STATE_DEAD; @@ -935,7 +934,7 @@ static void __xfrm_state_insert(struct xfrm_state *x) wake_up(&km_waitq); - xfrm_state_num++; + init_net.xfrm.state_num++; xfrm_hash_grow_check(x->bydst.next != NULL); } @@ -1047,7 +1046,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re h = xfrm_src_hash(daddr, saddr, family); hlist_add_head(&x->bysrc, init_net.xfrm.state_bysrc+h); - xfrm_state_num++; + init_net.xfrm.state_num++; xfrm_hash_grow_check(x->bydst.next != NULL); } @@ -2089,6 +2088,7 @@ int __net_init xfrm_state_init(struct net *net) goto out_byspi; net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1); + net->xfrm.state_num = 0; INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task); return 0; -- cgit v1.2.3 From 630827338585022b851ec0a6335df8e436c900e4 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:19:07 -0800 Subject: netns xfrm: per-netns xfrm_hash_work All of this is implicit passing which netns's hashes should be resized. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 2 ++ net/xfrm/xfrm_state.c | 35 +++++++++++++++++------------------ 2 files changed, 19 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 492b471d2a7f..bd688021395a 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -2,6 +2,7 @@ #define __NETNS_XFRM_H #include +#include struct netns_xfrm { struct list_head state_all; @@ -18,6 +19,7 @@ struct netns_xfrm { struct hlist_head *state_byspi; unsigned int state_hmask; unsigned int state_num; + struct work_struct state_hash_work; }; #endif diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 7ecf6eeff84a..1b2a72c8429c 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -109,16 +109,16 @@ static void xfrm_hash_transfer(struct hlist_head *list, } } -static unsigned long xfrm_hash_new_size(void) +static unsigned long xfrm_hash_new_size(unsigned int state_hmask) { - return ((init_net.xfrm.state_hmask + 1) << 1) * - sizeof(struct hlist_head); + return ((state_hmask + 1) << 1) * sizeof(struct hlist_head); } static DEFINE_MUTEX(hash_resize_mutex); -static void xfrm_hash_resize(struct work_struct *__unused) +static void xfrm_hash_resize(struct work_struct *work) { + struct net *net = container_of(work, struct net, xfrm.state_hash_work); struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi; unsigned long nsize, osize; unsigned int nhashmask, ohashmask; @@ -126,7 +126,7 @@ static void xfrm_hash_resize(struct work_struct *__unused) mutex_lock(&hash_resize_mutex); - nsize = xfrm_hash_new_size(); + nsize = xfrm_hash_new_size(net->xfrm.state_hmask); ndst = xfrm_hash_alloc(nsize); if (!ndst) goto out_unlock; @@ -145,19 +145,19 @@ static void xfrm_hash_resize(struct work_struct *__unused) spin_lock_bh(&xfrm_state_lock); nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; - for (i = init_net.xfrm.state_hmask; i >= 0; i--) - xfrm_hash_transfer(init_net.xfrm.state_bydst+i, ndst, nsrc, nspi, + for (i = net->xfrm.state_hmask; i >= 0; i--) + xfrm_hash_transfer(net->xfrm.state_bydst+i, ndst, nsrc, nspi, nhashmask); - odst = init_net.xfrm.state_bydst; - osrc = init_net.xfrm.state_bysrc; - ospi = init_net.xfrm.state_byspi; - ohashmask = init_net.xfrm.state_hmask; + odst = net->xfrm.state_bydst; + osrc = net->xfrm.state_bysrc; + ospi = net->xfrm.state_byspi; + ohashmask = net->xfrm.state_hmask; - init_net.xfrm.state_bydst = ndst; - init_net.xfrm.state_bysrc = nsrc; - init_net.xfrm.state_byspi = nspi; - init_net.xfrm.state_hmask = nhashmask; + net->xfrm.state_bydst = ndst; + net->xfrm.state_bysrc = nsrc; + net->xfrm.state_byspi = nspi; + net->xfrm.state_hmask = nhashmask; spin_unlock_bh(&xfrm_state_lock); @@ -170,8 +170,6 @@ out_unlock: mutex_unlock(&hash_resize_mutex); } -static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize); - DECLARE_WAIT_QUEUE_HEAD(km_waitq); EXPORT_SYMBOL(km_waitq); @@ -754,7 +752,7 @@ static void xfrm_hash_grow_check(int have_hash_collision) if (have_hash_collision && (init_net.xfrm.state_hmask + 1) < xfrm_state_hashmax && init_net.xfrm.state_num > init_net.xfrm.state_hmask) - schedule_work(&xfrm_hash_work); + schedule_work(&init_net.xfrm.state_hash_work); } struct xfrm_state * @@ -2089,6 +2087,7 @@ int __net_init xfrm_state_init(struct net *net) net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1); net->xfrm.state_num = 0; + INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize); INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task); return 0; -- cgit v1.2.3 From b8a0ae20b0eecd4b86a113d2abe2fa5a582b30a6 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:20:11 -0800 Subject: netns xfrm: per-netns state GC list km_waitq is going to be made per-netns to disallow spurious wakeups in __xfrm_lookup(). To not wakeup after every garbage-collected xfrm_state (which potentially can be from different netns) make state GC list per-netns. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 1 + net/xfrm/xfrm_state.c | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index bd688021395a..8ceb76568852 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -20,6 +20,7 @@ struct netns_xfrm { unsigned int state_hmask; unsigned int state_num; struct work_struct state_hash_work; + struct hlist_head state_gc_list; }; #endif diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1b2a72c8429c..864a97477ae5 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -177,7 +177,6 @@ static DEFINE_RWLOCK(xfrm_state_afinfo_lock); static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; static struct work_struct xfrm_state_gc_work; -static HLIST_HEAD(xfrm_state_gc_list); static DEFINE_SPINLOCK(xfrm_state_gc_lock); int __xfrm_state_delete(struct xfrm_state *x); @@ -394,7 +393,7 @@ static void xfrm_state_gc_task(struct work_struct *data) struct hlist_head gc_list; spin_lock_bh(&xfrm_state_gc_lock); - hlist_move_list(&xfrm_state_gc_list, &gc_list); + hlist_move_list(&init_net.xfrm.state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist) @@ -527,7 +526,7 @@ void __xfrm_state_destroy(struct xfrm_state *x) WARN_ON(x->km.state != XFRM_STATE_DEAD); spin_lock_bh(&xfrm_state_gc_lock); - hlist_add_head(&x->gclist, &xfrm_state_gc_list); + hlist_add_head(&x->gclist, &init_net.xfrm.state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); schedule_work(&xfrm_state_gc_work); } @@ -2088,6 +2087,7 @@ int __net_init xfrm_state_init(struct net *net) net->xfrm.state_num = 0; INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize); + INIT_HLIST_HEAD(&net->xfrm.state_gc_list); INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task); return 0; -- cgit v1.2.3 From c78371441c0d957f54c9f8a35b3ee5a378d14808 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:20:36 -0800 Subject: netns xfrm: per-netns state GC work State GC is per-netns, and this is part of it. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 1 + net/xfrm/xfrm_state.c | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 8ceb76568852..80555351fe5e 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -21,6 +21,7 @@ struct netns_xfrm { unsigned int state_num; struct work_struct state_hash_work; struct hlist_head state_gc_list; + struct work_struct state_gc_work; }; #endif diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 864a97477ae5..69c0b06bf756 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -176,7 +176,6 @@ EXPORT_SYMBOL(km_waitq); static DEFINE_RWLOCK(xfrm_state_afinfo_lock); static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; -static struct work_struct xfrm_state_gc_work; static DEFINE_SPINLOCK(xfrm_state_gc_lock); int __xfrm_state_delete(struct xfrm_state *x); @@ -386,14 +385,15 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x); } -static void xfrm_state_gc_task(struct work_struct *data) +static void xfrm_state_gc_task(struct work_struct *work) { + struct net *net = container_of(work, struct net, xfrm.state_gc_work); struct xfrm_state *x; struct hlist_node *entry, *tmp; struct hlist_head gc_list; spin_lock_bh(&xfrm_state_gc_lock); - hlist_move_list(&init_net.xfrm.state_gc_list, &gc_list); + hlist_move_list(&net->xfrm.state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist) @@ -528,7 +528,7 @@ void __xfrm_state_destroy(struct xfrm_state *x) spin_lock_bh(&xfrm_state_gc_lock); hlist_add_head(&x->gclist, &init_net.xfrm.state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - schedule_work(&xfrm_state_gc_work); + schedule_work(&init_net.xfrm.state_gc_work); } EXPORT_SYMBOL(__xfrm_state_destroy); @@ -2088,7 +2088,7 @@ int __net_init xfrm_state_init(struct net *net) net->xfrm.state_num = 0; INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize); INIT_HLIST_HEAD(&net->xfrm.state_gc_list); - INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task); + INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task); return 0; out_byspi: -- cgit v1.2.3 From 50a30657fd7ee77a94a6bf0ad86eba7c37c3032e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:21:01 -0800 Subject: netns xfrm: per-netns km_waitq Disallow spurious wakeups in __xfrm_lookup(). Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 3 +++ include/net/xfrm.h | 1 - net/key/af_key.c | 2 +- net/xfrm/xfrm_policy.c | 4 ++-- net/xfrm/xfrm_state.c | 16 +++++++--------- 5 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 80555351fe5e..2a383c8ba1af 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -2,6 +2,7 @@ #define __NETNS_XFRM_H #include +#include #include struct netns_xfrm { @@ -22,6 +23,8 @@ struct netns_xfrm { struct work_struct state_hash_work; struct hlist_head state_gc_list; struct work_struct state_gc_work; + + wait_queue_head_t km_waitq; }; #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 9da89039832c..0d4353c11093 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1459,7 +1459,6 @@ extern int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_kmaddress *k); #endif -extern wait_queue_head_t km_waitq; extern int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid); extern int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); diff --git a/net/key/af_key.c b/net/key/af_key.c index bde8aad4cc93..f202ba6c8dcb 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1411,7 +1411,7 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg * spin_lock_bh(&x->lock); if (x->km.state == XFRM_STATE_ACQ) { x->km.state = XFRM_STATE_ERROR; - wake_up(&km_waitq); + wake_up(&init_net.xfrm.km_waitq); } spin_unlock_bh(&x->lock); xfrm_state_put(x); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8e7671b9e76e..cf2bf3aa7ab4 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1691,11 +1691,11 @@ restart: if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) { DECLARE_WAITQUEUE(wait, current); - add_wait_queue(&km_waitq, &wait); + add_wait_queue(&init_net.xfrm.km_waitq, &wait); set_current_state(TASK_INTERRUPTIBLE); schedule(); set_current_state(TASK_RUNNING); - remove_wait_queue(&km_waitq, &wait); + remove_wait_queue(&init_net.xfrm.km_waitq, &wait); nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 69c0b06bf756..24bd89e76236 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -170,9 +170,6 @@ out_unlock: mutex_unlock(&hash_resize_mutex); } -DECLARE_WAIT_QUEUE_HEAD(km_waitq); -EXPORT_SYMBOL(km_waitq); - static DEFINE_RWLOCK(xfrm_state_afinfo_lock); static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; @@ -399,7 +396,7 @@ static void xfrm_state_gc_task(struct work_struct *work) hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist) xfrm_state_gc_destroy(x); - wake_up(&km_waitq); + wake_up(&net->xfrm.km_waitq); } static inline unsigned long make_jiffies(long secs) @@ -470,7 +467,7 @@ resched: expired: if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) { x->km.state = XFRM_STATE_EXPIRED; - wake_up(&km_waitq); + wake_up(&init_net.xfrm.km_waitq); next = 2; goto resched; } @@ -638,7 +635,7 @@ restart: out: spin_unlock_bh(&xfrm_state_lock); - wake_up(&km_waitq); + wake_up(&init_net.xfrm.km_waitq); return err; } EXPORT_SYMBOL(xfrm_state_flush); @@ -929,7 +926,7 @@ static void __xfrm_state_insert(struct xfrm_state *x) if (x->replay_maxage) mod_timer(&x->rtimer, jiffies + x->replay_maxage); - wake_up(&km_waitq); + wake_up(&init_net.xfrm.km_waitq); init_net.xfrm.state_num++; @@ -1743,7 +1740,7 @@ void km_state_expired(struct xfrm_state *x, int hard, u32 pid) km_state_notify(x, &c); if (hard) - wake_up(&km_waitq); + wake_up(&init_net.xfrm.km_waitq); } EXPORT_SYMBOL(km_state_expired); @@ -1794,7 +1791,7 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) km_policy_notify(pol, dir, &c); if (hard) - wake_up(&km_waitq); + wake_up(&init_net.xfrm.km_waitq); } EXPORT_SYMBOL(km_policy_expired); @@ -2089,6 +2086,7 @@ int __net_init xfrm_state_init(struct net *net) INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize); INIT_HLIST_HEAD(&net->xfrm.state_gc_list); INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task); + init_waitqueue_head(&net->xfrm.km_waitq); return 0; out_byspi: -- cgit v1.2.3 From 0331b1f383e1fa4049f8e75cafeea8f006171c64 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:21:45 -0800 Subject: netns xfrm: add struct xfrm_policy::xp_net Again, to avoid complications with passing netns when not necessary. Again, ->xp_net is set-once field, once set it never changes. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 10 +++++++++- net/key/af_key.c | 4 ++-- net/xfrm/xfrm_policy.c | 5 +++-- net/xfrm/xfrm_user.c | 4 ++-- 4 files changed, 16 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 0d4353c11093..1ab17565f01c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -475,6 +475,9 @@ struct xfrm_policy_walk { struct xfrm_policy { +#ifdef CONFIG_NET_NS + struct net *xp_net; +#endif struct hlist_node bydst; struct hlist_node byidx; @@ -499,6 +502,11 @@ struct xfrm_policy struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH]; }; +static inline struct net *xp_net(struct xfrm_policy *xp) +{ + return read_pnet(&xp->xp_net); +} + struct xfrm_kmaddress { xfrm_address_t local; xfrm_address_t remote; @@ -1425,7 +1433,7 @@ static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) } #endif -struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp); +struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp); extern void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type); extern int xfrm_policy_walk(struct xfrm_policy_walk *walk, diff --git a/net/key/af_key.c b/net/key/af_key.c index f202ba6c8dcb..036315d6b665 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2174,7 +2174,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h if (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) return -EINVAL; - xp = xfrm_policy_alloc(GFP_KERNEL); + xp = xfrm_policy_alloc(&init_net, GFP_KERNEL); if (xp == NULL) return -ENOBUFS; @@ -3141,7 +3141,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir > IPSEC_DIR_OUTBOUND)) return NULL; - xp = xfrm_policy_alloc(GFP_ATOMIC); + xp = xfrm_policy_alloc(&init_net, GFP_ATOMIC); if (xp == NULL) { *dir = -ENOBUFS; return NULL; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index cf2bf3aa7ab4..3eccefae2c8a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -228,13 +228,14 @@ expired: * SPD calls. */ -struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp) +struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) { struct xfrm_policy *policy; policy = kzalloc(sizeof(struct xfrm_policy), gfp); if (policy) { + write_pnet(&policy->xp_net, net); INIT_LIST_HEAD(&policy->walk.all); INIT_HLIST_NODE(&policy->bydst); INIT_HLIST_NODE(&policy->byidx); @@ -1153,7 +1154,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) { - struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC); + struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC); if (newp) { newp->selector = old->selector; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 65cdaa5c2280..765c01e784e9 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1080,7 +1080,7 @@ static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_i static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, struct nlattr **attrs, int *errp) { - struct xfrm_policy *xp = xfrm_policy_alloc(GFP_KERNEL); + struct xfrm_policy *xp = xfrm_policy_alloc(&init_net, GFP_KERNEL); int err; if (!xp) { @@ -2291,7 +2291,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, if (p->dir > XFRM_POLICY_OUT) return NULL; - xp = xfrm_policy_alloc(GFP_KERNEL); + xp = xfrm_policy_alloc(&init_net, GFP_KERNEL); if (xp == NULL) { *dir = -ENOBUFS; return NULL; -- cgit v1.2.3 From adfcf0b27e87d16a6a8c364daa724653d4d8930b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:22:11 -0800 Subject: netns xfrm: per-netns policy list Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 2 ++ net/xfrm/xfrm_policy.c | 12 ++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 2a383c8ba1af..66489249cd1a 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -25,6 +25,8 @@ struct netns_xfrm { struct work_struct state_gc_work; wait_queue_head_t km_waitq; + + struct list_head policy_all; }; #endif diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 3eccefae2c8a..aabc7f28e9c0 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -46,7 +46,6 @@ EXPORT_SYMBOL(xfrm_cfg_mutex); static DEFINE_RWLOCK(xfrm_policy_lock); -static struct list_head xfrm_policy_all; unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; EXPORT_SYMBOL(xfrm_policy_count); @@ -615,7 +614,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); - list_add(&policy->walk.all, &xfrm_policy_all); + list_add(&policy->walk.all, &init_net.xfrm.policy_all); write_unlock_bh(&xfrm_policy_lock); if (delpol) @@ -881,10 +880,10 @@ int xfrm_policy_walk(struct xfrm_policy_walk *walk, write_lock_bh(&xfrm_policy_lock); if (list_empty(&walk->walk.all)) - x = list_first_entry(&xfrm_policy_all, struct xfrm_policy_walk_entry, all); + x = list_first_entry(&init_net.xfrm.policy_all, struct xfrm_policy_walk_entry, all); else x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all); - list_for_each_entry_from(x, &xfrm_policy_all, all) { + list_for_each_entry_from(x, &init_net.xfrm.policy_all, all) { if (x->dead) continue; pol = container_of(x, struct xfrm_policy, walk); @@ -1086,7 +1085,7 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) struct hlist_head *chain = policy_hash_bysel(&pol->selector, pol->family, dir); - list_add(&pol->walk.all, &xfrm_policy_all); + list_add(&pol->walk.all, &init_net.xfrm.policy_all); hlist_add_head(&pol->bydst, chain); hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index)); xfrm_policy_count[dir]++; @@ -2426,7 +2425,7 @@ static int __net_init xfrm_policy_init(struct net *net) panic("XFRM: failed to allocate bydst hash\n"); } - INIT_LIST_HEAD(&xfrm_policy_all); + INIT_LIST_HEAD(&net->xfrm.policy_all); if (net_eq(net, &init_net)) register_netdevice_notifier(&xfrm_dev_notifier); return 0; @@ -2434,6 +2433,7 @@ static int __net_init xfrm_policy_init(struct net *net) static void xfrm_policy_fini(struct net *net) { + WARN_ON(!list_empty(&net->xfrm.policy_all)); } static int __net_init xfrm_net_init(struct net *net) -- cgit v1.2.3 From 93b851c1c93c7d5cd8d94cd3f3a268b2d5460e9e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:22:35 -0800 Subject: netns xfrm: per-netns xfrm_policy_byidx hash Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 1 + net/xfrm/xfrm_policy.c | 28 ++++++++++++++++++---------- 2 files changed, 19 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 66489249cd1a..5cd7d06c692b 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -27,6 +27,7 @@ struct netns_xfrm { wait_queue_head_t km_waitq; struct list_head policy_all; + struct hlist_head *policy_byidx; }; #endif diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index aabc7f28e9c0..700cdd7564e4 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -329,7 +329,6 @@ struct xfrm_policy_hash { static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2]; static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly; -static struct hlist_head *xfrm_policy_byidx __read_mostly; static unsigned int xfrm_idx_hmask __read_mostly; static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; @@ -438,7 +437,7 @@ static void xfrm_byidx_resize(int total) unsigned int hmask = xfrm_idx_hmask; unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); - struct hlist_head *oidx = xfrm_policy_byidx; + struct hlist_head *oidx = init_net.xfrm.policy_byidx; struct hlist_head *nidx = xfrm_hash_alloc(nsize); int i; @@ -450,7 +449,7 @@ static void xfrm_byidx_resize(int total) for (i = hmask; i >= 0; i--) xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); - xfrm_policy_byidx = nidx; + init_net.xfrm.policy_byidx = nidx; xfrm_idx_hmask = nhashmask; write_unlock_bh(&xfrm_policy_lock); @@ -536,7 +535,7 @@ static u32 xfrm_gen_index(int dir) idx_generator += 8; if (idx == 0) idx = 8; - list = xfrm_policy_byidx + idx_hash(idx); + list = init_net.xfrm.policy_byidx + idx_hash(idx); found = 0; hlist_for_each_entry(p, entry, list, byidx) { if (p->index == idx) { @@ -609,7 +608,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) xfrm_policy_count[dir]--; } policy->index = delpol ? delpol->index : xfrm_gen_index(dir); - hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index)); + hlist_add_head(&policy->byidx, init_net.xfrm.policy_byidx+idx_hash(policy->index)); policy->curlft.add_time = get_seconds(); policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) @@ -711,7 +710,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, *err = 0; write_lock_bh(&xfrm_policy_lock); - chain = xfrm_policy_byidx + idx_hash(id); + chain = init_net.xfrm.policy_byidx + idx_hash(id); ret = NULL; hlist_for_each_entry(pol, entry, chain, byidx) { if (pol->type == type && pol->index == id) { @@ -1087,7 +1086,7 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) list_add(&pol->walk.all, &init_net.xfrm.policy_all); hlist_add_head(&pol->bydst, chain); - hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index)); + hlist_add_head(&pol->byidx, init_net.xfrm.policy_byidx+idx_hash(pol->index)); xfrm_policy_count[dir]++; xfrm_pol_hold(pol); @@ -2408,10 +2407,10 @@ static int __net_init xfrm_policy_init(struct net *net) hmask = 8 - 1; sz = (hmask+1) * sizeof(struct hlist_head); - xfrm_policy_byidx = xfrm_hash_alloc(sz); + net->xfrm.policy_byidx = xfrm_hash_alloc(sz); + if (!net->xfrm.policy_byidx) + goto out_byidx; xfrm_idx_hmask = hmask; - if (!xfrm_policy_byidx) - panic("XFRM: failed to allocate byidx hash\n"); for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { struct xfrm_policy_hash *htab; @@ -2429,11 +2428,20 @@ static int __net_init xfrm_policy_init(struct net *net) if (net_eq(net, &init_net)) register_netdevice_notifier(&xfrm_dev_notifier); return 0; + +out_byidx: + return -ENOMEM; } static void xfrm_policy_fini(struct net *net) { + unsigned int sz; + WARN_ON(!list_empty(&net->xfrm.policy_all)); + + sz = (xfrm_idx_hmask + 1) * sizeof(struct hlist_head); + WARN_ON(!hlist_empty(net->xfrm.policy_byidx)); + xfrm_hash_free(net->xfrm.policy_byidx, sz); } static int __net_init xfrm_net_init(struct net *net) -- cgit v1.2.3 From 8100bea7d619e8496ad8e545d1b41f536e076cd5 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:22:58 -0800 Subject: netns xfrm: per-netns xfrm_policy_byidx hashmask Per-netns hashes are independently resizeable. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 1 + net/xfrm/xfrm_policy.c | 15 +++++++-------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 5cd7d06c692b..42dc318fe3de 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -28,6 +28,7 @@ struct netns_xfrm { struct list_head policy_all; struct hlist_head *policy_byidx; + unsigned int policy_idx_hmask; }; #endif diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 700cdd7564e4..9e37a44f148e 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -329,12 +329,11 @@ struct xfrm_policy_hash { static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2]; static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly; -static unsigned int xfrm_idx_hmask __read_mostly; static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; static inline unsigned int idx_hash(u32 index) { - return __idx_hash(index, xfrm_idx_hmask); + return __idx_hash(index, init_net.xfrm.policy_idx_hmask); } static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir) @@ -434,7 +433,7 @@ static void xfrm_bydst_resize(int dir) static void xfrm_byidx_resize(int total) { - unsigned int hmask = xfrm_idx_hmask; + unsigned int hmask = init_net.xfrm.policy_idx_hmask; unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); struct hlist_head *oidx = init_net.xfrm.policy_byidx; @@ -450,7 +449,7 @@ static void xfrm_byidx_resize(int total) xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); init_net.xfrm.policy_byidx = nidx; - xfrm_idx_hmask = nhashmask; + init_net.xfrm.policy_idx_hmask = nhashmask; write_unlock_bh(&xfrm_policy_lock); @@ -474,7 +473,7 @@ static inline int xfrm_bydst_should_resize(int dir, int *total) static inline int xfrm_byidx_should_resize(int total) { - unsigned int hmask = xfrm_idx_hmask; + unsigned int hmask = init_net.xfrm.policy_idx_hmask; if ((hmask + 1) < xfrm_policy_hashmax && total > hmask) @@ -492,7 +491,7 @@ void xfrm_spd_getinfo(struct xfrmk_spdinfo *si) si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; - si->spdhcnt = xfrm_idx_hmask; + si->spdhcnt = init_net.xfrm.policy_idx_hmask; si->spdhmcnt = xfrm_policy_hashmax; read_unlock_bh(&xfrm_policy_lock); } @@ -2410,7 +2409,7 @@ static int __net_init xfrm_policy_init(struct net *net) net->xfrm.policy_byidx = xfrm_hash_alloc(sz); if (!net->xfrm.policy_byidx) goto out_byidx; - xfrm_idx_hmask = hmask; + net->xfrm.policy_idx_hmask = hmask; for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { struct xfrm_policy_hash *htab; @@ -2439,7 +2438,7 @@ static void xfrm_policy_fini(struct net *net) WARN_ON(!list_empty(&net->xfrm.policy_all)); - sz = (xfrm_idx_hmask + 1) * sizeof(struct hlist_head); + sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head); WARN_ON(!hlist_empty(net->xfrm.policy_byidx)); xfrm_hash_free(net->xfrm.policy_byidx, sz); } -- cgit v1.2.3 From 8b18f8eaf9207d53ba3e69f2b98d7290f4dec227 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:23:26 -0800 Subject: netns xfrm: per-netns inexact policies Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 2 ++ net/xfrm/xfrm_policy.c | 20 ++++++++++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 42dc318fe3de..c7568315f16c 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -4,6 +4,7 @@ #include #include #include +#include struct netns_xfrm { struct list_head state_all; @@ -29,6 +30,7 @@ struct netns_xfrm { struct list_head policy_all; struct hlist_head *policy_byidx; unsigned int policy_idx_hmask; + struct hlist_head policy_inexact[XFRM_POLICY_MAX * 2]; }; #endif diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 9e37a44f148e..ba4e95b8e24e 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -327,7 +327,6 @@ struct xfrm_policy_hash { unsigned int hmask; }; -static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2]; static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly; static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; @@ -342,7 +341,7 @@ static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned unsigned int hash = __sel_hash(sel, family, hmask); return (hash == hmask + 1 ? - &xfrm_policy_inexact[dir] : + &init_net.xfrm.policy_inexact[dir] : xfrm_policy_bydst[dir].table + hash); } @@ -752,7 +751,7 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) int i; hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) { + &init_net.xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; err = security_xfrm_policy_delete(pol->security); @@ -810,7 +809,7 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) killed = 0; again1: hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) { + &init_net.xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; hlist_del(&pol->bydst); @@ -983,7 +982,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, break; } } - chain = &xfrm_policy_inexact[dir]; + chain = &init_net.xfrm.policy_inexact[dir]; hlist_for_each_entry(pol, entry, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); if (err) { @@ -2152,7 +2151,7 @@ static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) int i; hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) + &init_net.xfrm.policy_inexact[dir], bydst) prune_one_bundle(pol, func, &gc_list); table = xfrm_policy_bydst[dir].table; @@ -2414,7 +2413,7 @@ static int __net_init xfrm_policy_init(struct net *net) for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { struct xfrm_policy_hash *htab; - INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]); + INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]); htab = &xfrm_policy_bydst[dir]; htab->table = xfrm_hash_alloc(sz); @@ -2435,9 +2434,14 @@ out_byidx: static void xfrm_policy_fini(struct net *net) { unsigned int sz; + int dir; WARN_ON(!list_empty(&net->xfrm.policy_all)); + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir])); + } + sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head); WARN_ON(!hlist_empty(net->xfrm.policy_byidx)); xfrm_hash_free(net->xfrm.policy_byidx, sz); @@ -2590,7 +2594,7 @@ static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel, break; } } - chain = &xfrm_policy_inexact[dir]; + chain = &init_net.xfrm.policy_inexact[dir]; hlist_for_each_entry(pol, entry, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type && -- cgit v1.2.3 From a35f6c5de32664d82c072a7e2c7d5c5234de4158 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:23:48 -0800 Subject: netns xfrm: per-netns xfrm_policy_bydst hash Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 6 +++++ net/xfrm/xfrm_policy.c | 57 ++++++++++++++++++++++++++++-------------------- 2 files changed, 39 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index c7568315f16c..39cfa799fa90 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -6,6 +6,11 @@ #include #include +struct xfrm_policy_hash { + struct hlist_head *table; + unsigned int hmask; +}; + struct netns_xfrm { struct list_head state_all; /* @@ -31,6 +36,7 @@ struct netns_xfrm { struct hlist_head *policy_byidx; unsigned int policy_idx_hmask; struct hlist_head policy_inexact[XFRM_POLICY_MAX * 2]; + struct xfrm_policy_hash policy_bydst[XFRM_POLICY_MAX * 2]; }; #endif diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ba4e95b8e24e..929b2fdaa2ef 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -322,12 +322,6 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) schedule_work(&xfrm_policy_gc_work); } -struct xfrm_policy_hash { - struct hlist_head *table; - unsigned int hmask; -}; - -static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly; static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; static inline unsigned int idx_hash(u32 index) @@ -337,20 +331,20 @@ static inline unsigned int idx_hash(u32 index) static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir) { - unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hmask = init_net.xfrm.policy_bydst[dir].hmask; unsigned int hash = __sel_hash(sel, family, hmask); return (hash == hmask + 1 ? &init_net.xfrm.policy_inexact[dir] : - xfrm_policy_bydst[dir].table + hash); + init_net.xfrm.policy_bydst[dir].table + hash); } static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir) { - unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hmask = init_net.xfrm.policy_bydst[dir].hmask; unsigned int hash = __addr_hash(daddr, saddr, family, hmask); - return xfrm_policy_bydst[dir].table + hash; + return init_net.xfrm.policy_bydst[dir].table + hash; } static void xfrm_dst_hash_transfer(struct hlist_head *list, @@ -407,10 +401,10 @@ static unsigned long xfrm_new_hash_mask(unsigned int old_hmask) static void xfrm_bydst_resize(int dir) { - unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hmask = init_net.xfrm.policy_bydst[dir].hmask; unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); - struct hlist_head *odst = xfrm_policy_bydst[dir].table; + struct hlist_head *odst = init_net.xfrm.policy_bydst[dir].table; struct hlist_head *ndst = xfrm_hash_alloc(nsize); int i; @@ -422,8 +416,8 @@ static void xfrm_bydst_resize(int dir) for (i = hmask; i >= 0; i--) xfrm_dst_hash_transfer(odst + i, ndst, nhashmask); - xfrm_policy_bydst[dir].table = ndst; - xfrm_policy_bydst[dir].hmask = nhashmask; + init_net.xfrm.policy_bydst[dir].table = ndst; + init_net.xfrm.policy_bydst[dir].hmask = nhashmask; write_unlock_bh(&xfrm_policy_lock); @@ -458,7 +452,7 @@ static void xfrm_byidx_resize(int total) static inline int xfrm_bydst_should_resize(int dir, int *total) { unsigned int cnt = xfrm_policy_count[dir]; - unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hmask = init_net.xfrm.policy_bydst[dir].hmask; if (total) *total += cnt; @@ -763,9 +757,9 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) return err; } } - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + for (i = init_net.xfrm.policy_bydst[dir].hmask; i >= 0; i--) { hlist_for_each_entry(pol, entry, - xfrm_policy_bydst[dir].table + i, + init_net.xfrm.policy_bydst[dir].table + i, bydst) { if (pol->type != type) continue; @@ -827,10 +821,10 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) goto again1; } - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + for (i = init_net.xfrm.policy_bydst[dir].hmask; i >= 0; i--) { again2: hlist_for_each_entry(pol, entry, - xfrm_policy_bydst[dir].table + i, + init_net.xfrm.policy_bydst[dir].table + i, bydst) { if (pol->type != type) continue; @@ -2154,8 +2148,8 @@ static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) &init_net.xfrm.policy_inexact[dir], bydst) prune_one_bundle(pol, func, &gc_list); - table = xfrm_policy_bydst[dir].table; - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + table = init_net.xfrm.policy_bydst[dir].table; + for (i = init_net.xfrm.policy_bydst[dir].hmask; i >= 0; i--) { hlist_for_each_entry(pol, entry, table + i, bydst) prune_one_bundle(pol, func, &gc_list); } @@ -2415,11 +2409,11 @@ static int __net_init xfrm_policy_init(struct net *net) INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]); - htab = &xfrm_policy_bydst[dir]; + htab = &net->xfrm.policy_bydst[dir]; htab->table = xfrm_hash_alloc(sz); - htab->hmask = hmask; if (!htab->table) - panic("XFRM: failed to allocate bydst hash\n"); + goto out_bydst; + htab->hmask = hmask; } INIT_LIST_HEAD(&net->xfrm.policy_all); @@ -2427,6 +2421,14 @@ static int __net_init xfrm_policy_init(struct net *net) register_netdevice_notifier(&xfrm_dev_notifier); return 0; +out_bydst: + for (dir--; dir >= 0; dir--) { + struct xfrm_policy_hash *htab; + + htab = &net->xfrm.policy_bydst[dir]; + xfrm_hash_free(htab->table, sz); + } + xfrm_hash_free(net->xfrm.policy_byidx, sz); out_byidx: return -ENOMEM; } @@ -2439,7 +2441,14 @@ static void xfrm_policy_fini(struct net *net) WARN_ON(!list_empty(&net->xfrm.policy_all)); for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + struct xfrm_policy_hash *htab; + WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir])); + + htab = &net->xfrm.policy_bydst[dir]; + sz = (htab->hmask + 1); + WARN_ON(!hlist_empty(htab->table)); + xfrm_hash_free(htab->table, sz); } sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head); -- cgit v1.2.3 From dc2caba7b321289e7d02e63d7216961ccecfa103 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:24:15 -0800 Subject: netns xfrm: per-netns policy counts Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 1 + include/net/xfrm.h | 6 ++---- net/xfrm/xfrm_policy.c | 34 ++++++++++++++++------------------ 3 files changed, 19 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 39cfa799fa90..d5aadf06be46 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -37,6 +37,7 @@ struct netns_xfrm { unsigned int policy_idx_hmask; struct hlist_head policy_inexact[XFRM_POLICY_MAX * 2]; struct xfrm_policy_hash policy_bydst[XFRM_POLICY_MAX * 2]; + unsigned int policy_count[XFRM_POLICY_MAX * 2]; }; #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 1ab17565f01c..8699620f8c2d 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -559,8 +559,6 @@ struct xfrm_mgr extern int xfrm_register_km(struct xfrm_mgr *km); extern int xfrm_unregister_km(struct xfrm_mgr *km); -extern unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; - /* * This structure is used for the duration where packets are being * transformed by IPsec. As soon as the packet leaves IPsec the @@ -999,7 +997,7 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir, if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, ndir, skb, family); - return (!xfrm_policy_count[dir] && !skb->sp) || + return (!init_net.xfrm.policy_count[dir] && !skb->sp) || (skb->dst->flags & DST_NOPOLICY) || __xfrm_policy_check(sk, ndir, skb, family); } @@ -1051,7 +1049,7 @@ extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family); static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) { - return !xfrm_policy_count[XFRM_POLICY_OUT] || + return !init_net.xfrm.policy_count[XFRM_POLICY_OUT] || (skb->dst->flags & DST_NOXFRM) || __xfrm_route_forward(skb, family); } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 929b2fdaa2ef..630ec048a0d3 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -46,9 +46,6 @@ EXPORT_SYMBOL(xfrm_cfg_mutex); static DEFINE_RWLOCK(xfrm_policy_lock); -unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; -EXPORT_SYMBOL(xfrm_policy_count); - static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; @@ -451,7 +448,7 @@ static void xfrm_byidx_resize(int total) static inline int xfrm_bydst_should_resize(int dir, int *total) { - unsigned int cnt = xfrm_policy_count[dir]; + unsigned int cnt = init_net.xfrm.policy_count[dir]; unsigned int hmask = init_net.xfrm.policy_bydst[dir].hmask; if (total) @@ -478,12 +475,12 @@ static inline int xfrm_byidx_should_resize(int total) void xfrm_spd_getinfo(struct xfrmk_spdinfo *si) { read_lock_bh(&xfrm_policy_lock); - si->incnt = xfrm_policy_count[XFRM_POLICY_IN]; - si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT]; - si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD]; - si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; - si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; - si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; + si->incnt = init_net.xfrm.policy_count[XFRM_POLICY_IN]; + si->outcnt = init_net.xfrm.policy_count[XFRM_POLICY_OUT]; + si->fwdcnt = init_net.xfrm.policy_count[XFRM_POLICY_FWD]; + si->inscnt = init_net.xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; + si->outscnt = init_net.xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; + si->fwdscnt = init_net.xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; si->spdhcnt = init_net.xfrm.policy_idx_hmask; si->spdhmcnt = xfrm_policy_hashmax; read_unlock_bh(&xfrm_policy_lock); @@ -591,13 +588,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) else hlist_add_head(&policy->bydst, chain); xfrm_pol_hold(policy); - xfrm_policy_count[dir]++; + init_net.xfrm.policy_count[dir]++; atomic_inc(&flow_cache_genid); if (delpol) { hlist_del(&delpol->bydst); hlist_del(&delpol->byidx); list_del(&delpol->walk.all); - xfrm_policy_count[dir]--; + init_net.xfrm.policy_count[dir]--; } policy->index = delpol ? delpol->index : xfrm_gen_index(dir); hlist_add_head(&policy->byidx, init_net.xfrm.policy_byidx+idx_hash(policy->index)); @@ -673,7 +670,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, hlist_del(&pol->bydst); hlist_del(&pol->byidx); list_del(&pol->walk.all); - xfrm_policy_count[dir]--; + init_net.xfrm.policy_count[dir]--; } ret = pol; break; @@ -717,7 +714,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, hlist_del(&pol->bydst); hlist_del(&pol->byidx); list_del(&pol->walk.all); - xfrm_policy_count[dir]--; + init_net.xfrm.policy_count[dir]--; } ret = pol; break; @@ -845,7 +842,7 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) } } - xfrm_policy_count[dir] -= killed; + init_net.xfrm.policy_count[dir] -= killed; } atomic_inc(&flow_cache_genid); out: @@ -1079,7 +1076,7 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) list_add(&pol->walk.all, &init_net.xfrm.policy_all); hlist_add_head(&pol->bydst, chain); hlist_add_head(&pol->byidx, init_net.xfrm.policy_byidx+idx_hash(pol->index)); - xfrm_policy_count[dir]++; + init_net.xfrm.policy_count[dir]++; xfrm_pol_hold(pol); if (xfrm_bydst_should_resize(dir, NULL)) @@ -1095,7 +1092,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, hlist_del(&pol->bydst); hlist_del(&pol->byidx); list_del(&pol->walk.all); - xfrm_policy_count[dir]--; + init_net.xfrm.policy_count[dir]--; return pol; } @@ -1574,7 +1571,7 @@ restart: if (!policy) { /* To accelerate a bit... */ if ((dst_orig->flags & DST_NOXFRM) || - !xfrm_policy_count[XFRM_POLICY_OUT]) + !init_net.xfrm.policy_count[XFRM_POLICY_OUT]) goto nopol; policy = flow_cache_lookup(fl, dst_orig->ops->family, @@ -2407,6 +2404,7 @@ static int __net_init xfrm_policy_init(struct net *net) for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { struct xfrm_policy_hash *htab; + net->xfrm.policy_count[dir] = 0; INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]); htab = &net->xfrm.policy_bydst[dir]; -- cgit v1.2.3 From 66caf628c3b634c57b14a1a104dcd57e4fab2e3b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:28:57 -0800 Subject: netns xfrm: per-netns policy hash resizing work Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 1 + net/xfrm/xfrm_policy.c | 52 ++++++++++++++++++++++++------------------------ 2 files changed, 27 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index d5aadf06be46..c53d17357a49 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -38,6 +38,7 @@ struct netns_xfrm { struct hlist_head policy_inexact[XFRM_POLICY_MAX * 2]; struct xfrm_policy_hash policy_bydst[XFRM_POLICY_MAX * 2]; unsigned int policy_count[XFRM_POLICY_MAX * 2]; + struct work_struct policy_hash_work; }; #endif diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 630ec048a0d3..1d300862dc04 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -396,12 +396,12 @@ static unsigned long xfrm_new_hash_mask(unsigned int old_hmask) return ((old_hmask + 1) << 1) - 1; } -static void xfrm_bydst_resize(int dir) +static void xfrm_bydst_resize(struct net *net, int dir) { - unsigned int hmask = init_net.xfrm.policy_bydst[dir].hmask; + unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); - struct hlist_head *odst = init_net.xfrm.policy_bydst[dir].table; + struct hlist_head *odst = net->xfrm.policy_bydst[dir].table; struct hlist_head *ndst = xfrm_hash_alloc(nsize); int i; @@ -413,20 +413,20 @@ static void xfrm_bydst_resize(int dir) for (i = hmask; i >= 0; i--) xfrm_dst_hash_transfer(odst + i, ndst, nhashmask); - init_net.xfrm.policy_bydst[dir].table = ndst; - init_net.xfrm.policy_bydst[dir].hmask = nhashmask; + net->xfrm.policy_bydst[dir].table = ndst; + net->xfrm.policy_bydst[dir].hmask = nhashmask; write_unlock_bh(&xfrm_policy_lock); xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); } -static void xfrm_byidx_resize(int total) +static void xfrm_byidx_resize(struct net *net, int total) { - unsigned int hmask = init_net.xfrm.policy_idx_hmask; + unsigned int hmask = net->xfrm.policy_idx_hmask; unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); - struct hlist_head *oidx = init_net.xfrm.policy_byidx; + struct hlist_head *oidx = net->xfrm.policy_byidx; struct hlist_head *nidx = xfrm_hash_alloc(nsize); int i; @@ -438,18 +438,18 @@ static void xfrm_byidx_resize(int total) for (i = hmask; i >= 0; i--) xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); - init_net.xfrm.policy_byidx = nidx; - init_net.xfrm.policy_idx_hmask = nhashmask; + net->xfrm.policy_byidx = nidx; + net->xfrm.policy_idx_hmask = nhashmask; write_unlock_bh(&xfrm_policy_lock); xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); } -static inline int xfrm_bydst_should_resize(int dir, int *total) +static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total) { - unsigned int cnt = init_net.xfrm.policy_count[dir]; - unsigned int hmask = init_net.xfrm.policy_bydst[dir].hmask; + unsigned int cnt = net->xfrm.policy_count[dir]; + unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; if (total) *total += cnt; @@ -461,9 +461,9 @@ static inline int xfrm_bydst_should_resize(int dir, int *total) return 0; } -static inline int xfrm_byidx_should_resize(int total) +static inline int xfrm_byidx_should_resize(struct net *net, int total) { - unsigned int hmask = init_net.xfrm.policy_idx_hmask; + unsigned int hmask = net->xfrm.policy_idx_hmask; if ((hmask + 1) < xfrm_policy_hashmax && total > hmask) @@ -488,25 +488,24 @@ void xfrm_spd_getinfo(struct xfrmk_spdinfo *si) EXPORT_SYMBOL(xfrm_spd_getinfo); static DEFINE_MUTEX(hash_resize_mutex); -static void xfrm_hash_resize(struct work_struct *__unused) +static void xfrm_hash_resize(struct work_struct *work) { + struct net *net = container_of(work, struct net, xfrm.policy_hash_work); int dir, total; mutex_lock(&hash_resize_mutex); total = 0; for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { - if (xfrm_bydst_should_resize(dir, &total)) - xfrm_bydst_resize(dir); + if (xfrm_bydst_should_resize(net, dir, &total)) + xfrm_bydst_resize(net, dir); } - if (xfrm_byidx_should_resize(total)) - xfrm_byidx_resize(total); + if (xfrm_byidx_should_resize(net, total)) + xfrm_byidx_resize(net, total); mutex_unlock(&hash_resize_mutex); } -static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize); - /* Generate new index... KAME seems to generate them ordered by cost * of an absolute inpredictability of ordering of rules. This will not pass. */ static u32 xfrm_gen_index(int dir) @@ -607,8 +606,8 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) if (delpol) xfrm_policy_kill(delpol); - else if (xfrm_bydst_should_resize(dir, NULL)) - schedule_work(&xfrm_hash_work); + else if (xfrm_bydst_should_resize(&init_net, dir, NULL)) + schedule_work(&init_net.xfrm.policy_hash_work); read_lock_bh(&xfrm_policy_lock); gc_list = NULL; @@ -1079,8 +1078,8 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) init_net.xfrm.policy_count[dir]++; xfrm_pol_hold(pol); - if (xfrm_bydst_should_resize(dir, NULL)) - schedule_work(&xfrm_hash_work); + if (xfrm_bydst_should_resize(&init_net, dir, NULL)) + schedule_work(&init_net.xfrm.policy_hash_work); } static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, @@ -2415,6 +2414,7 @@ static int __net_init xfrm_policy_init(struct net *net) } INIT_LIST_HEAD(&net->xfrm.policy_all); + INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize); if (net_eq(net, &init_net)) register_netdevice_notifier(&xfrm_dev_notifier); return 0; -- cgit v1.2.3 From 0e6024519b4da2d9413b97be1de8122d5709ccc1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:30:18 -0800 Subject: netns xfrm: state flush in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- net/key/af_key.c | 2 +- net/xfrm/xfrm_state.c | 18 +++++++++--------- net/xfrm/xfrm_user.c | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 8699620f8c2d..e4bb67225610 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1363,7 +1363,7 @@ struct xfrmk_spdinfo { extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); extern int xfrm_state_delete(struct xfrm_state *x); -extern int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info); +extern int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info); extern void xfrm_sad_getinfo(struct xfrmk_sadinfo *si); extern void xfrm_spd_getinfo(struct xfrmk_spdinfo *si); extern int xfrm_replay_check(struct xfrm_state *x, diff --git a/net/key/af_key.c b/net/key/af_key.c index 036315d6b665..e5d595a60921 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1732,7 +1732,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); audit_info.secid = 0; - err = xfrm_state_flush(proto, &audit_info); + err = xfrm_state_flush(&init_net, proto, &audit_info); if (err) return err; c.data.proto = proto; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index f3f635d4ee66..5f4c5340ba30 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -576,15 +576,15 @@ EXPORT_SYMBOL(xfrm_state_delete); #ifdef CONFIG_SECURITY_NETWORK_XFRM static inline int -xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info) +xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info) { int i, err = 0; - for (i = 0; i <= init_net.xfrm.state_hmask; i++) { + for (i = 0; i <= net->xfrm.state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; - hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto) && (err = security_xfrm_state_delete(x)) != 0) { xfrm_audit_state_delete(x, 0, @@ -600,26 +600,26 @@ xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info) } #else static inline int -xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info) +xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audit_info) { return 0; } #endif -int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info) +int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info) { int i, err = 0; spin_lock_bh(&xfrm_state_lock); - err = xfrm_state_flush_secctx_check(proto, audit_info); + err = xfrm_state_flush_secctx_check(net, proto, audit_info); if (err) goto out; - for (i = 0; i <= init_net.xfrm.state_hmask; i++) { + for (i = 0; i <= net->xfrm.state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; restart: - hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { if (!xfrm_state_kern(x) && xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); @@ -641,7 +641,7 @@ restart: out: spin_unlock_bh(&xfrm_state_lock); - wake_up(&init_net.xfrm.km_waitq); + wake_up(&net->xfrm.km_waitq); return err; } EXPORT_SYMBOL(xfrm_state_flush); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 765c01e784e9..49a7e897ba96 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1398,7 +1398,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, audit_info.loginuid = NETLINK_CB(skb).loginuid; audit_info.sessionid = NETLINK_CB(skb).sessionid; audit_info.secid = NETLINK_CB(skb).sid; - err = xfrm_state_flush(p->proto, &audit_info); + err = xfrm_state_flush(&init_net, p->proto, &audit_info); if (err) return err; c.data.proto = p->proto; -- cgit v1.2.3 From 221df1ed33c9284fc7a6f6e47ca7f8d5f3665d43 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:30:50 -0800 Subject: netns xfrm: state lookup in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 4 ++-- net/ipv4/ah4.c | 2 +- net/ipv4/esp4.c | 2 +- net/ipv4/ipcomp.c | 4 ++-- net/ipv6/ah6.c | 2 +- net/ipv6/esp6.c | 2 +- net/ipv6/ipcomp6.c | 4 ++-- net/ipv6/xfrm6_input.c | 2 +- net/key/af_key.c | 2 +- net/xfrm/xfrm_input.c | 2 +- net/xfrm/xfrm_state.c | 34 +++++++++++++++++++--------------- net/xfrm/xfrm_user.c | 12 ++++++------ 12 files changed, 38 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e4bb67225610..15136c5e2622 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1323,8 +1323,8 @@ extern int xfrm_state_check_expire(struct xfrm_state *x); extern void xfrm_state_insert(struct xfrm_state *x); extern int xfrm_state_add(struct xfrm_state *x); extern int xfrm_state_update(struct xfrm_state *x); -extern struct xfrm_state *xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family); -extern struct xfrm_state *xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family); +extern struct xfrm_state *xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family); +extern struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family); #ifdef CONFIG_XFRM_SUB_POLICY extern int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, unsigned short family); diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 992ecd8662e2..750426b0a276 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -209,7 +209,7 @@ static void ah4_err(struct sk_buff *skb, u32 info) icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); + x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); if (!x) return; printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n", diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 95a9c65003f8..35950128aa94 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -421,7 +421,7 @@ static void esp4_err(struct sk_buff *skb, u32 info) icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); + x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); if (!x) return; NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 0a35f1b6f22c..3262ce06294c 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -35,7 +35,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) return; spi = htonl(ntohs(ipch->cpi)); - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, + x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET); if (!x) return; @@ -85,7 +85,7 @@ static int ipcomp_tunnel_attach(struct xfrm_state *x) int err = 0; struct xfrm_state *t; - t = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr.a4, + t = xfrm_state_lookup(&init_net, (xfrm_address_t *)&x->id.daddr.a4, x->props.saddr.a4, IPPROTO_IPIP, AF_INET); if (!t) { t = ipcomp_tunnel_create(x); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 13e330d89178..6ae014b86b69 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -415,7 +415,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != ICMPV6_PKT_TOOBIG) return; - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); + x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); if (!x) return; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index c02a6308defe..68f2af8c15c0 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -364,7 +364,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != ICMPV6_PKT_TOOBIG) return; - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); + x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) return; printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%pI6\n", diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index c369638e208a..3a0b3be7ece5 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -63,7 +63,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; spi = htonl(ntohs(ipcomph->cpi)); - x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); + x = xfrm_state_lookup(&init_net, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); if (!x) return; @@ -114,7 +114,7 @@ static int ipcomp6_tunnel_attach(struct xfrm_state *x) spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&x->props.saddr); if (spi) - t = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr, + t = xfrm_state_lookup(&init_net, (xfrm_address_t *)&x->id.daddr, spi, IPPROTO_IPV6, AF_INET6); if (!t) { t = ipcomp6_tunnel_create(x); diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index a71c7ddcb41e..b69766a77743 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -100,7 +100,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, break; } - x = xfrm_state_lookup_byaddr(dst, src, proto, AF_INET6); + x = xfrm_state_lookup_byaddr(&init_net, dst, src, proto, AF_INET6); if (!x) continue; diff --git a/net/key/af_key.c b/net/key/af_key.c index e5d595a60921..449a5d03e283 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -683,7 +683,7 @@ static struct xfrm_state *pfkey_xfrm_state_lookup(struct sadb_msg *hdr, void ** if (!xaddr) return NULL; - return xfrm_state_lookup(xaddr, sa->sadb_sa_spi, proto, family); + return xfrm_state_lookup(&init_net, xaddr, sa->sadb_sa_spi, proto, family); } #define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1))) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 75279402ccf4..c08a93e98a36 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -151,7 +151,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop; } - x = xfrm_state_lookup(daddr, spi, nexthdr, family); + x = xfrm_state_lookup(&init_net, daddr, spi, nexthdr, family); if (x == NULL) { XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES); xfrm_audit_state_notfound(skb, family, spi, seq); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5f4c5340ba30..afde47498cdc 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -670,13 +670,13 @@ xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, return 0; } -static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) +static struct xfrm_state *__xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { - unsigned int h = xfrm_spi_hash(&init_net, daddr, spi, proto, family); + unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); struct xfrm_state *x; struct hlist_node *entry; - hlist_for_each_entry(x, entry, init_net.xfrm.state_byspi+h, byspi) { + hlist_for_each_entry(x, entry, net->xfrm.state_byspi+h, byspi) { if (x->props.family != family || x->id.spi != spi || x->id.proto != proto) @@ -702,13 +702,13 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, return NULL; } -static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) +static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) { - unsigned int h = xfrm_src_hash(&init_net, daddr, saddr, family); + unsigned int h = xfrm_src_hash(net, daddr, saddr, family); struct xfrm_state *x; struct hlist_node *entry; - hlist_for_each_entry(x, entry, init_net.xfrm.state_bysrc+h, bysrc) { + hlist_for_each_entry(x, entry, net->xfrm.state_bysrc+h, bysrc) { if (x->props.family != family || x->id.proto != proto) continue; @@ -740,11 +740,13 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm static inline struct xfrm_state * __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family) { + struct net *net = xs_net(x); + if (use_spi) - return __xfrm_state_lookup(&x->id.daddr, x->id.spi, + return __xfrm_state_lookup(net, &x->id.daddr, x->id.spi, x->id.proto, family); else - return __xfrm_state_lookup_byaddr(&x->id.daddr, + return __xfrm_state_lookup_byaddr(net, &x->id.daddr, &x->props.saddr, x->id.proto, family); } @@ -818,7 +820,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, x = best; if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && - (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi, + (x0 = __xfrm_state_lookup(&init_net, daddr, tmpl->id.spi, tmpl->id.proto, family)) != NULL) { to_put = x0; error = -EEXIST; @@ -1361,26 +1363,27 @@ int xfrm_state_check_expire(struct xfrm_state *x) EXPORT_SYMBOL(xfrm_state_check_expire); struct xfrm_state * -xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, +xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __xfrm_state_lookup(daddr, spi, proto, family); + x = __xfrm_state_lookup(net, daddr, spi, proto, family); spin_unlock_bh(&xfrm_state_lock); return x; } EXPORT_SYMBOL(xfrm_state_lookup); struct xfrm_state * -xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, +xfrm_state_lookup_byaddr(struct net *net, + xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family); + x = __xfrm_state_lookup_byaddr(net, daddr, saddr, proto, family); spin_unlock_bh(&xfrm_state_lock); return x; } @@ -1486,6 +1489,7 @@ EXPORT_SYMBOL(xfrm_get_acqseq); int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) { + struct net *net = xs_net(x); unsigned int h; struct xfrm_state *x0; int err = -ENOENT; @@ -1503,7 +1507,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) err = -ENOENT; if (minspi == maxspi) { - x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family); + x0 = xfrm_state_lookup(net, &x->id.daddr, minspi, x->id.proto, x->props.family); if (x0) { xfrm_state_put(x0); goto unlock; @@ -1513,7 +1517,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) u32 spi = 0; for (h=0; hid.daddr, htonl(spi), x->id.proto, x->props.family); + x0 = xfrm_state_lookup(net, &x->id.daddr, htonl(spi), x->id.proto, x->props.family); if (x0 == NULL) { x->id.spi = htonl(spi); break; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 49a7e897ba96..e02ef3361190 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -440,7 +440,7 @@ static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) { err = -ESRCH; - x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); + x = xfrm_state_lookup(&init_net, &p->daddr, p->spi, p->proto, p->family); } else { xfrm_address_t *saddr = NULL; @@ -451,8 +451,8 @@ static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, } err = -ESRCH; - x = xfrm_state_lookup_byaddr(&p->daddr, saddr, p->proto, - p->family); + x = xfrm_state_lookup_byaddr(&init_net, &p->daddr, saddr, + p->proto, p->family); } out: @@ -1468,7 +1468,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (r_skb == NULL) return -ENOMEM; - x = xfrm_state_lookup(&id->daddr, id->spi, id->proto, id->family); + x = xfrm_state_lookup(&init_net, &id->daddr, id->spi, id->proto, id->family); if (x == NULL) { kfree_skb(r_skb); return -ESRCH; @@ -1509,7 +1509,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (!(nlh->nlmsg_flags&NLM_F_REPLACE)) return err; - x = xfrm_state_lookup(&p->sa_id.daddr, p->sa_id.spi, p->sa_id.proto, p->sa_id.family); + x = xfrm_state_lookup(&init_net, &p->sa_id.daddr, p->sa_id.spi, p->sa_id.proto, p->sa_id.family); if (x == NULL) return -ESRCH; @@ -1628,7 +1628,7 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_user_expire *ue = nlmsg_data(nlh); struct xfrm_usersa_info *p = &ue->state; - x = xfrm_state_lookup(&p->id.daddr, p->id.spi, p->id.proto, p->family); + x = xfrm_state_lookup(&init_net, &p->id.daddr, p->id.spi, p->id.proto, p->family); err = -ENOENT; if (x == NULL) -- cgit v1.2.3 From 5447c5e401c49aba0c36bb1066f2d25b152553b7 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:31:51 -0800 Subject: netns xfrm: finding states in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 7 ++--- net/core/pktgen.c | 3 ++- net/key/af_key.c | 6 ++--- net/xfrm/xfrm_state.c | 73 +++++++++++++++++++++++++++------------------------ net/xfrm/xfrm_user.c | 4 +-- 5 files changed, 49 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 15136c5e2622..4cbd0557c698 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1315,7 +1315,8 @@ extern struct xfrm_state *xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol, int *err, unsigned short family); -extern struct xfrm_state * xfrm_stateonly_find(xfrm_address_t *daddr, +extern struct xfrm_state * xfrm_stateonly_find(struct net *net, + xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, u8 mode, u8 proto, u32 reqid); @@ -1361,7 +1362,7 @@ struct xfrmk_spdinfo { u32 spdhmcnt; }; -extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); +extern struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 seq); extern int xfrm_state_delete(struct xfrm_state *x); extern int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info); extern void xfrm_sad_getinfo(struct xfrmk_sadinfo *si); @@ -1446,7 +1447,7 @@ struct xfrm_policy *xfrm_policy_byid(u8, int dir, u32 id, int delete, int *err); int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info); u32 xfrm_get_acqseq(void); extern int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi); -struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, +struct xfrm_state * xfrm_find_acq(struct net *net, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create, unsigned short family); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 15e0c2c7aacf..65498483325a 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2165,7 +2165,8 @@ static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) struct xfrm_state *x = pkt_dev->flows[flow].x; if (!x) { /*slow path: we dont already have xfrm_state*/ - x = xfrm_stateonly_find((xfrm_address_t *)&pkt_dev->cur_daddr, + x = xfrm_stateonly_find(&init_net, + (xfrm_address_t *)&pkt_dev->cur_daddr, (xfrm_address_t *)&pkt_dev->cur_saddr, AF_INET, pkt_dev->ipsmode, diff --git a/net/key/af_key.c b/net/key/af_key.c index 449a5d03e283..4ef0827009e9 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1348,7 +1348,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h } if (hdr->sadb_msg_seq) { - x = xfrm_find_acq_byseq(hdr->sadb_msg_seq); + x = xfrm_find_acq_byseq(&init_net, hdr->sadb_msg_seq); if (x && xfrm_addr_cmp(&x->id.daddr, xdaddr, family)) { xfrm_state_put(x); x = NULL; @@ -1356,7 +1356,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h } if (!x) - x = xfrm_find_acq(mode, reqid, proto, xdaddr, xsaddr, 1, family); + x = xfrm_find_acq(&init_net, mode, reqid, proto, xdaddr, xsaddr, 1, family); if (x == NULL) return -ENOENT; @@ -1404,7 +1404,7 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg * if (hdr->sadb_msg_seq == 0 || hdr->sadb_msg_errno == 0) return 0; - x = xfrm_find_acq_byseq(hdr->sadb_msg_seq); + x = xfrm_find_acq_byseq(&init_net, hdr->sadb_msg_seq); if (x == NULL) return 0; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index cd51e4e3d023..0d974fc9dd6c 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -765,6 +765,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct xfrm_policy *pol, int *err, unsigned short family) { + struct net *net = xp_net(pol); unsigned int h; struct hlist_node *entry; struct xfrm_state *x, *x0, *to_put; @@ -775,8 +776,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, to_put = NULL; spin_lock_bh(&xfrm_state_lock); - h = xfrm_dst_hash(&init_net, daddr, saddr, tmpl->reqid, family); - hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) { + h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, family); + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && !(x->props.flags & XFRM_STATE_WILDRECV) && @@ -820,13 +821,13 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, x = best; if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && - (x0 = __xfrm_state_lookup(&init_net, daddr, tmpl->id.spi, + (x0 = __xfrm_state_lookup(net, daddr, tmpl->id.spi, tmpl->id.proto, family)) != NULL) { to_put = x0; error = -EEXIST; goto out; } - x = xfrm_state_alloc(&init_net); + x = xfrm_state_alloc(net); if (x == NULL) { error = -ENOMEM; goto out; @@ -845,19 +846,19 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; - list_add(&x->km.all, &init_net.xfrm.state_all); - hlist_add_head(&x->bydst, init_net.xfrm.state_bydst+h); - h = xfrm_src_hash(&init_net, daddr, saddr, family); - hlist_add_head(&x->bysrc, init_net.xfrm.state_bysrc+h); + list_add(&x->km.all, &net->xfrm.state_all); + hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + h = xfrm_src_hash(net, daddr, saddr, family); + hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); if (x->id.spi) { - h = xfrm_spi_hash(&init_net, &x->id.daddr, x->id.spi, x->id.proto, family); - hlist_add_head(&x->byspi, init_net.xfrm.state_byspi+h); + h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, family); + hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); } x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires; x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; add_timer(&x->timer); - init_net.xfrm.state_num++; - xfrm_hash_grow_check(&init_net, x->bydst.next != NULL); + net->xfrm.state_num++; + xfrm_hash_grow_check(net, x->bydst.next != NULL); } else { x->km.state = XFRM_STATE_DEAD; to_put = x; @@ -877,7 +878,8 @@ out: } struct xfrm_state * -xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr, +xfrm_stateonly_find(struct net *net, + xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, u8 mode, u8 proto, u32 reqid) { unsigned int h; @@ -885,8 +887,8 @@ xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct hlist_node *entry; spin_lock(&xfrm_state_lock); - h = xfrm_dst_hash(&init_net, daddr, saddr, reqid, family); - hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) { + h = xfrm_dst_hash(net, daddr, saddr, reqid, family); + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && !(x->props.flags & XFRM_STATE_WILDRECV) && @@ -972,13 +974,13 @@ void xfrm_state_insert(struct xfrm_state *x) EXPORT_SYMBOL(xfrm_state_insert); /* xfrm_state_lock is held */ -static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) +static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) { - unsigned int h = xfrm_dst_hash(&init_net, daddr, saddr, reqid, family); + unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family); struct hlist_node *entry; struct xfrm_state *x; - hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+h, bydst) { + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { if (x->props.reqid != reqid || x->props.mode != mode || x->props.family != family || @@ -1010,7 +1012,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re if (!create) return NULL; - x = xfrm_state_alloc(&init_net); + x = xfrm_state_alloc(net); if (likely(x)) { switch (family) { case AF_INET: @@ -1045,23 +1047,24 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re xfrm_state_hold(x); x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; add_timer(&x->timer); - list_add(&x->km.all, &init_net.xfrm.state_all); - hlist_add_head(&x->bydst, init_net.xfrm.state_bydst+h); - h = xfrm_src_hash(&init_net, daddr, saddr, family); - hlist_add_head(&x->bysrc, init_net.xfrm.state_bysrc+h); + list_add(&x->km.all, &net->xfrm.state_all); + hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + h = xfrm_src_hash(net, daddr, saddr, family); + hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); - init_net.xfrm.state_num++; + net->xfrm.state_num++; - xfrm_hash_grow_check(&init_net, x->bydst.next != NULL); + xfrm_hash_grow_check(net, x->bydst.next != NULL); } return x; } -static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq); +static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 seq); int xfrm_state_add(struct xfrm_state *x) { + struct net *net = xs_net(x); struct xfrm_state *x1, *to_put; int family; int err; @@ -1082,7 +1085,7 @@ int xfrm_state_add(struct xfrm_state *x) } if (use_spi && x->km.seq) { - x1 = __xfrm_find_acq_byseq(x->km.seq); + x1 = __xfrm_find_acq_byseq(net, x->km.seq); if (x1 && ((x1->id.proto != x->id.proto) || xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) { to_put = x1; @@ -1091,7 +1094,7 @@ int xfrm_state_add(struct xfrm_state *x) } if (use_spi && !x1) - x1 = __find_acq_core(family, x->props.mode, x->props.reqid, + x1 = __find_acq_core(net, family, x->props.mode, x->props.reqid, x->id.proto, &x->id.daddr, &x->props.saddr, 0); @@ -1390,14 +1393,14 @@ xfrm_state_lookup_byaddr(struct net *net, EXPORT_SYMBOL(xfrm_state_lookup_byaddr); struct xfrm_state * -xfrm_find_acq(u8 mode, u32 reqid, u8 proto, +xfrm_find_acq(struct net *net, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create, unsigned short family) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create); + x = __find_acq_core(net, family, mode, reqid, proto, daddr, saddr, create); spin_unlock_bh(&xfrm_state_lock); return x; @@ -1444,15 +1447,15 @@ EXPORT_SYMBOL(xfrm_state_sort); /* Silly enough, but I'm lazy to build resolution list */ -static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) +static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 seq) { int i; - for (i = 0; i <= init_net.xfrm.state_hmask; i++) { + for (i = 0; i <= net->xfrm.state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; - hlist_for_each_entry(x, entry, init_net.xfrm.state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) { xfrm_state_hold(x); @@ -1463,12 +1466,12 @@ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) return NULL; } -struct xfrm_state *xfrm_find_acq_byseq(u32 seq) +struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 seq) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __xfrm_find_acq_byseq(seq); + x = __xfrm_find_acq_byseq(net, seq); spin_unlock_bh(&xfrm_state_lock); return x; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e02ef3361190..3d577440b673 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -837,7 +837,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, x = NULL; if (p->info.seq) { - x = xfrm_find_acq_byseq(p->info.seq); + x = xfrm_find_acq_byseq(&init_net, p->info.seq); if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) { xfrm_state_put(x); x = NULL; @@ -845,7 +845,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, } if (!x) - x = xfrm_find_acq(p->info.mode, p->info.reqid, + x = xfrm_find_acq(&init_net, p->info.mode, p->info.reqid, p->info.id.proto, daddr, &p->info.saddr, 1, family); -- cgit v1.2.3 From 284fa7da300adcb700b44df2f64a536b434d4650 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:32:14 -0800 Subject: netns xfrm: state walking in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- net/key/af_key.c | 2 +- net/xfrm/xfrm_state.c | 6 +++--- net/xfrm/xfrm_user.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4cbd0557c698..40ed4878bc12 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1307,7 +1307,7 @@ extern int xfrm_proc_init(void); #endif extern void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto); -extern int xfrm_state_walk(struct xfrm_state_walk *walk, +extern int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *); extern void xfrm_state_walk_done(struct xfrm_state_walk *walk); extern struct xfrm_state *xfrm_state_alloc(struct net *net); diff --git a/net/key/af_key.c b/net/key/af_key.c index 4ef0827009e9..b74d939e2eed 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1776,7 +1776,7 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr) static int pfkey_dump_sa(struct pfkey_sock *pfk) { - return xfrm_state_walk(&pfk->dump.u.state, dump_sa, (void *) pfk); + return xfrm_state_walk(&init_net, &pfk->dump.u.state, dump_sa, (void *) pfk); } static void pfkey_dump_sa_done(struct pfkey_sock *pfk) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 0d974fc9dd6c..ea340bbbcc64 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1544,7 +1544,7 @@ unlock: } EXPORT_SYMBOL(xfrm_alloc_spi); -int xfrm_state_walk(struct xfrm_state_walk *walk, +int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *data) { @@ -1557,10 +1557,10 @@ int xfrm_state_walk(struct xfrm_state_walk *walk, spin_lock_bh(&xfrm_state_lock); if (list_empty(&walk->all)) - x = list_first_entry(&init_net.xfrm.state_all, struct xfrm_state_walk, all); + x = list_first_entry(&net->xfrm.state_all, struct xfrm_state_walk, all); else x = list_entry(&walk->all, struct xfrm_state_walk, all); - list_for_each_entry_from(x, &init_net.xfrm.state_all, all) { + list_for_each_entry_from(x, &net->xfrm.state_all, all) { if (x->state == XFRM_STATE_DEAD) continue; state = container_of(x, struct xfrm_state, km); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 3d577440b673..787b0ee65034 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -631,7 +631,7 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) xfrm_state_walk_init(walk, 0); } - (void) xfrm_state_walk(walk, dump_one_state, &info); + (void) xfrm_state_walk(&init_net, walk, dump_one_state, &info); return skb->len; } -- cgit v1.2.3 From 33ffbbd52c327225a3e28485c39dc5746d81be03 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:33:32 -0800 Subject: netns xfrm: policy flushing in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- net/key/af_key.c | 2 +- net/xfrm/xfrm_policy.c | 22 +++++++++++----------- net/xfrm/xfrm_user.c | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 40ed4878bc12..766cc71e96d4 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1444,7 +1444,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, struct xfrm_sec_ctx *ctx, int delete, int *err); struct xfrm_policy *xfrm_policy_byid(u8, int dir, u32 id, int delete, int *err); -int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info); +int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info); u32 xfrm_get_acqseq(void); extern int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi); struct xfrm_state * xfrm_find_acq(struct net *net, u8 mode, u32 reqid, u8 proto, diff --git a/net/key/af_key.c b/net/key/af_key.c index b74d939e2eed..0f44856c1f12 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2686,7 +2686,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); audit_info.secid = 0; - err = xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN, &audit_info); + err = xfrm_policy_flush(&init_net, XFRM_POLICY_TYPE_MAIN, &audit_info); if (err) return err; c.data.type = XFRM_POLICY_TYPE_MAIN; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 11fee87a0cc1..7c264a74edc0 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -732,7 +732,7 @@ EXPORT_SYMBOL(xfrm_policy_byid); #ifdef CONFIG_SECURITY_NETWORK_XFRM static inline int -xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) +xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info) { int dir, err = 0; @@ -742,7 +742,7 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) int i; hlist_for_each_entry(pol, entry, - &init_net.xfrm.policy_inexact[dir], bydst) { + &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; err = security_xfrm_policy_delete(pol->security); @@ -754,9 +754,9 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) return err; } } - for (i = init_net.xfrm.policy_bydst[dir].hmask; i >= 0; i--) { + for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { hlist_for_each_entry(pol, entry, - init_net.xfrm.policy_bydst[dir].table + i, + net->xfrm.policy_bydst[dir].table + i, bydst) { if (pol->type != type) continue; @@ -776,19 +776,19 @@ xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) } #else static inline int -xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info) +xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info) { return 0; } #endif -int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) +int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) { int dir, err = 0; write_lock_bh(&xfrm_policy_lock); - err = xfrm_policy_flush_secctx_check(type, audit_info); + err = xfrm_policy_flush_secctx_check(net, type, audit_info); if (err) goto out; @@ -800,7 +800,7 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) killed = 0; again1: hlist_for_each_entry(pol, entry, - &init_net.xfrm.policy_inexact[dir], bydst) { + &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; hlist_del(&pol->bydst); @@ -818,10 +818,10 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) goto again1; } - for (i = init_net.xfrm.policy_bydst[dir].hmask; i >= 0; i--) { + for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) { again2: hlist_for_each_entry(pol, entry, - init_net.xfrm.policy_bydst[dir].table + i, + net->xfrm.policy_bydst[dir].table + i, bydst) { if (pol->type != type) continue; @@ -842,7 +842,7 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) } } - init_net.xfrm.policy_count[dir] -= killed; + net->xfrm.policy_count[dir] -= killed; } atomic_inc(&flow_cache_genid); out: diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 787b0ee65034..d4983e831c34 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1546,7 +1546,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, audit_info.loginuid = NETLINK_CB(skb).loginuid; audit_info.sessionid = NETLINK_CB(skb).sessionid; audit_info.secid = NETLINK_CB(skb).sid; - err = xfrm_policy_flush(type, &audit_info); + err = xfrm_policy_flush(&init_net, type, &audit_info); if (err) return err; c.data.type = type; -- cgit v1.2.3 From 8d1211a6aaea43ea36151c17b0193eb763ff2d7e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:34:20 -0800 Subject: netns xfrm: finding policy in netns Add netns parameter to xfrm_policy_bysel_ctx(), xfrm_policy_byidx(). Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 4 ++-- net/key/af_key.c | 6 +++--- net/xfrm/xfrm_policy.c | 14 +++++++------- net/xfrm/xfrm_user.c | 8 ++++---- 4 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 766cc71e96d4..ec2b7a9b3aa9 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1439,11 +1439,11 @@ extern int xfrm_policy_walk(struct xfrm_policy_walk *walk, int (*func)(struct xfrm_policy *, int, int, void*), void *); extern void xfrm_policy_walk_done(struct xfrm_policy_walk *walk); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); -struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, +struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u8 type, int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete, int *err); -struct xfrm_policy *xfrm_policy_byid(u8, int dir, u32 id, int delete, int *err); +struct xfrm_policy *xfrm_policy_byid(struct net *net, u8, int dir, u32 id, int delete, int *err); int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info); u32 xfrm_get_acqseq(void); extern int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi); diff --git a/net/key/af_key.c b/net/key/af_key.c index 0f44856c1f12..ca268116ac11 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2324,7 +2324,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg return err; } - xp = xfrm_policy_bysel_ctx(XFRM_POLICY_TYPE_MAIN, + xp = xfrm_policy_bysel_ctx(&init_net, XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir - 1, &sel, pol_ctx, 1, &err); security_xfrm_policy_free(pol_ctx); @@ -2571,8 +2571,8 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h return -EINVAL; delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2); - xp = xfrm_policy_byid(XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id, - delete, &err); + xp = xfrm_policy_byid(&init_net, XFRM_POLICY_TYPE_MAIN, dir, + pol->sadb_x_policy_id, delete, &err); if (xp == NULL) return -ENOENT; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7c264a74edc0..96895ef61858 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -642,7 +642,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) } EXPORT_SYMBOL(xfrm_policy_insert); -struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, +struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u8 type, int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete, int *err) @@ -653,7 +653,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, *err = 0; write_lock_bh(&xfrm_policy_lock); - chain = policy_hash_bysel(&init_net, sel, sel->family, dir); + chain = policy_hash_bysel(net, sel, sel->family, dir); ret = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { if (pol->type == type && @@ -670,7 +670,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, hlist_del(&pol->bydst); hlist_del(&pol->byidx); list_del(&pol->walk.all); - init_net.xfrm.policy_count[dir]--; + net->xfrm.policy_count[dir]--; } ret = pol; break; @@ -686,8 +686,8 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); -struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, - int *err) +struct xfrm_policy *xfrm_policy_byid(struct net *net, u8 type, int dir, u32 id, + int delete, int *err) { struct xfrm_policy *pol, *ret; struct hlist_head *chain; @@ -699,7 +699,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, *err = 0; write_lock_bh(&xfrm_policy_lock); - chain = init_net.xfrm.policy_byidx + idx_hash(&init_net, id); + chain = net->xfrm.policy_byidx + idx_hash(net, id); ret = NULL; hlist_for_each_entry(pol, entry, chain, byidx) { if (pol->type == type && pol->index == id) { @@ -714,7 +714,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, hlist_del(&pol->bydst); hlist_del(&pol->byidx); list_del(&pol->walk.all); - init_net.xfrm.policy_count[dir]--; + net->xfrm.policy_count[dir]--; } ret = pol; break; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d4983e831c34..efd6ab5c0aca 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1330,7 +1330,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, return err; if (p->index) - xp = xfrm_policy_byid(type, p->dir, p->index, delete, &err); + xp = xfrm_policy_byid(&init_net, type, p->dir, p->index, delete, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; @@ -1347,7 +1347,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, ctx, + xp = xfrm_policy_bysel_ctx(&init_net, type, p->dir, &p->sel, ctx, delete, &err); security_xfrm_policy_free(ctx); } @@ -1571,7 +1571,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, return err; if (p->index) - xp = xfrm_policy_byid(type, p->dir, p->index, 0, &err); + xp = xfrm_policy_byid(&init_net, type, p->dir, p->index, 0, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; @@ -1588,7 +1588,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, ctx, 0, &err); + xp = xfrm_policy_bysel_ctx(&init_net, type, p->dir, &p->sel, ctx, 0, &err); security_xfrm_policy_free(ctx); } if (xp == NULL) -- cgit v1.2.3 From cdcbca7c1f1946758cfacb69bc1c7eeaccb11e2d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:34:49 -0800 Subject: netns xfrm: policy walking in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- net/key/af_key.c | 4 ++-- net/xfrm/xfrm_policy.c | 6 +++--- net/xfrm/xfrm_user.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index ec2b7a9b3aa9..1dc4ff0f4851 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1435,7 +1435,7 @@ static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp); extern void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type); -extern int xfrm_policy_walk(struct xfrm_policy_walk *walk, +extern int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, int (*func)(struct xfrm_policy *, int, int, void*), void *); extern void xfrm_policy_walk_done(struct xfrm_policy_walk *walk); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); diff --git a/net/key/af_key.c b/net/key/af_key.c index ca268116ac11..a0d849848ddd 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1846,7 +1846,7 @@ static u32 gen_reqid(void) if (reqid == 0) reqid = IPSEC_MANUAL_REQID_MAX+1; xfrm_policy_walk_init(&walk, XFRM_POLICY_TYPE_MAIN); - rc = xfrm_policy_walk(&walk, check_reqid, (void*)&reqid); + rc = xfrm_policy_walk(&init_net, &walk, check_reqid, (void*)&reqid); xfrm_policy_walk_done(&walk); if (rc != -EEXIST) return reqid; @@ -2633,7 +2633,7 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) static int pfkey_dump_sp(struct pfkey_sock *pfk) { - return xfrm_policy_walk(&pfk->dump.u.policy, dump_sp, (void *) pfk); + return xfrm_policy_walk(&init_net, &pfk->dump.u.policy, dump_sp, (void *) pfk); } static void pfkey_dump_sp_done(struct pfkey_sock *pfk) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 96895ef61858..6165218fd7c2 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -851,7 +851,7 @@ out: } EXPORT_SYMBOL(xfrm_policy_flush); -int xfrm_policy_walk(struct xfrm_policy_walk *walk, +int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, int (*func)(struct xfrm_policy *, int, int, void*), void *data) { @@ -868,10 +868,10 @@ int xfrm_policy_walk(struct xfrm_policy_walk *walk, write_lock_bh(&xfrm_policy_lock); if (list_empty(&walk->walk.all)) - x = list_first_entry(&init_net.xfrm.policy_all, struct xfrm_policy_walk_entry, all); + x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all); else x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all); - list_for_each_entry_from(x, &init_net.xfrm.policy_all, all) { + list_for_each_entry_from(x, &net->xfrm.policy_all, all) { if (x->dead) continue; pol = container_of(x, struct xfrm_policy, walk); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index efd6ab5c0aca..f6e02726cf1b 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1279,7 +1279,7 @@ static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY); } - (void) xfrm_policy_walk(walk, dump_one_policy, &info); + (void) xfrm_policy_walk(&init_net, walk, dump_one_policy, &info); return skb->len; } -- cgit v1.2.3 From 52479b623d3d41df84c499325b6a8c7915413032 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:35:18 -0800 Subject: netns xfrm: lookup in netns Pass netns to xfrm_lookup()/__xfrm_lookup(). For that pass netns to flow_cache_lookup() and resolver callback. Take it from socket or netdevice. Stub DECnet to init_net. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/dst.h | 16 ++++++++-------- include/net/flow.h | 9 +++++---- net/core/flow.c | 4 ++-- net/dccp/ipv6.c | 10 +++++----- net/decnet/dn_route.c | 6 +++--- net/ipv4/icmp.c | 4 ++-- net/ipv4/netfilter.c | 4 ++-- net/ipv4/route.c | 2 +- net/ipv6/af_inet6.c | 2 +- net/ipv6/datagram.c | 3 ++- net/ipv6/icmp.c | 6 +++--- net/ipv6/inet6_connection_sock.c | 2 +- net/ipv6/ip6_tunnel.c | 5 +++-- net/ipv6/mcast.c | 4 ++-- net/ipv6/ndisc.c | 4 ++-- net/ipv6/netfilter.c | 2 +- net/ipv6/netfilter/ip6t_REJECT.c | 2 +- net/ipv6/raw.c | 3 ++- net/ipv6/syncookies.c | 2 +- net/ipv6/tcp_ipv6.c | 11 ++++++----- net/ipv6/udp.c | 3 ++- net/xfrm/xfrm_policy.c | 38 ++++++++++++++++++++------------------ 22 files changed, 75 insertions(+), 67 deletions(-) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 6c778799bf10..6be3b082a070 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -291,21 +291,21 @@ enum { struct flowi; #ifndef CONFIG_XFRM -static inline int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags) +static inline int xfrm_lookup(struct net *net, struct dst_entry **dst_p, + struct flowi *fl, struct sock *sk, int flags) { return 0; } -static inline int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags) +static inline int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, + struct flowi *fl, struct sock *sk, int flags) { return 0; } #else -extern int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags); -extern int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, - struct sock *sk, int flags); +extern int xfrm_lookup(struct net *net, struct dst_entry **dst_p, + struct flowi *fl, struct sock *sk, int flags); +extern int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, + struct flowi *fl, struct sock *sk, int flags); #endif #endif diff --git a/include/net/flow.h b/include/net/flow.h index b45a5e4fcadd..809970b7dfee 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -84,12 +84,13 @@ struct flowi { #define FLOW_DIR_OUT 1 #define FLOW_DIR_FWD 2 +struct net; struct sock; -typedef int (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir, - void **objp, atomic_t **obj_refp); +typedef int (*flow_resolve_t)(struct net *net, struct flowi *key, u16 family, + u8 dir, void **objp, atomic_t **obj_refp); -extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, - flow_resolve_t resolver); +extern void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, + u8 dir, flow_resolve_t resolver); extern void flow_cache_flush(void); extern atomic_t flow_cache_genid; diff --git a/net/core/flow.c b/net/core/flow.c index d323388dd1ba..96015871ecea 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -165,7 +165,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) return 0; } -void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, +void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver) { struct flow_cache_entry *fle, **head; @@ -225,7 +225,7 @@ nocache: void *obj; atomic_t *obj_ref; - err = resolver(key, family, dir, &obj, &obj_ref); + err = resolver(net, key, family, dir, &obj, &obj_ref); if (fle && !err) { fle->genid = atomic_read(&flow_cache_genid); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index f033e845bb07..b963f35c65f6 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -168,7 +168,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - err = xfrm_lookup(&dst, &fl, sk, 0); + err = xfrm_lookup(net, &dst, &fl, sk, 0); if (err < 0) { sk->sk_err_soft = -err; goto out; @@ -279,7 +279,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - err = xfrm_lookup(&dst, &fl, sk, 0); + err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0); if (err < 0) goto done; @@ -343,7 +343,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) /* sk = NULL, but it is safe for now. RST socket required. */ if (!ip6_dst_lookup(ctl_sk, &skb->dst, &fl)) { - if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { + if (xfrm_lookup(net, &skb->dst, &fl, NULL, 0) >= 0) { ip6_xmit(ctl_sk, skb, &fl, NULL, 0); DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); @@ -569,7 +569,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto out; } @@ -1004,7 +1004,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT); + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 768df000523b..eeaa3d819f9c 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1184,7 +1184,7 @@ static int dn_route_output_key(struct dst_entry **pprt, struct flowi *flp, int f err = __dn_route_output_key(pprt, flp, flags); if (err == 0 && flp->proto) { - err = xfrm_lookup(pprt, flp, NULL, 0); + err = xfrm_lookup(&init_net, pprt, flp, NULL, 0); } return err; } @@ -1195,8 +1195,8 @@ int dn_route_output_sock(struct dst_entry **pprt, struct flowi *fl, struct sock err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD); if (err == 0 && fl->proto) { - err = xfrm_lookup(pprt, fl, sk, (flags & MSG_DONTWAIT) ? - 0 : XFRM_LOOKUP_WAIT); + err = xfrm_lookup(&init_net, pprt, fl, sk, + (flags & MSG_DONTWAIT) ? 0 : XFRM_LOOKUP_WAIT); } return err; } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 7b88be9803b1..705b33b184a3 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -562,7 +562,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) /* No need to clone since we're just using its address. */ rt2 = rt; - err = xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); + err = xfrm_lookup(net, (struct dst_entry **)&rt, &fl, NULL, 0); switch (err) { case 0: if (rt != rt2) @@ -601,7 +601,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) if (err) goto relookup_failed; - err = xfrm_lookup((struct dst_entry **)&rt2, &fl, NULL, + err = xfrm_lookup(net, (struct dst_entry **)&rt2, &fl, NULL, XFRM_LOOKUP_ICMP); switch (err) { case 0: diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 6efdb70b3eb2..c99eecf89da5 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -66,7 +66,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) #ifdef CONFIG_XFRM if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && xfrm_decode_session(skb, &fl, AF_INET) == 0) - if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0)) + if (xfrm_lookup(net, &skb->dst, &fl, skb->sk, 0)) return -1; #endif @@ -97,7 +97,7 @@ int ip_xfrm_me_harder(struct sk_buff *skb) dst = ((struct xfrm_dst *)dst)->route; dst_hold(dst); - if (xfrm_lookup(&dst, &fl, skb->sk, 0) < 0) + if (xfrm_lookup(dev_net(dst->dev), &dst, &fl, skb->sk, 0) < 0) return -1; dst_release(skb->dst); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 4e6959c29819..77bfba975959 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2761,7 +2761,7 @@ int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, flp->fl4_src = (*rp)->rt_src; if (!flp->fl4_dst) flp->fl4_dst = (*rp)->rt_dst; - err = __xfrm_lookup((struct dst_entry **)rp, flp, sk, + err = __xfrm_lookup(net, (struct dst_entry **)rp, flp, sk, flags ? XFRM_LOOKUP_WAIT : 0); if (err == -EREMOTE) err = ipv4_dst_blackhole(net, rp, flp); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 01edac888510..437b750b98fd 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -637,7 +637,7 @@ int inet6_sk_rebuild_header(struct sock *sk) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) { sk->sk_err_soft = -err; return err; } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index e44deb8d4df2..e2bdc6d83a43 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -175,7 +175,8 @@ ipv4_connected: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index a77b8d103804..4f433847d95f 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -427,7 +427,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, /* No need to clone since we're just using its address. */ dst2 = dst; - err = xfrm_lookup(&dst, &fl, sk, 0); + err = xfrm_lookup(net, &dst, &fl, sk, 0); switch (err) { case 0: if (dst != dst2) @@ -446,7 +446,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if (ip6_dst_lookup(sk, &dst2, &fl)) goto relookup_failed; - err = xfrm_lookup(&dst2, &fl, sk, XFRM_LOOKUP_ICMP); + err = xfrm_lookup(net, &dst2, &fl, sk, XFRM_LOOKUP_ICMP); switch (err) { case 0: dst_release(dst); @@ -552,7 +552,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) err = ip6_dst_lookup(sk, &dst, &fl); if (err) goto out; - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) goto out; if (ipv6_addr_is_multicast(&fl.fl6_dst)) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 16d43f20b32f..3c3732d50c1a 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -219,7 +219,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) { sk->sk_route_caps = 0; kfree_skb(skb); return err; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index ef249ab5c93c..58e2b0d93758 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -846,6 +846,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, int encap_limit, __u32 *pmtu) { + struct net *net = dev_net(dev); struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->dev->stats; struct ipv6hdr *ipv6h = ipv6_hdr(skb); @@ -861,9 +862,9 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, if ((dst = ip6_tnl_dst_check(t)) != NULL) dst_hold(dst); else { - dst = ip6_route_output(dev_net(dev), NULL, fl); + dst = ip6_route_output(net, NULL, fl); - if (dst->error || xfrm_lookup(&dst, fl, NULL, 0) < 0) + if (dst->error || xfrm_lookup(net, &dst, fl, NULL, 0) < 0) goto tx_err_link_failure; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 870a1d64605a..0f3896032830 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1466,7 +1466,7 @@ static void mld_sendpack(struct sk_buff *skb) &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - err = xfrm_lookup(&skb->dst, &fl, NULL, 0); + err = xfrm_lookup(net, &skb->dst, &fl, NULL, 0); if (err) goto err_out; @@ -1831,7 +1831,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); - err = xfrm_lookup(&skb->dst, &fl, NULL, 0); + err = xfrm_lookup(net, &skb->dst, &fl, NULL, 0); if (err) goto err_out; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index af6705f03b5c..e4acc212345e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -524,7 +524,7 @@ void ndisc_send_skb(struct sk_buff *skb, return; } - err = xfrm_lookup(&dst, &fl, NULL, 0); + err = xfrm_lookup(net, &dst, &fl, NULL, 0); if (err < 0) { kfree_skb(skb); return; @@ -1524,7 +1524,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, if (dst == NULL) return; - err = xfrm_lookup(&dst, &fl, NULL, 0); + err = xfrm_lookup(net, &dst, &fl, NULL, 0); if (err) return; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index fd5b3a4e3329..627e21db65df 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -29,7 +29,7 @@ int ip6_route_me_harder(struct sk_buff *skb) #ifdef CONFIG_XFRM if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && xfrm_decode_session(skb, &fl, AF_INET6) == 0) - if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0)) + if (xfrm_lookup(net, &skb->dst, &fl, skb->sk, 0)) return -1; #endif diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 0981b4ccb8b1..5a2d0a41694a 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -97,7 +97,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) dst = ip6_route_output(net, NULL, &fl); if (dst == NULL) return; - if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0)) + if (dst->error || xfrm_lookup(net, &dst, &fl, NULL, 0)) return; hh_len = (dst->dev->hard_header_len + 15)&~15; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 2ba04d41dc25..61f6827e5906 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -860,7 +860,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 676c80b5b14b..711175e0571f 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -259,7 +259,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto out_free; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a5d750acd793..f259c9671f3e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -260,7 +260,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) @@ -390,7 +391,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) { + if ((err = xfrm_lookup(net, &dst, &fl, sk, 0)) < 0) { sk->sk_err_soft = -err; goto out; } @@ -492,7 +493,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) goto done; if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((err = xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto done; skb = tcp_make_synack(sk, dst, req); @@ -1018,7 +1019,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, * namespace */ if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) { - if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { + if (xfrm_lookup(net, &buff->dst, &fl, NULL, 0) >= 0) { ip6_xmit(ctl_sk, buff, &fl, NULL, 0); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); if (rst) @@ -1316,7 +1317,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0) + if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) goto out; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index fd2d9ad4a8a3..38390dd19636 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -849,7 +849,8 @@ do_udp_sendmsg: if (final_p) ipv6_addr_copy(&fl.fl6_dst, final_p); - if ((err = __xfrm_lookup(&dst, &fl, sk, XFRM_LOOKUP_WAIT)) < 0) { + err = __xfrm_lookup(sock_net(sk), &dst, &fl, sk, XFRM_LOOKUP_WAIT); + if (err < 0) { if (err == -EREMOTE) err = ip6_dst_blackhole(sk, &dst, &fl); if (err < 0) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 6165218fd7c2..7c88a25c7af5 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -940,7 +940,8 @@ static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, return ret; } -static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, +static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, + struct flowi *fl, u16 family, u8 dir) { int err; @@ -956,7 +957,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, return NULL; read_lock_bh(&xfrm_policy_lock); - chain = policy_hash_direct(&init_net, daddr, saddr, family, dir); + chain = policy_hash_direct(net, daddr, saddr, family, dir); ret = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); @@ -973,7 +974,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, break; } } - chain = &init_net.xfrm.policy_inexact[dir]; + chain = &net->xfrm.policy_inexact[dir]; hlist_for_each_entry(pol, entry, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); if (err) { @@ -996,14 +997,14 @@ fail: return ret; } -static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, - void **objp, atomic_t **obj_refp) +static int xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, + u8 dir, void **objp, atomic_t **obj_refp) { struct xfrm_policy *pol; int err = 0; #ifdef CONFIG_XFRM_SUB_POLICY - pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir); + pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir); if (IS_ERR(pol)) { err = PTR_ERR(pol); pol = NULL; @@ -1011,7 +1012,7 @@ static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, if (pol || err) goto end; #endif - pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); + pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); if (IS_ERR(pol)) { err = PTR_ERR(pol); pol = NULL; @@ -1537,7 +1538,7 @@ static int stale_bundle(struct dst_entry *dst); * At the moment we eat a raw IP route. Mostly to speed up lookups * on interfaces with disabled IPsec. */ -int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, +int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, struct sock *sk, int flags) { struct xfrm_policy *policy; @@ -1575,10 +1576,10 @@ restart: if (!policy) { /* To accelerate a bit... */ if ((dst_orig->flags & DST_NOXFRM) || - !init_net.xfrm.policy_count[XFRM_POLICY_OUT]) + !net->xfrm.policy_count[XFRM_POLICY_OUT]) goto nopol; - policy = flow_cache_lookup(fl, dst_orig->ops->family, + policy = flow_cache_lookup(net, fl, dst_orig->ops->family, dir, xfrm_policy_lookup); err = PTR_ERR(policy); if (IS_ERR(policy)) { @@ -1635,7 +1636,8 @@ restart: #ifdef CONFIG_XFRM_SUB_POLICY if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { - pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + pols[1] = xfrm_policy_lookup_bytype(net, + XFRM_POLICY_TYPE_MAIN, fl, family, XFRM_POLICY_OUT); if (pols[1]) { @@ -1683,11 +1685,11 @@ restart: if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) { DECLARE_WAITQUEUE(wait, current); - add_wait_queue(&init_net.xfrm.km_waitq, &wait); + add_wait_queue(&net->xfrm.km_waitq, &wait); set_current_state(TASK_INTERRUPTIBLE); schedule(); set_current_state(TASK_RUNNING); - remove_wait_queue(&init_net.xfrm.km_waitq, &wait); + remove_wait_queue(&net->xfrm.km_waitq, &wait); nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); @@ -1781,10 +1783,10 @@ nopol: } EXPORT_SYMBOL(__xfrm_lookup); -int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, +int xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl, struct sock *sk, int flags) { - int err = __xfrm_lookup(dst_p, fl, sk, flags); + int err = __xfrm_lookup(net, dst_p, fl, sk, flags); if (err == -EREMOTE) { dst_release(*dst_p); @@ -1936,7 +1938,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } if (!pol) - pol = flow_cache_lookup(&fl, family, fl_dir, + pol = flow_cache_lookup(&init_net, &fl, family, fl_dir, xfrm_policy_lookup); if (IS_ERR(pol)) { @@ -1959,7 +1961,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, npols ++; #ifdef CONFIG_XFRM_SUB_POLICY if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { - pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + pols[1] = xfrm_policy_lookup_bytype(&init_net, XFRM_POLICY_TYPE_MAIN, &fl, family, XFRM_POLICY_IN); if (pols[1]) { @@ -2049,7 +2051,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) return 0; } - return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0; + return xfrm_lookup(&init_net, &skb->dst, &fl, NULL, 0) == 0; } EXPORT_SYMBOL(__xfrm_route_forward); -- cgit v1.2.3 From f6e1e25d703c0a9ba1863384a16851dec52f8e3a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:35:44 -0800 Subject: netns xfrm: xfrm_policy_check in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 3 ++- net/xfrm/xfrm_policy.c | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 1dc4ff0f4851..158848f55640 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -992,12 +992,13 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir, struct sk_buff *skb, unsigned int family, int reverse) { + struct net *net = dev_net(skb->dev); int ndir = dir | (reverse ? XFRM_POLICY_MASK + 1 : 0); if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, ndir, skb, family); - return (!init_net.xfrm.policy_count[dir] && !skb->sp) || + return (!net->xfrm.policy_count[dir] && !skb->sp) || (skb->dst->flags & DST_NOPOLICY) || __xfrm_policy_check(sk, ndir, skb, family); } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7c88a25c7af5..8097c9958cfc 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1894,6 +1894,7 @@ static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) { + struct net *net = dev_net(skb->dev); struct xfrm_policy *pol; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int npols = 0; @@ -1938,7 +1939,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } if (!pol) - pol = flow_cache_lookup(&init_net, &fl, family, fl_dir, + pol = flow_cache_lookup(net, &fl, family, fl_dir, xfrm_policy_lookup); if (IS_ERR(pol)) { @@ -1961,7 +1962,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, npols ++; #ifdef CONFIG_XFRM_SUB_POLICY if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { - pols[1] = xfrm_policy_lookup_bytype(&init_net, XFRM_POLICY_TYPE_MAIN, + pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, &fl, family, XFRM_POLICY_IN); if (pols[1]) { -- cgit v1.2.3 From 99a66657b2f62ae8b2b1e6ffc6abed051e4561ca Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:36:13 -0800 Subject: netns xfrm: xfrm_route_forward() in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 4 +++- net/xfrm/xfrm_policy.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 158848f55640..36c8cffdf4e2 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1050,7 +1050,9 @@ extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family); static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) { - return !init_net.xfrm.policy_count[XFRM_POLICY_OUT] || + struct net *net = dev_net(skb->dev); + + return !net->xfrm.policy_count[XFRM_POLICY_OUT] || (skb->dst->flags & DST_NOXFRM) || __xfrm_route_forward(skb, family); } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8097c9958cfc..54b50a20804f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2044,6 +2044,7 @@ EXPORT_SYMBOL(__xfrm_policy_check); int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) { + struct net *net = dev_net(skb->dev); struct flowi fl; if (xfrm_decode_session(skb, &fl, family) < 0) { @@ -2052,7 +2053,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) return 0; } - return xfrm_lookup(&init_net, &skb->dst, &fl, NULL, 0) == 0; + return xfrm_lookup(net, &skb->dst, &fl, NULL, 0) == 0; } EXPORT_SYMBOL(__xfrm_route_forward); -- cgit v1.2.3 From ddcfd79680c1dc74eb5f24aa70785c11bf7eec8f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:37:23 -0800 Subject: netns xfrm: dst garbage-collecting in netns Pass netns pointer to struct xfrm_policy_afinfo::garbage_collect() [This needs more thoughts on what to do with dst_ops] [Currently stub to init_net] Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/xfrm6_policy.c | 2 +- net/xfrm/xfrm_policy.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 36c8cffdf4e2..bd2515005ae2 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -265,7 +265,7 @@ struct xfrm_dst; struct xfrm_policy_afinfo { unsigned short family; struct dst_ops *dst_ops; - void (*garbage_collect)(void); + void (*garbage_collect)(struct net *net); struct dst_entry *(*dst_lookup)(int tos, xfrm_address_t *saddr, xfrm_address_t *daddr); int (*get_saddr)(xfrm_address_t *saddr, xfrm_address_t *daddr); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 84dbb5a03cc2..a881ca38dddb 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -187,7 +187,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) static inline int xfrm4_garbage_collect(struct dst_ops *ops) { - xfrm4_policy_afinfo.garbage_collect(); + xfrm4_policy_afinfo.garbage_collect(&init_net); return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2); } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 3b67ce7786ec..2df8a78fc3df 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -220,7 +220,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) static inline int xfrm6_garbage_collect(struct dst_ops *ops) { - xfrm6_policy_afinfo.garbage_collect(); + xfrm6_policy_afinfo.garbage_collect(&init_net); return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2); } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 49e089826f45..7ebbdd63fca0 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2173,9 +2173,9 @@ static int unused_bundle(struct dst_entry *dst) return !atomic_read(&dst->__refcnt); } -static void __xfrm_garbage_collect(void) +static void __xfrm_garbage_collect(struct net *net) { - xfrm_prune_bundles(&init_net, unused_bundle); + xfrm_prune_bundles(net, unused_bundle); } static int xfrm_flush_bundles(struct net *net) -- cgit v1.2.3 From a6483b790f8efcd8db190c1c0ff93f9d9efe919a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:38:20 -0800 Subject: netns xfrm: per-netns NETLINK_XFRM socket Stub senders to init_net's one temporarily. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/xfrm.h | 2 + include/net/xfrm.h | 7 +-- net/xfrm/xfrm_output.c | 3 +- net/xfrm/xfrm_state.c | 7 +-- net/xfrm/xfrm_user.c | 108 ++++++++++++++++++++++++++++++++--------------- 5 files changed, 83 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index c53d17357a49..09f3060e9d18 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -39,6 +39,8 @@ struct netns_xfrm { struct xfrm_policy_hash policy_bydst[XFRM_POLICY_MAX * 2]; unsigned int policy_count[XFRM_POLICY_MAX * 2]; struct work_struct policy_hash_work; + + struct sock *nlsk; }; #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index bd2515005ae2..e027179e8199 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -48,7 +48,6 @@ DECLARE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics); #define XFRM_INC_STATS_USER(field) #endif -extern struct sock *xfrm_nl; extern u32 sysctl_xfrm_aevent_etime; extern u32 sysctl_xfrm_aevent_rseqth; extern int sysctl_xfrm_larval_drop; @@ -1516,18 +1515,20 @@ static inline int xfrm_policy_id2dir(u32 index) return index & 7; } -static inline int xfrm_aevent_is_on(void) +#ifdef CONFIG_XFRM +static inline int xfrm_aevent_is_on(struct net *net) { struct sock *nlsk; int ret = 0; rcu_read_lock(); - nlsk = rcu_dereference(xfrm_nl); + nlsk = rcu_dereference(net->xfrm.nlsk); if (nlsk) ret = netlink_has_listeners(nlsk, XFRMNLGRP_AEVENTS); rcu_read_unlock(); return ret; } +#endif static inline int xfrm_alg_len(struct xfrm_algo *alg) { diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index dc50f1e71f76..ba90e5e50ffc 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -41,6 +41,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err) { struct dst_entry *dst = skb->dst; struct xfrm_state *x = dst->xfrm; + struct net *net = xs_net(x); if (err <= 0) goto resume; @@ -74,7 +75,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err) err = -EOVERFLOW; goto error; } - if (xfrm_aevent_is_on()) + if (xfrm_aevent_is_on(net)) xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index ea340bbbcc64..21db37ab0a2f 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -24,9 +24,6 @@ #include "xfrm_hash.h" -struct sock *xfrm_nl; -EXPORT_SYMBOL(xfrm_nl); - u32 sysctl_xfrm_aevent_etime __read_mostly = XFRM_AE_ETIME; EXPORT_SYMBOL(sysctl_xfrm_aevent_etime); @@ -1659,7 +1656,7 @@ static void xfrm_replay_timer_handler(unsigned long data) spin_lock(&x->lock); if (x->km.state == XFRM_STATE_VALID) { - if (xfrm_aevent_is_on()) + if (xfrm_aevent_is_on(xs_net(x))) xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT); else x->xflags |= XFRM_TIME_DEFER; @@ -1715,7 +1712,7 @@ void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) x->replay.bitmap |= (1U << diff); } - if (xfrm_aevent_is_on()) + if (xfrm_aevent_is_on(xs_net(x))) xfrm_replay_notify(x, XFRM_REPLAY_UPDATE); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index f6e02726cf1b..8b5b01dfb77a 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -703,6 +703,7 @@ nla_put_failure: static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct sk_buff *r_skb; u32 *flags = nlmsg_data(nlh); u32 spid = NETLINK_CB(skb).pid; @@ -715,7 +716,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, if (build_spdinfo(r_skb, spid, seq, *flags) < 0) BUG(); - return nlmsg_unicast(xfrm_nl, r_skb, spid); + return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); } static inline size_t xfrm_sadinfo_msgsize(void) @@ -756,6 +757,7 @@ nla_put_failure: static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = sock_net(skb->sk); struct sk_buff *r_skb; u32 *flags = nlmsg_data(nlh); u32 spid = NETLINK_CB(skb).pid; @@ -768,12 +770,13 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, if (build_sadinfo(r_skb, spid, seq, *flags) < 0) BUG(); - return nlmsg_unicast(xfrm_nl, r_skb, spid); + return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); } static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = &init_net; struct xfrm_usersa_id *p = nlmsg_data(nlh); struct xfrm_state *x; struct sk_buff *resp_skb; @@ -787,7 +790,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(xfrm_nl, resp_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid); } xfrm_state_put(x); out_noput: @@ -820,6 +823,7 @@ static int verify_userspi_info(struct xfrm_userspi_info *p) static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = &init_net; struct xfrm_state *x; struct xfrm_userspi_info *p; struct sk_buff *resp_skb; @@ -837,7 +841,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, x = NULL; if (p->info.seq) { - x = xfrm_find_acq_byseq(&init_net, p->info.seq); + x = xfrm_find_acq_byseq(net, p->info.seq); if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) { xfrm_state_put(x); x = NULL; @@ -845,7 +849,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, } if (!x) - x = xfrm_find_acq(&init_net, p->info.mode, p->info.reqid, + x = xfrm_find_acq(net, p->info.mode, p->info.reqid, p->info.id.proto, daddr, &p->info.saddr, 1, family); @@ -863,7 +867,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; } - err = nlmsg_unicast(xfrm_nl, resp_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid); out: xfrm_state_put(x); @@ -1311,6 +1315,7 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = &init_net; struct xfrm_policy *xp; struct xfrm_userpolicy_id *p; u8 type = XFRM_POLICY_TYPE_MAIN; @@ -1330,7 +1335,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, return err; if (p->index) - xp = xfrm_policy_byid(&init_net, type, p->dir, p->index, delete, &err); + xp = xfrm_policy_byid(net, type, p->dir, p->index, delete, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; @@ -1347,7 +1352,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(&init_net, type, p->dir, &p->sel, ctx, + xp = xfrm_policy_bysel_ctx(net, type, p->dir, &p->sel, ctx, delete, &err); security_xfrm_policy_free(ctx); } @@ -1361,7 +1366,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(xfrm_nl, resp_skb, + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid); } } else { @@ -1457,6 +1462,7 @@ nla_put_failure: static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs) { + struct net *net = &init_net; struct xfrm_state *x; struct sk_buff *r_skb; int err; @@ -1468,7 +1474,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (r_skb == NULL) return -ENOMEM; - x = xfrm_state_lookup(&init_net, &id->daddr, id->spi, id->proto, id->family); + x = xfrm_state_lookup(net, &id->daddr, id->spi, id->proto, id->family); if (x == NULL) { kfree_skb(r_skb); return -ESRCH; @@ -1486,7 +1492,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (build_aevent(r_skb, x, &c) < 0) BUG(); - err = nlmsg_unicast(xfrm_nl, r_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).pid); spin_unlock_bh(&x->lock); xfrm_state_put(x); return err; @@ -1869,6 +1875,7 @@ static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_migrate, struct xfrm_kmaddress *k) { + struct net *net = &init_net; struct sk_buff *skb; skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k), GFP_ATOMIC); @@ -1879,7 +1886,7 @@ static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, if (build_migrate(skb, m, num_migrate, k, sel, dir, type) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC); } #else static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, @@ -1968,6 +1975,7 @@ static struct xfrm_link { static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { + struct net *net = sock_net(skb->sk); struct nlattr *attrs[XFRMA_MAX+1]; struct xfrm_link *link; int type, err; @@ -1989,7 +1997,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (link->dump == NULL) return -EINVAL; - return netlink_dump_start(xfrm_nl, skb, nlh, link->dump, link->done); + return netlink_dump_start(net->xfrm.nlsk, skb, nlh, link->dump, link->done); } err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, XFRMA_MAX, @@ -2033,6 +2041,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_eve static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c) { + struct net *net = &init_net; struct sk_buff *skb; skb = nlmsg_new(xfrm_expire_msgsize(), GFP_ATOMIC); @@ -2042,11 +2051,12 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c) if (build_expire(skb, x, c) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); } static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c) { + struct net *net = &init_net; struct sk_buff *skb; skb = nlmsg_new(xfrm_aevent_msgsize(), GFP_ATOMIC); @@ -2056,11 +2066,12 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c) if (build_aevent(skb, x, c) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_AEVENTS, GFP_ATOMIC); } static int xfrm_notify_sa_flush(struct km_event *c) { + struct net *net = &init_net; struct xfrm_usersa_flush *p; struct nlmsghdr *nlh; struct sk_buff *skb; @@ -2081,7 +2092,7 @@ static int xfrm_notify_sa_flush(struct km_event *c) nlmsg_end(skb, nlh); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); } static inline size_t xfrm_sa_len(struct xfrm_state *x) @@ -2111,6 +2122,7 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c) { + struct net *net = &init_net; struct xfrm_usersa_info *p; struct xfrm_usersa_id *id; struct nlmsghdr *nlh; @@ -2155,7 +2167,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c) nlmsg_end(skb, nlh); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); nla_put_failure: /* Somebody screwed up with xfrm_sa_len! */ @@ -2235,6 +2247,7 @@ nlmsg_failure: static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, struct xfrm_policy *xp, int dir) { + struct net *net = xs_net(x); struct sk_buff *skb; skb = nlmsg_new(xfrm_acquire_msgsize(x, xp), GFP_ATOMIC); @@ -2244,7 +2257,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, if (build_acquire(skb, x, xt, xp, dir) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC); } /* User gives us xfrm_user_policy_info followed by an array of 0 @@ -2344,6 +2357,7 @@ nlmsg_failure: static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) { + struct net *net = &init_net; struct sk_buff *skb; skb = nlmsg_new(xfrm_polexpire_msgsize(xp), GFP_ATOMIC); @@ -2353,11 +2367,12 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve if (build_polexpire(skb, xp, dir, c) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); } static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c) { + struct net *net = &init_net; struct xfrm_userpolicy_info *p; struct xfrm_userpolicy_id *id; struct nlmsghdr *nlh; @@ -2408,7 +2423,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * nlmsg_end(skb, nlh); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); nlmsg_failure: kfree_skb(skb); @@ -2417,6 +2432,7 @@ nlmsg_failure: static int xfrm_notify_policy_flush(struct km_event *c) { + struct net *net = &init_net; struct nlmsghdr *nlh; struct sk_buff *skb; @@ -2432,7 +2448,7 @@ static int xfrm_notify_policy_flush(struct km_event *c) nlmsg_end(skb, nlh); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); nlmsg_failure: kfree_skb(skb); @@ -2491,6 +2507,7 @@ nla_put_failure: static int xfrm_send_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) { + struct net *net = &init_net; struct sk_buff *skb; skb = nlmsg_new(xfrm_report_msgsize(), GFP_ATOMIC); @@ -2500,7 +2517,7 @@ static int xfrm_send_report(u8 proto, struct xfrm_selector *sel, if (build_report(skb, proto, sel, addr) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); } static inline size_t xfrm_mapping_msgsize(void) @@ -2536,6 +2553,7 @@ static int build_mapping(struct sk_buff *skb, struct xfrm_state *x, static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) { + struct net *net = xs_net(x); struct sk_buff *skb; if (x->id.proto != IPPROTO_ESP) @@ -2551,7 +2569,7 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, if (build_mapping(skb, x, ipaddr, sport) < 0) BUG(); - return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC); + return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC); } static struct xfrm_mgr netlink_mgr = { @@ -2565,33 +2583,53 @@ static struct xfrm_mgr netlink_mgr = { .new_mapping = xfrm_send_mapping, }; -static int __init xfrm_user_init(void) +static int __net_init xfrm_user_net_init(struct net *net) { struct sock *nlsk; - printk(KERN_INFO "Initializing XFRM netlink socket\n"); - - nlsk = netlink_kernel_create(&init_net, NETLINK_XFRM, XFRMNLGRP_MAX, + nlsk = netlink_kernel_create(net, NETLINK_XFRM, XFRMNLGRP_MAX, xfrm_netlink_rcv, NULL, THIS_MODULE); if (nlsk == NULL) return -ENOMEM; - rcu_assign_pointer(xfrm_nl, nlsk); - - xfrm_register_km(&netlink_mgr); - + rcu_assign_pointer(net->xfrm.nlsk, nlsk); return 0; } -static void __exit xfrm_user_exit(void) +static void __net_exit xfrm_user_net_exit(struct net *net) { - struct sock *nlsk = xfrm_nl; + struct sock *nlsk = net->xfrm.nlsk; - xfrm_unregister_km(&netlink_mgr); - rcu_assign_pointer(xfrm_nl, NULL); + rcu_assign_pointer(net->xfrm.nlsk, NULL); synchronize_rcu(); netlink_kernel_release(nlsk); } +static struct pernet_operations xfrm_user_net_ops = { + .init = xfrm_user_net_init, + .exit = xfrm_user_net_exit, +}; + +static int __init xfrm_user_init(void) +{ + int rv; + + printk(KERN_INFO "Initializing XFRM netlink socket\n"); + + rv = register_pernet_subsys(&xfrm_user_net_ops); + if (rv < 0) + return rv; + rv = xfrm_register_km(&netlink_mgr); + if (rv < 0) + unregister_pernet_subsys(&xfrm_user_net_ops); + return rv; +} + +static void __exit xfrm_user_exit(void) +{ + xfrm_unregister_km(&netlink_mgr); + unregister_pernet_subsys(&xfrm_user_net_ops); +} + module_init(xfrm_user_init); module_exit(xfrm_user_exit); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 7067802e262457a9737521e5669b622028b2283a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:50:36 -0800 Subject: netns xfrm: pass netns with KM notifications SA and SPD flush are executed with NULL SA and SPD respectively, for these cases pass netns explicitly from userspace socket. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 + net/key/af_key.c | 2 ++ net/xfrm/xfrm_user.c | 6 ++++-- 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e027179e8199..52e784fa2c50 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -256,6 +256,7 @@ struct km_event u32 seq; u32 pid; u32 event; + struct net *net; }; struct net_device; diff --git a/net/key/af_key.c b/net/key/af_key.c index a0d849848ddd..ea7755ab7e6a 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1739,6 +1739,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd c.seq = hdr->sadb_msg_seq; c.pid = hdr->sadb_msg_pid; c.event = XFRM_MSG_FLUSHSA; + c.net = &init_net; km_state_notify(NULL, &c); return 0; @@ -2693,6 +2694,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg c.event = XFRM_MSG_FLUSHPOLICY; c.pid = hdr->sadb_msg_pid; c.seq = hdr->sadb_msg_seq; + c.net = &init_net; km_policy_notify(NULL, 0, &c); return 0; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ab8b138e5e2f..3e32ec2ea1ad 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1418,6 +1418,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; c.pid = nlh->nlmsg_pid; + c.net = net; km_state_notify(NULL, &c); return 0; @@ -1569,6 +1570,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; c.pid = nlh->nlmsg_pid; + c.net = net; km_policy_notify(NULL, 0, &c); return 0; } @@ -2084,7 +2086,7 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, struct km_event *c) static int xfrm_notify_sa_flush(struct km_event *c) { - struct net *net = &init_net; + struct net *net = c->net; struct xfrm_usersa_flush *p; struct nlmsghdr *nlh; struct sk_buff *skb; @@ -2446,7 +2448,7 @@ nlmsg_failure: static int xfrm_notify_policy_flush(struct km_event *c) { - struct net *net = &init_net; + struct net *net = c->net; struct nlmsghdr *nlh; struct sk_buff *skb; -- cgit v1.2.3 From db983c1144884cab10d6397532f4bf05eb0c01d2 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:51:01 -0800 Subject: netns xfrm: KM reporting in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 4 ++-- net/ipv6/mip6.c | 3 ++- net/xfrm/xfrm_state.c | 4 ++-- net/xfrm/xfrm_user.c | 5 ++--- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 52e784fa2c50..f3ea1607c595 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -552,7 +552,7 @@ struct xfrm_mgr struct xfrm_policy *(*compile_policy)(struct sock *sk, int opt, u8 *data, int len, int *dir); int (*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); int (*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c); - int (*report)(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); + int (*report)(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); int (*migrate)(struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles, struct xfrm_kmaddress *k); }; @@ -1471,7 +1471,7 @@ extern int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, extern int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid); -extern int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); +extern int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); extern void xfrm_input_init(void); extern int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq); diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 31295c8f6196..f995e19c87a9 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -205,6 +205,7 @@ static inline int mip6_report_rl_allow(struct timeval *stamp, static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct flowi *fl) { + struct net *net = xs_net(x); struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb; struct ipv6_destopt_hao *hao = NULL; struct xfrm_selector sel; @@ -247,7 +248,7 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct sel.sport_mask = htons(~0); sel.ifindex = fl->oif; - err = km_report(IPPROTO_DSTOPTS, &sel, + err = km_report(net, IPPROTO_DSTOPTS, &sel, (hao ? (xfrm_address_t *)&hao->addr : NULL)); out: diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 21db37ab0a2f..d594b5af5f6b 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1833,7 +1833,7 @@ int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, EXPORT_SYMBOL(km_migrate); #endif -int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) +int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) { int err = -EINVAL; int ret; @@ -1842,7 +1842,7 @@ int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) read_lock(&xfrm_km_lock); list_for_each_entry(km, &xfrm_km_list, list) { if (km->report) { - ret = km->report(proto, sel, addr); + ret = km->report(net, proto, sel, addr); if (!ret) err = ret; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 3e32ec2ea1ad..b7240d5b77ad 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2520,10 +2520,9 @@ nla_put_failure: return -EMSGSIZE; } -static int xfrm_send_report(u8 proto, struct xfrm_selector *sel, - xfrm_address_t *addr) +static int xfrm_send_report(struct net *net, u8 proto, + struct xfrm_selector *sel, xfrm_address_t *addr) { - struct net *net = &init_net; struct sk_buff *skb; skb = nlmsg_new(xfrm_report_msgsize(), GFP_ATOMIC); -- cgit v1.2.3 From c5b3cf46eabe6e7459125fc6e2033b4222665017 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:51:25 -0800 Subject: netns xfrm: ->dst_lookup in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 3 ++- net/ipv4/xfrm4_policy.c | 7 ++++--- net/ipv6/xfrm6_policy.c | 7 ++++--- net/xfrm/xfrm_policy.c | 7 ++++--- 4 files changed, 14 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index f3ea1607c595..b16d4c0b16e0 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -266,7 +266,8 @@ struct xfrm_policy_afinfo { unsigned short family; struct dst_ops *dst_ops; void (*garbage_collect)(struct net *net); - struct dst_entry *(*dst_lookup)(int tos, xfrm_address_t *saddr, + struct dst_entry *(*dst_lookup)(struct net *net, int tos, + xfrm_address_t *saddr, xfrm_address_t *daddr); int (*get_saddr)(xfrm_address_t *saddr, xfrm_address_t *daddr); struct dst_entry *(*find_bundle)(struct flowi *fl, struct xfrm_policy *policy); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index a881ca38dddb..e1cf9ed07d49 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -18,7 +18,8 @@ static struct dst_ops xfrm4_dst_ops; static struct xfrm_policy_afinfo xfrm4_policy_afinfo; -static struct dst_entry *xfrm4_dst_lookup(int tos, xfrm_address_t *saddr, +static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, + xfrm_address_t *saddr, xfrm_address_t *daddr) { struct flowi fl = { @@ -36,7 +37,7 @@ static struct dst_entry *xfrm4_dst_lookup(int tos, xfrm_address_t *saddr, if (saddr) fl.fl4_src = saddr->a4; - err = __ip_route_output_key(&init_net, &rt, &fl); + err = __ip_route_output_key(net, &rt, &fl); dst = &rt->u.dst; if (err) dst = ERR_PTR(err); @@ -48,7 +49,7 @@ static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) struct dst_entry *dst; struct rtable *rt; - dst = xfrm4_dst_lookup(0, NULL, daddr); + dst = xfrm4_dst_lookup(&init_net, 0, NULL, daddr); if (IS_ERR(dst)) return -EHOSTUNREACH; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 2df8a78fc3df..d7a5b8bc3770 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -27,7 +27,8 @@ static struct dst_ops xfrm6_dst_ops; static struct xfrm_policy_afinfo xfrm6_policy_afinfo; -static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr, +static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, + xfrm_address_t *saddr, xfrm_address_t *daddr) { struct flowi fl = {}; @@ -38,7 +39,7 @@ static struct dst_entry *xfrm6_dst_lookup(int tos, xfrm_address_t *saddr, if (saddr) memcpy(&fl.fl6_src, saddr, sizeof(fl.fl6_src)); - dst = ip6_route_output(&init_net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl); err = dst->error; if (dst->error) { @@ -54,7 +55,7 @@ static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) struct dst_entry *dst; struct net_device *dev; - dst = xfrm6_dst_lookup(0, NULL, daddr); + dst = xfrm6_dst_lookup(&init_net, 0, NULL, daddr); if (IS_ERR(dst)) return -EHOSTUNREACH; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7ebbdd63fca0..2b0a80b6259b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -92,7 +92,7 @@ int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl, return 0; } -static inline struct dst_entry *__xfrm_dst_lookup(int tos, +static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, xfrm_address_t *saddr, xfrm_address_t *daddr, int family) @@ -104,7 +104,7 @@ static inline struct dst_entry *__xfrm_dst_lookup(int tos, if (unlikely(afinfo == NULL)) return ERR_PTR(-EAFNOSUPPORT); - dst = afinfo->dst_lookup(tos, saddr, daddr); + dst = afinfo->dst_lookup(net, tos, saddr, daddr); xfrm_policy_put_afinfo(afinfo); @@ -116,6 +116,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, xfrm_address_t *prev_daddr, int family) { + struct net *net = xs_net(x); xfrm_address_t *saddr = &x->props.saddr; xfrm_address_t *daddr = &x->id.daddr; struct dst_entry *dst; @@ -129,7 +130,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, daddr = x->coaddr; } - dst = __xfrm_dst_lookup(tos, saddr, daddr, family); + dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family); if (!IS_ERR(dst)) { if (prev_saddr != saddr) -- cgit v1.2.3 From fbda33b2b85941c1ae3a0d89522dec5c1b1bd98c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:56:49 -0800 Subject: netns xfrm: ->get_saddr in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- net/ipv4/xfrm4_policy.c | 5 +++-- net/ipv6/xfrm6_policy.c | 5 +++-- net/xfrm/xfrm_policy.c | 7 ++++--- 4 files changed, 11 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index b16d4c0b16e0..d076f3d34278 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -269,7 +269,7 @@ struct xfrm_policy_afinfo { struct dst_entry *(*dst_lookup)(struct net *net, int tos, xfrm_address_t *saddr, xfrm_address_t *daddr); - int (*get_saddr)(xfrm_address_t *saddr, xfrm_address_t *daddr); + int (*get_saddr)(struct net *net, xfrm_address_t *saddr, xfrm_address_t *daddr); struct dst_entry *(*find_bundle)(struct flowi *fl, struct xfrm_policy *policy); void (*decode_session)(struct sk_buff *skb, struct flowi *fl, diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index e1cf9ed07d49..2ad24ba31f9d 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -44,12 +44,13 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, return dst; } -static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) +static int xfrm4_get_saddr(struct net *net, + xfrm_address_t *saddr, xfrm_address_t *daddr) { struct dst_entry *dst; struct rtable *rt; - dst = xfrm4_dst_lookup(&init_net, 0, NULL, daddr); + dst = xfrm4_dst_lookup(net, 0, NULL, daddr); if (IS_ERR(dst)) return -EHOSTUNREACH; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index d7a5b8bc3770..97ab068e8ccc 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -50,12 +50,13 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, return dst; } -static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) +static int xfrm6_get_saddr(struct net *net, + xfrm_address_t *saddr, xfrm_address_t *daddr) { struct dst_entry *dst; struct net_device *dev; - dst = xfrm6_dst_lookup(&init_net, 0, NULL, daddr); + dst = xfrm6_dst_lookup(net, 0, NULL, daddr); if (IS_ERR(dst)) return -EHOSTUNREACH; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 2b0a80b6259b..7c7bb54f2265 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1187,7 +1187,7 @@ int __xfrm_sk_clone_policy(struct sock *sk) } static int -xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote, +xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote, unsigned short family) { int err; @@ -1195,7 +1195,7 @@ xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote, if (unlikely(afinfo == NULL)) return -EINVAL; - err = afinfo->get_saddr(local, remote); + err = afinfo->get_saddr(net, local, remote); xfrm_policy_put_afinfo(afinfo); return err; } @@ -1207,6 +1207,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, struct xfrm_state **xfrm, unsigned short family) { + struct net *net = xp_net(policy); int nx; int i, error; xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); @@ -1225,7 +1226,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, local = &tmpl->saddr; family = tmpl->encap_family; if (xfrm_addr_any(local, family)) { - error = xfrm_get_saddr(&tmp, remote, family); + error = xfrm_get_saddr(net, &tmp, remote, family); if (error) goto fail; local = &tmp; -- cgit v1.2.3 From 59c9940ed0ef026673cac52f2eaed77af7d486da Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 17:59:52 -0800 Subject: netns xfrm: per-netns MIBs Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/netns/mib.h | 3 ++ include/net/xfrm.h | 13 ++++---- net/ipv6/xfrm6_input.c | 9 +++--- net/xfrm/xfrm_input.c | 22 ++++++------- net/xfrm/xfrm_output.c | 15 ++++----- net/xfrm/xfrm_policy.c | 82 ++++++++++++++++++++++++++++--------------------- net/xfrm/xfrm_proc.c | 2 +- 7 files changed, 81 insertions(+), 65 deletions(-) (limited to 'include') diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h index 10cb7c336de5..0b44112e2366 100644 --- a/include/net/netns/mib.h +++ b/include/net/netns/mib.h @@ -20,6 +20,9 @@ struct netns_mib { DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); DEFINE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); #endif +#ifdef CONFIG_XFRM_STATISTICS + DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics); +#endif }; #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d076f3d34278..78ec3e8a95ed 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -38,14 +38,13 @@ MODULE_ALIAS("xfrm-type-" __stringify(family) "-" __stringify(proto)) #ifdef CONFIG_XFRM_STATISTICS -DECLARE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics); -#define XFRM_INC_STATS(field) SNMP_INC_STATS(xfrm_statistics, field) -#define XFRM_INC_STATS_BH(field) SNMP_INC_STATS_BH(xfrm_statistics, field) -#define XFRM_INC_STATS_USER(field) SNMP_INC_STATS_USER(xfrm_statistics, field) +#define XFRM_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.xfrm_statistics, field) +#define XFRM_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.xfrm_statistics, field) +#define XFRM_INC_STATS_USER(net, field) SNMP_INC_STATS_USER((net)-mib.xfrm_statistics, field) #else -#define XFRM_INC_STATS(field) -#define XFRM_INC_STATS_BH(field) -#define XFRM_INC_STATS_USER(field) +#define XFRM_INC_STATS(net, field) ((void)(net)) +#define XFRM_INC_STATS_BH(net, field) ((void)(net)) +#define XFRM_INC_STATS_USER(net, field) ((void)(net)) #endif extern u32 sysctl_xfrm_aevent_etime; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index b69766a77743..9084582d236b 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -58,6 +58,7 @@ EXPORT_SYMBOL(xfrm6_rcv); int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto) { + struct net *net = dev_net(skb->dev); struct xfrm_state *x = NULL; int i = 0; @@ -67,7 +68,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, sp = secpath_dup(skb->sp); if (!sp) { - XFRM_INC_STATS(LINUX_MIB_XFRMINERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); goto drop; } if (skb->sp) @@ -76,7 +77,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, } if (1 + skb->sp->len == XFRM_MAX_DEPTH) { - XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto drop; } @@ -100,7 +101,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, break; } - x = xfrm_state_lookup_byaddr(&init_net, dst, src, proto, AF_INET6); + x = xfrm_state_lookup_byaddr(net, dst, src, proto, AF_INET6); if (!x) continue; @@ -122,7 +123,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, } if (!x) { - XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); xfrm_audit_state_notfound_simple(skb, AF_INET6); goto drop; } diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index a714dce03dc4..b4a13178fb40 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -128,7 +128,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) sp = secpath_dup(skb->sp); if (!sp) { - XFRM_INC_STATS(LINUX_MIB_XFRMINERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINERROR); goto drop; } if (skb->sp) @@ -142,19 +142,19 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) seq = 0; if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) { - XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); goto drop; } do { if (skb->sp->len == XFRM_MAX_DEPTH) { - XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto drop; } x = xfrm_state_lookup(net, daddr, spi, nexthdr, family); if (x == NULL) { - XFRM_INC_STATS(LINUX_MIB_XFRMINNOSTATES); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); xfrm_audit_state_notfound(skb, family, spi, seq); goto drop; } @@ -163,22 +163,22 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) spin_lock(&x->lock); if (unlikely(x->km.state != XFRM_STATE_VALID)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEINVALID); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEINVALID); goto drop_unlock; } if ((x->encap ? x->encap->encap_type : 0) != encap_type) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); goto drop_unlock; } if (x->props.replay_window && xfrm_replay_check(x, skb, seq)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATESEQERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR); goto drop_unlock; } if (xfrm_state_check_expire(x)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEEXPIRED); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEEXPIRED); goto drop_unlock; } @@ -199,7 +199,7 @@ resume: x->type->proto); x->stats.integrity_failed++; } - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEPROTOERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR); goto drop_unlock; } @@ -225,7 +225,7 @@ resume: } if (inner_mode->input(x, skb)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMODEERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); goto drop; } @@ -243,7 +243,7 @@ resume: err = xfrm_parse_spi(skb, nexthdr, &spi, &seq); if (err < 0) { - XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); goto drop; } } while (!err); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index ba90e5e50ffc..c235597ba8dd 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -49,27 +49,27 @@ static int xfrm_output_one(struct sk_buff *skb, int err) do { err = xfrm_state_check_space(x, skb); if (err) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); goto error_nolock; } err = x->outer_mode->output(x, skb); if (err) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEMODEERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR); goto error_nolock; } spin_lock_bh(&x->lock); err = xfrm_state_check_expire(x); if (err) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEEXPIRED); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED); goto error; } if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { XFRM_SKB_CB(skb)->seq.output = ++x->replay.oseq; if (unlikely(x->replay.oseq == 0)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATESEQERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR); x->replay.oseq--; xfrm_audit_state_replay_overflow(x, skb); err = -EOVERFLOW; @@ -90,12 +90,12 @@ static int xfrm_output_one(struct sk_buff *skb, int err) resume: if (err) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEPROTOERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEPROTOERROR); goto error_nolock; } if (!(skb->dst = dst_pop(dst))) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); err = -EHOSTUNREACH; goto error_nolock; } @@ -179,6 +179,7 @@ static int xfrm_output_gso(struct sk_buff *skb) int xfrm_output(struct sk_buff *skb) { + struct net *net = dev_net(skb->dst->dev); int err; if (skb_is_gso(skb)) @@ -187,7 +188,7 @@ int xfrm_output(struct sk_buff *skb) if (skb->ip_summed == CHECKSUM_PARTIAL) { err = skb_checksum_help(skb); if (err) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); kfree_skb(skb); return err; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index fcf8c928285a..e239a25e571c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -36,11 +36,6 @@ int sysctl_xfrm_larval_drop __read_mostly = 1; -#ifdef CONFIG_XFRM_STATISTICS -DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly; -EXPORT_SYMBOL(xfrm_statistics); -#endif - DEFINE_MUTEX(xfrm_cfg_mutex); EXPORT_SYMBOL(xfrm_cfg_mutex); @@ -1570,7 +1565,7 @@ restart: policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); err = PTR_ERR(policy); if (IS_ERR(policy)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); goto dropdst; } } @@ -1585,7 +1580,7 @@ restart: dir, xfrm_policy_lookup); err = PTR_ERR(policy); if (IS_ERR(policy)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); goto dropdst; } } @@ -1608,7 +1603,7 @@ restart: default: case XFRM_POLICY_BLOCK: /* Prohibit the flow */ - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); err = -EPERM; goto error; @@ -1628,7 +1623,7 @@ restart: */ dst = xfrm_find_bundle(fl, policy, family); if (IS_ERR(dst)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); err = PTR_ERR(dst); goto error; } @@ -1644,12 +1639,12 @@ restart: XFRM_POLICY_OUT); if (pols[1]) { if (IS_ERR(pols[1])) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); err = PTR_ERR(pols[1]); goto error; } if (pols[1]->action == XFRM_POLICY_BLOCK) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); err = -EPERM; goto error; } @@ -1680,7 +1675,7 @@ restart: /* EREMOTE tells the caller to generate * a one-shot blackhole route. */ - XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); xfrm_pol_put(policy); return -EREMOTE; } @@ -1696,7 +1691,7 @@ restart: nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); if (nx == -EAGAIN && signal_pending(current)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); err = -ERESTART; goto error; } @@ -1708,7 +1703,7 @@ restart: err = nx; } if (err < 0) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); goto error; } } @@ -1721,7 +1716,7 @@ restart: dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig); err = PTR_ERR(dst); if (IS_ERR(dst)) { - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLEGENERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); goto error; } @@ -1742,9 +1737,9 @@ restart: dst_free(dst); if (pol_dead) - XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLDEAD); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLDEAD); else - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); err = -EHOSTUNREACH; goto error; } @@ -1756,7 +1751,7 @@ restart: if (unlikely(err)) { write_unlock_bh(&policy->lock); dst_free(dst); - XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR); goto error; } @@ -1912,7 +1907,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, fl_dir = policy_to_flow_dir(dir); if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) { - XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); return 0; } @@ -1925,7 +1920,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, for (i=skb->sp->len-1; i>=0; i--) { struct xfrm_state *x = skb->sp->xvec[i]; if (!xfrm_selector_match(&x->sel, &fl, family)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); return 0; } } @@ -1935,7 +1930,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (sk && sk->sk_policy[dir]) { pol = xfrm_sk_policy_lookup(sk, dir, &fl); if (IS_ERR(pol)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; } } @@ -1945,14 +1940,14 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, xfrm_policy_lookup); if (IS_ERR(pol)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; } if (!pol) { if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) { xfrm_secpath_reject(xerr_idx, skb, &fl); - XFRM_INC_STATS(LINUX_MIB_XFRMINNOPOLS); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); return 0; } return 1; @@ -1969,7 +1964,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, XFRM_POLICY_IN); if (pols[1]) { if (IS_ERR(pols[1])) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; } pols[1]->curlft.use_time = get_seconds(); @@ -1993,11 +1988,11 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, for (pi = 0; pi < npols; pi++) { if (pols[pi] != pol && pols[pi]->action != XFRM_POLICY_ALLOW) { - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); goto reject; } if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) { - XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto reject_error; } for (i = 0; i < pols[pi]->xfrm_nr; i++) @@ -2021,20 +2016,20 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (k < -1) /* "-2 - errored_index" returned */ xerr_idx = -(2+k); - XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); goto reject; } } if (secpath_has_nontransport(sp, k, &xerr_idx)) { - XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); goto reject; } xfrm_pols_put(pols, npols); return 1; } - XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); reject: xfrm_secpath_reject(xerr_idx, skb, &fl); @@ -2051,7 +2046,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) if (xfrm_decode_session(skb, &fl, family) < 0) { /* XXX: we should have something like FWDHDRERROR here. */ - XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); return 0; } @@ -2380,13 +2375,27 @@ static struct notifier_block xfrm_dev_notifier = { }; #ifdef CONFIG_XFRM_STATISTICS -static int __init xfrm_statistics_init(void) +static int __net_init xfrm_statistics_init(struct net *net) { - if (snmp_mib_init((void **)xfrm_statistics, + if (snmp_mib_init((void **)net->mib.xfrm_statistics, sizeof(struct linux_xfrm_mib)) < 0) return -ENOMEM; return 0; } + +static void xfrm_statistics_fini(struct net *net) +{ + snmp_mib_free((void **)net->mib.xfrm_statistics); +} +#else +static int __net_init xfrm_statistics_init(struct net *net) +{ + return 0; +} + +static void xfrm_statistics_fini(struct net *net) +{ +} #endif static int __net_init xfrm_policy_init(struct net *net) @@ -2480,6 +2489,9 @@ static int __net_init xfrm_net_init(struct net *net) { int rv; + rv = xfrm_statistics_init(net); + if (rv < 0) + goto out_statistics; rv = xfrm_state_init(net); if (rv < 0) goto out_state; @@ -2491,6 +2503,8 @@ static int __net_init xfrm_net_init(struct net *net) out_policy: xfrm_state_fini(net); out_state: + xfrm_statistics_fini(net); +out_statistics: return rv; } @@ -2498,6 +2512,7 @@ static void __net_exit xfrm_net_exit(struct net *net) { xfrm_policy_fini(net); xfrm_state_fini(net); + xfrm_statistics_fini(net); } static struct pernet_operations __net_initdata xfrm_net_ops = { @@ -2508,9 +2523,6 @@ static struct pernet_operations __net_initdata xfrm_net_ops = { void __init xfrm_init(void) { register_pernet_subsys(&xfrm_net_ops); -#ifdef CONFIG_XFRM_STATISTICS - xfrm_statistics_init(); -#endif xfrm_input_init(); #ifdef CONFIG_XFRM_STATISTICS xfrm_proc_init(); diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 2b0db13f0cda..27a2ab92d874 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -62,7 +62,7 @@ static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) int i; for (i=0; xfrm_mib_list[i].name; i++) seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, - fold_field((void **)xfrm_statistics, + fold_field((void **)init_net.mib.xfrm_statistics, xfrm_mib_list[i].entry)); return 0; } -- cgit v1.2.3 From c68cd1a01ba56995d85a4a62b195b2b3f6415c64 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 18:00:14 -0800 Subject: netns xfrm: /proc/net/xfrm_stat in netns Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/xfrm.h | 3 ++- net/xfrm/xfrm_policy.c | 11 +++++++---- net/xfrm/xfrm_proc.c | 26 ++++++++++++-------------- 3 files changed, 21 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 78ec3e8a95ed..1554ccd0c940 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1306,7 +1306,8 @@ static inline void xfrm6_fini(void) #endif #ifdef CONFIG_XFRM_STATISTICS -extern int xfrm_proc_init(void); +extern int xfrm_proc_init(struct net *net); +extern void xfrm_proc_fini(struct net *net); #endif extern void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index e239a25e571c..38822b34ba7d 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2377,14 +2377,20 @@ static struct notifier_block xfrm_dev_notifier = { #ifdef CONFIG_XFRM_STATISTICS static int __net_init xfrm_statistics_init(struct net *net) { + int rv; + if (snmp_mib_init((void **)net->mib.xfrm_statistics, sizeof(struct linux_xfrm_mib)) < 0) return -ENOMEM; - return 0; + rv = xfrm_proc_init(net); + if (rv < 0) + snmp_mib_free((void **)net->mib.xfrm_statistics); + return rv; } static void xfrm_statistics_fini(struct net *net) { + xfrm_proc_fini(net); snmp_mib_free((void **)net->mib.xfrm_statistics); } #else @@ -2524,9 +2530,6 @@ void __init xfrm_init(void) { register_pernet_subsys(&xfrm_net_ops); xfrm_input_init(); -#ifdef CONFIG_XFRM_STATISTICS - xfrm_proc_init(); -#endif } #ifdef CONFIG_AUDITSYSCALL diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 27a2ab92d874..284eaef1dbf2 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -59,17 +59,18 @@ fold_field(void *mib[], int offt) static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) { + struct net *net = seq->private; int i; for (i=0; xfrm_mib_list[i].name; i++) seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, - fold_field((void **)init_net.mib.xfrm_statistics, + fold_field((void **)net->mib.xfrm_statistics, xfrm_mib_list[i].entry)); return 0; } static int xfrm_statistics_seq_open(struct inode *inode, struct file *file) { - return single_open(file, xfrm_statistics_seq_show, NULL); + return single_open_net(inode, file, xfrm_statistics_seq_show); } static struct file_operations xfrm_statistics_seq_fops = { @@ -77,21 +78,18 @@ static struct file_operations xfrm_statistics_seq_fops = { .open = xfrm_statistics_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = single_release_net, }; -int __init xfrm_proc_init(void) +int __net_init xfrm_proc_init(struct net *net) { - int rc = 0; - - if (!proc_net_fops_create(&init_net, "xfrm_stat", S_IRUGO, + if (!proc_net_fops_create(net, "xfrm_stat", S_IRUGO, &xfrm_statistics_seq_fops)) - goto stat_fail; - - out: - return rc; + return -ENOMEM; + return 0; +} - stat_fail: - rc = -ENOMEM; - goto out; +void xfrm_proc_fini(struct net *net) +{ + proc_net_remove(net, "xfrm_stat"); } -- cgit v1.2.3 From b27aeadb5948d400df83db4d29590fb9862ba49d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 25 Nov 2008 18:00:48 -0800 Subject: netns xfrm: per-netns sysctls Make net.core.xfrm_aevent_etime net.core.xfrm_acq_expires net.core.xfrm_aevent_rseqth net.core.xfrm_larval_drop sysctls per-netns. For that make net_core_path[] global, register it to prevent two /proc/net/core antries and change initcall position -- xfrm_init() is called from fs_initcall, so this one should be fs_initcall at least. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/ip.h | 1 + include/net/netns/xfrm.h | 10 ++++++ include/net/xfrm.h | 14 +++++--- net/core/sysctl_net_core.c | 42 +++-------------------- net/xfrm/Makefile | 4 +-- net/xfrm/xfrm_policy.c | 10 ++++-- net/xfrm/xfrm_state.c | 16 +++------ net/xfrm/xfrm_sysctl.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++ net/xfrm/xfrm_user.c | 4 +-- 9 files changed, 125 insertions(+), 61 deletions(-) create mode 100644 net/xfrm/xfrm_sysctl.c (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index ddef10c22e3a..10868139e656 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -187,6 +187,7 @@ extern void inet_get_local_port_range(int *low, int *high); extern int sysctl_ip_default_ttl; extern int sysctl_ip_nonlocal_bind; +extern struct ctl_path net_core_path[]; extern struct ctl_path net_ipv4_ctl_path[]; /* From inetpeer.c */ diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 09f3060e9d18..1ba912749caa 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -6,6 +6,8 @@ #include #include +struct ctl_table_header; + struct xfrm_policy_hash { struct hlist_head *table; unsigned int hmask; @@ -41,6 +43,14 @@ struct netns_xfrm { struct work_struct policy_hash_work; struct sock *nlsk; + + u32 sysctl_aevent_etime; + u32 sysctl_aevent_rseqth; + int sysctl_larval_drop; + u32 sysctl_acq_expires; +#ifdef CONFIG_SYSCTL + struct ctl_table_header *sysctl_hdr; +#endif }; #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 1554ccd0c940..2e9f5c0018ae 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -47,11 +47,6 @@ #define XFRM_INC_STATS_USER(net, field) ((void)(net)) #endif -extern u32 sysctl_xfrm_aevent_etime; -extern u32 sysctl_xfrm_aevent_rseqth; -extern int sysctl_xfrm_larval_drop; -extern u32 sysctl_xfrm_acq_expires; - extern struct mutex xfrm_cfg_mutex; /* Organization of SPD aka "XFRM rules" @@ -1310,6 +1305,15 @@ extern int xfrm_proc_init(struct net *net); extern void xfrm_proc_fini(struct net *net); #endif +extern int xfrm_sysctl_init(struct net *net); +#ifdef CONFIG_SYSCTL +extern void xfrm_sysctl_fini(struct net *net); +#else +static inline void xfrm_sysctl_fini(struct net *net) +{ +} +#endif + extern void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto); extern int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 2bc0384b0448..83d3398559ea 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -12,7 +12,6 @@ #include #include #include -#include static struct ctl_table net_core_table[] = { #ifdef CONFIG_NET @@ -89,40 +88,6 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, -#ifdef CONFIG_XFRM - { - .ctl_name = NET_CORE_AEVENT_ETIME, - .procname = "xfrm_aevent_etime", - .data = &sysctl_xfrm_aevent_etime, - .maxlen = sizeof(u32), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .ctl_name = NET_CORE_AEVENT_RSEQTH, - .procname = "xfrm_aevent_rseqth", - .data = &sysctl_xfrm_aevent_rseqth, - .maxlen = sizeof(u32), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "xfrm_larval_drop", - .data = &sysctl_xfrm_larval_drop, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "xfrm_acq_expires", - .data = &sysctl_xfrm_acq_expires, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, -#endif /* CONFIG_XFRM */ #endif /* CONFIG_NET */ { .ctl_name = NET_CORE_BUDGET, @@ -155,7 +120,7 @@ static struct ctl_table netns_core_table[] = { { .ctl_name = 0 } }; -static __net_initdata struct ctl_path net_core_path[] = { +__net_initdata struct ctl_path net_core_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "core", .ctl_name = NET_CORE, }, { }, @@ -207,8 +172,11 @@ static __net_initdata struct pernet_operations sysctl_core_ops = { static __init int sysctl_core_init(void) { + static struct ctl_table empty[1]; + + register_sysctl_paths(net_core_path, empty); register_net_sysctl_rotable(net_core_path, net_core_table); return register_pernet_subsys(&sysctl_core_ops); } -__initcall(sysctl_core_init); +fs_initcall(sysctl_core_init); diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 0f439a72ccab..c631047e1b27 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -3,8 +3,8 @@ # obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ - xfrm_input.o xfrm_output.o xfrm_algo.o + xfrm_input.o xfrm_output.o xfrm_algo.o \ + xfrm_sysctl.o obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o - diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 38822b34ba7d..393cc65dbfa4 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -34,8 +34,6 @@ #include "xfrm_hash.h" -int sysctl_xfrm_larval_drop __read_mostly = 1; - DEFINE_MUTEX(xfrm_cfg_mutex); EXPORT_SYMBOL(xfrm_cfg_mutex); @@ -1671,7 +1669,7 @@ restart: if (unlikely(nx<0)) { err = nx; - if (err == -EAGAIN && sysctl_xfrm_larval_drop) { + if (err == -EAGAIN && net->xfrm.sysctl_larval_drop) { /* EREMOTE tells the caller to generate * a one-shot blackhole route. */ @@ -2504,8 +2502,13 @@ static int __net_init xfrm_net_init(struct net *net) rv = xfrm_policy_init(net); if (rv < 0) goto out_policy; + rv = xfrm_sysctl_init(net); + if (rv < 0) + goto out_sysctl; return 0; +out_sysctl: + xfrm_policy_fini(net); out_policy: xfrm_state_fini(net); out_state: @@ -2516,6 +2519,7 @@ out_statistics: static void __net_exit xfrm_net_exit(struct net *net) { + xfrm_sysctl_fini(net); xfrm_policy_fini(net); xfrm_state_fini(net); xfrm_statistics_fini(net); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 662e47b0bcc3..2fd57f8f77c1 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -24,14 +24,6 @@ #include "xfrm_hash.h" -u32 sysctl_xfrm_aevent_etime __read_mostly = XFRM_AE_ETIME; -EXPORT_SYMBOL(sysctl_xfrm_aevent_etime); - -u32 sysctl_xfrm_aevent_rseqth __read_mostly = XFRM_AE_SEQT_SIZE; -EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); - -u32 sysctl_xfrm_acq_expires __read_mostly = 30; - /* Each xfrm_state may be linked to two tables: 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) @@ -851,8 +843,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, family); hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); } - x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires; - x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; + x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; + x->timer.expires = jiffies + net->xfrm.sysctl_acq_expires*HZ; add_timer(&x->timer); net->xfrm.state_num++; xfrm_hash_grow_check(net, x->bydst.next != NULL); @@ -1040,9 +1032,9 @@ static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family x->props.family = family; x->props.mode = mode; x->props.reqid = reqid; - x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires; + x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; xfrm_state_hold(x); - x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; + x->timer.expires = jiffies + net->xfrm.sysctl_acq_expires*HZ; add_timer(&x->timer); list_add(&x->km.all, &net->xfrm.state_all); hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c new file mode 100644 index 000000000000..2e6ffb66f06f --- /dev/null +++ b/net/xfrm/xfrm_sysctl.c @@ -0,0 +1,85 @@ +#include +#include +#include + +static void __xfrm_sysctl_init(struct net *net) +{ + net->xfrm.sysctl_aevent_etime = XFRM_AE_ETIME; + net->xfrm.sysctl_aevent_rseqth = XFRM_AE_SEQT_SIZE; + net->xfrm.sysctl_larval_drop = 1; + net->xfrm.sysctl_acq_expires = 30; +} + +#ifdef CONFIG_SYSCTL +static struct ctl_table xfrm_table[] = { + { + .ctl_name = NET_CORE_AEVENT_ETIME, + .procname = "xfrm_aevent_etime", + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .ctl_name = NET_CORE_AEVENT_RSEQTH, + .procname = "xfrm_aevent_rseqth", + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "xfrm_larval_drop", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "xfrm_acq_expires", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + {} +}; + +int __net_init xfrm_sysctl_init(struct net *net) +{ + struct ctl_table *table; + + __xfrm_sysctl_init(net); + + table = kmemdup(xfrm_table, sizeof(xfrm_table), GFP_KERNEL); + if (!table) + goto out_kmemdup; + table[0].data = &net->xfrm.sysctl_aevent_etime; + table[1].data = &net->xfrm.sysctl_aevent_rseqth; + table[2].data = &net->xfrm.sysctl_larval_drop; + table[3].data = &net->xfrm.sysctl_acq_expires; + + net->xfrm.sysctl_hdr = register_net_sysctl_table(net, net_core_path, table); + if (!net->xfrm.sysctl_hdr) + goto out_register; + return 0; + +out_register: + kfree(table); +out_kmemdup: + return -ENOMEM; +} + +void xfrm_sysctl_fini(struct net *net) +{ + struct ctl_table *table; + + table = net->xfrm.sysctl_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->xfrm.sysctl_hdr); + kfree(table); +} +#else +int __net_init xfrm_sysctl_init(struct net *net) +{ + __xfrm_sysctl_init(net); + return 0; +} +#endif diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b7240d5b77ad..38ffaf33312e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -368,9 +368,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, goto error; x->km.seq = p->seq; - x->replay_maxdiff = sysctl_xfrm_aevent_rseqth; + x->replay_maxdiff = net->xfrm.sysctl_aevent_rseqth; /* sysctl_xfrm_aevent_etime is in 100ms units */ - x->replay_maxage = (sysctl_xfrm_aevent_etime*HZ)/XFRM_AE_ETH_M; + x->replay_maxage = (net->xfrm.sysctl_aevent_etime*HZ)/XFRM_AE_ETH_M; x->preplay.bitmap = 0; x->preplay.seq = x->replay.seq+x->replay_maxdiff; x->preplay.oseq = x->replay.oseq +x->replay_maxdiff; -- cgit v1.2.3 From c1b56878fb68e9c14070939ea4537ad4db79ffae Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 25 Nov 2008 21:14:06 -0800 Subject: tc: policing requires a rate estimator Found that while trying average rate policing, it was possible to request average rate policing without a rate estimator. This results in no policing which is harmless but incorrect. Since policing could be setup in two steps, need to check in the kernel. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/gen_stats.h | 1 + net/core/gen_estimator.c | 30 +++++++++++++++++++++++++++--- net/sched/act_police.c | 6 ++++++ 3 files changed, 34 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index 8cd8185fa2ed..dcf5bfa7d4f1 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -45,5 +45,6 @@ extern void gen_kill_estimator(struct gnet_stats_basic *bstats, extern int gen_replace_estimator(struct gnet_stats_basic *bstats, struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct nlattr *opt); +extern int gen_estimator_active(const struct gnet_stats_rate_est *rate_est); #endif diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 80aa160877e9..3885550f0187 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -242,6 +242,7 @@ int gen_new_estimator(struct gnet_stats_basic *bstats, return 0; } +EXPORT_SYMBOL(gen_new_estimator); static void __gen_kill_estimator(struct rcu_head *head) { @@ -275,6 +276,7 @@ void gen_kill_estimator(struct gnet_stats_basic *bstats, call_rcu(&e->e_rcu, __gen_kill_estimator); } } +EXPORT_SYMBOL(gen_kill_estimator); /** * gen_replace_estimator - replace rate estimator configuration @@ -295,8 +297,30 @@ int gen_replace_estimator(struct gnet_stats_basic *bstats, gen_kill_estimator(bstats, rate_est); return gen_new_estimator(bstats, rate_est, stats_lock, opt); } +EXPORT_SYMBOL(gen_replace_estimator); + +/** + * gen_estimator_active - test if estimator is currently in use + * @rate_est: rate estimator statistics + * + * Returns 1 if estimator is active, and 0 if not. + */ +int gen_estimator_active(const struct gnet_stats_rate_est *rate_est) +{ + int idx; + struct gen_estimator *e; + ASSERT_RTNL(); -EXPORT_SYMBOL(gen_kill_estimator); -EXPORT_SYMBOL(gen_new_estimator); -EXPORT_SYMBOL(gen_replace_estimator); + for (idx=0; idx <= EST_MAX_INTERVAL; idx++) { + if (!elist[idx].timer.function) + continue; + + list_for_each_entry(e, &elist[idx].list, list) { + if (e->rate_est == rate_est) + return 1; + } + } + return 0; +} +EXPORT_SYMBOL(gen_estimator_active); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index e19a0261144a..c39f60cea6ee 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -182,6 +182,12 @@ override: R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE]); if (R_tab == NULL) goto failure; + + if (!est && !gen_estimator_active(&police->tcf_rate_est)) { + err = -EINVAL; + goto failure; + } + if (parm->peakrate.rate) { P_tab = qdisc_get_rtab(&parm->peakrate, tb[TCA_POLICE_PEAKRATE]); -- cgit v1.2.3 From 1748376b6626acf59c24e9592ac67b3fe2a0e026 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Nov 2008 21:16:35 -0800 Subject: net: Use a percpu_counter for sockets_allocated Instead of using one atomic_t per protocol, use a percpu_counter for "sockets_allocated", to reduce cache line contention on heavy duty network servers. Note : We revert commit (248969ae31e1b3276fc4399d67ce29a5d81e6fd9 net: af_unix can make unix_nr_socks visbile in /proc), since it is not anymore used after sock_prot_inuse_add() addition Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 1 + include/net/sock.h | 2 +- include/net/tcp.h | 2 +- net/core/sock.c | 10 +++++++--- net/ipv4/proc.c | 3 ++- net/ipv4/tcp.c | 8 ++++++-- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv6/tcp_ipv6.c | 2 +- net/sctp/protocol.c | 6 +++++- net/sctp/socket.c | 6 +++--- net/unix/af_unix.c | 1 - 11 files changed, 29 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 23797506f593..bbb7742195b0 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -138,6 +138,7 @@ void sctp_write_space(struct sock *sk); unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait); void sctp_sock_rfree(struct sk_buff *skb); +extern struct percpu_counter sctp_sockets_allocated; /* * sctp/primitive.c diff --git a/include/net/sock.h b/include/net/sock.h index 00cd486d362f..a2a3890959c4 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -649,7 +649,7 @@ struct proto { /* Memory pressure */ void (*enter_memory_pressure)(struct sock *sk); atomic_t *memory_allocated; /* Current allocated memory. */ - atomic_t *sockets_allocated; /* Current number of sockets. */ + struct percpu_counter *sockets_allocated; /* Current number of sockets. */ /* * Pressure flag: try to collapse. * Technical note: it is used by multiple contexts non atomically. diff --git a/include/net/tcp.h b/include/net/tcp.h index e8ae90a8c35e..cbca3b8a133d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -238,7 +238,7 @@ extern int sysctl_tcp_slow_start_after_idle; extern int sysctl_tcp_max_ssthresh; extern atomic_t tcp_memory_allocated; -extern atomic_t tcp_sockets_allocated; +extern struct percpu_counter tcp_sockets_allocated; extern int tcp_memory_pressure; /* diff --git a/net/core/sock.c b/net/core/sock.c index a4e840e5a053..7a081b647bf9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1071,7 +1071,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) newsk->sk_sleep = NULL; if (newsk->sk_prot->sockets_allocated) - atomic_inc(newsk->sk_prot->sockets_allocated); + percpu_counter_inc(newsk->sk_prot->sockets_allocated); } out: return newsk; @@ -1463,8 +1463,12 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind) } if (prot->memory_pressure) { - if (!*prot->memory_pressure || - prot->sysctl_mem[2] > atomic_read(prot->sockets_allocated) * + int alloc; + + if (!*prot->memory_pressure) + return 1; + alloc = percpu_counter_read_positive(prot->sockets_allocated); + if (prot->sysctl_mem[2] > alloc * sk_mem_pages(sk->sk_wmem_queued + atomic_read(&sk->sk_rmem_alloc) + sk->sk_forward_alloc)) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 731789bb499f..4944b47ad628 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -55,7 +55,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", sock_prot_inuse_get(net, &tcp_prot), atomic_read(&tcp_orphan_count), - tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), + tcp_death_row.tw_count, + (int)percpu_counter_sum_positive(&tcp_sockets_allocated), atomic_read(&tcp_memory_allocated)); seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(net, &udp_prot), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 044224a341eb..e6fade9ebf62 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -290,9 +290,12 @@ EXPORT_SYMBOL(sysctl_tcp_rmem); EXPORT_SYMBOL(sysctl_tcp_wmem); atomic_t tcp_memory_allocated; /* Current allocated memory. */ -atomic_t tcp_sockets_allocated; /* Current number of TCP sockets. */ - EXPORT_SYMBOL(tcp_memory_allocated); + +/* + * Current number of TCP sockets. + */ +struct percpu_counter tcp_sockets_allocated; EXPORT_SYMBOL(tcp_sockets_allocated); /* @@ -2685,6 +2688,7 @@ void __init tcp_init(void) BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); + percpu_counter_init(&tcp_sockets_allocated, 0); tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cab2458f86fd..26b9030747cc 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1797,7 +1797,7 @@ static int tcp_v4_init_sock(struct sock *sk) sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; - atomic_inc(&tcp_sockets_allocated); + percpu_counter_inc(&tcp_sockets_allocated); return 0; } @@ -1845,7 +1845,7 @@ void tcp_v4_destroy_sock(struct sock *sk) sk->sk_sndmsg_page = NULL; } - atomic_dec(&tcp_sockets_allocated); + percpu_counter_dec(&tcp_sockets_allocated); } EXPORT_SYMBOL(tcp_v4_destroy_sock); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f259c9671f3e..8702b06cb60a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1830,7 +1830,7 @@ static int tcp_v6_init_sock(struct sock *sk) sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; - atomic_inc(&tcp_sockets_allocated); + percpu_counter_inc(&tcp_sockets_allocated); return 0; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index a8ca743241ee..d5ea232c9126 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -102,6 +102,8 @@ struct sock *sctp_get_ctl_sock(void) /* Set up the proc fs entry for the SCTP protocol. */ static __init int sctp_proc_init(void) { + if (percpu_counter_init(&sctp_sockets_allocated, 0)) + goto out_nomem; #ifdef CONFIG_PROC_FS if (!proc_net_sctp) { struct proc_dir_entry *ent; @@ -110,7 +112,7 @@ static __init int sctp_proc_init(void) ent->owner = THIS_MODULE; proc_net_sctp = ent; } else - goto out_nomem; + goto out_free_percpu; } if (sctp_snmp_proc_init()) @@ -135,6 +137,8 @@ out_snmp_proc_init: proc_net_sctp = NULL; remove_proc_entry("sctp", init_net.proc_net); } +out_free_percpu: + percpu_counter_destroy(&sctp_sockets_allocated); out_nomem: return -ENOMEM; #else diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ba81fe3ccab8..a2de585888d0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -114,7 +114,7 @@ extern int sysctl_sctp_wmem[3]; static int sctp_memory_pressure; static atomic_t sctp_memory_allocated; -static atomic_t sctp_sockets_allocated; +struct percpu_counter sctp_sockets_allocated; static void sctp_enter_memory_pressure(struct sock *sk) { @@ -3613,7 +3613,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->hmac = NULL; SCTP_DBG_OBJCNT_INC(sock); - atomic_inc(&sctp_sockets_allocated); + percpu_counter_inc(&sctp_sockets_allocated); local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -3632,7 +3632,7 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk) /* Release our hold on the endpoint. */ ep = sctp_sk(sk)->ep; sctp_endpoint_free(ep); - atomic_dec(&sctp_sockets_allocated); + percpu_counter_dec(&sctp_sockets_allocated); local_bh_disable(); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); local_bh_enable(); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3a35a6e8bf91..5aaf23e43f1d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -571,7 +571,6 @@ static const struct proto_ops unix_seqpacket_ops = { static struct proto unix_proto = { .name = "UNIX", .owner = THIS_MODULE, - .sockets_allocated = &unix_nr_socks, .obj_size = sizeof(struct unix_sock), }; -- cgit v1.2.3 From dd24c00191d5e4a1ae896aafe33c6b8095ab4bd1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Nov 2008 21:17:14 -0800 Subject: net: Use a percpu_counter for orphan_count Instead of using one atomic_t per protocol, use a percpu_counter for "orphan_count", to reduce cache line contention on heavy duty network servers. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 2 +- include/net/tcp.h | 2 +- net/dccp/dccp.h | 2 +- net/dccp/proto.c | 16 ++++++++++------ net/ipv4/inet_connection_sock.c | 4 ++-- net/ipv4/proc.c | 2 +- net/ipv4/tcp.c | 12 +++++++----- net/ipv4/tcp_timer.c | 2 +- 8 files changed, 24 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index a2a3890959c4..5a3a151bd730 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -666,7 +666,7 @@ struct proto { unsigned int obj_size; int slab_flags; - atomic_t *orphan_count; + struct percpu_counter *orphan_count; struct request_sock_ops *rsk_prot; struct timewait_sock_ops *twsk_prot; diff --git a/include/net/tcp.h b/include/net/tcp.h index cbca3b8a133d..de1e91d959b8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -46,7 +46,7 @@ extern struct inet_hashinfo tcp_hashinfo; -extern atomic_t tcp_orphan_count; +extern struct percpu_counter tcp_orphan_count; extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define MAX_TCP_HEADER (128 + MAX_HEADER) diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 031ce350d3c1..33a1127270c1 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -49,7 +49,7 @@ extern int dccp_debug; extern struct inet_hashinfo dccp_hashinfo; -extern atomic_t dccp_orphan_count; +extern struct percpu_counter dccp_orphan_count; extern void dccp_time_wait(struct sock *sk, int state, int timeo); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index ea85c423cdbd..db225f93cd5a 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -40,8 +40,7 @@ DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; EXPORT_SYMBOL_GPL(dccp_statistics); -atomic_t dccp_orphan_count = ATOMIC_INIT(0); - +struct percpu_counter dccp_orphan_count; EXPORT_SYMBOL_GPL(dccp_orphan_count); struct inet_hashinfo dccp_hashinfo; @@ -1000,7 +999,7 @@ adjudge_to_death: state = sk->sk_state; sock_hold(sk); sock_orphan(sk); - atomic_inc(sk->sk_prot->orphan_count); + percpu_counter_inc(sk->sk_prot->orphan_count); /* * It is the last release_sock in its life. It will remove backlog. @@ -1064,18 +1063,21 @@ static int __init dccp_init(void) { unsigned long goal; int ehash_order, bhash_order, i; - int rc = -ENOBUFS; + int rc; BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); - + rc = percpu_counter_init(&dccp_orphan_count, 0); + if (rc) + goto out; + rc = -ENOBUFS; inet_hashinfo_init(&dccp_hashinfo); dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", sizeof(struct inet_bind_bucket), 0, SLAB_HWCACHE_ALIGN, NULL); if (!dccp_hashinfo.bind_bucket_cachep) - goto out; + goto out_free_percpu; /* * Size and allocate the main established and bind bucket @@ -1168,6 +1170,8 @@ out_free_dccp_ehash: out_free_bind_bucket_cachep: kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); dccp_hashinfo.bind_bucket_cachep = NULL; +out_free_percpu: + percpu_counter_destroy(&dccp_orphan_count); goto out; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 05af807ca9b9..1ccdbba528be 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -561,7 +561,7 @@ void inet_csk_destroy_sock(struct sock *sk) sk_refcnt_debug_release(sk); - atomic_dec(sk->sk_prot->orphan_count); + percpu_counter_dec(sk->sk_prot->orphan_count); sock_put(sk); } @@ -641,7 +641,7 @@ void inet_csk_listen_stop(struct sock *sk) sock_orphan(child); - atomic_inc(sk->sk_prot->orphan_count); + percpu_counter_inc(sk->sk_prot->orphan_count); inet_csk_destroy_sock(child); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 4944b47ad628..614958b7c276 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -54,7 +54,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", sock_prot_inuse_get(net, &tcp_prot), - atomic_read(&tcp_orphan_count), + (int)percpu_counter_sum_positive(&tcp_orphan_count), tcp_death_row.tw_count, (int)percpu_counter_sum_positive(&tcp_sockets_allocated), atomic_read(&tcp_memory_allocated)); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e6fade9ebf62..019243408623 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -277,8 +277,7 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; -atomic_t tcp_orphan_count = ATOMIC_INIT(0); - +struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); int sysctl_tcp_mem[3] __read_mostly; @@ -1837,7 +1836,7 @@ adjudge_to_death: state = sk->sk_state; sock_hold(sk); sock_orphan(sk); - atomic_inc(sk->sk_prot->orphan_count); + percpu_counter_inc(sk->sk_prot->orphan_count); /* It is the last release_sock in its life. It will remove backlog. */ release_sock(sk); @@ -1888,9 +1887,11 @@ adjudge_to_death: } } if (sk->sk_state != TCP_CLOSE) { + int orphan_count = percpu_counter_read_positive( + sk->sk_prot->orphan_count); + sk_mem_reclaim(sk); - if (tcp_too_many_orphans(sk, - atomic_read(sk->sk_prot->orphan_count))) { + if (tcp_too_many_orphans(sk, orphan_count)) { if (net_ratelimit()) printk(KERN_INFO "TCP: too many of orphaned " "sockets\n"); @@ -2689,6 +2690,7 @@ void __init tcp_init(void) BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); percpu_counter_init(&tcp_sockets_allocated, 0); + percpu_counter_init(&tcp_orphan_count, 0); tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 3df339e3e363..cc4e6d27dedc 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -65,7 +65,7 @@ static void tcp_write_err(struct sock *sk) static int tcp_out_of_resources(struct sock *sk, int do_reset) { struct tcp_sock *tp = tcp_sk(sk); - int orphans = atomic_read(&tcp_orphan_count); + int orphans = percpu_counter_read_positive(&tcp_orphan_count); /* If peer does not open window for long time, or did not transmit * anything for long time, penalize it. */ -- cgit v1.2.3 From 5a45cfe1c64862e8cd3b0d79d7c4ba71c3118915 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 26 Nov 2008 00:16:24 -0500 Subject: ftrace: use code patching for ftrace graph tracer Impact: more efficient code for ftrace graph tracer This patch uses the dynamic patching, when available, to patch the function graph code into the kernel. This patch will ease the way for letting both function tracing and function graph tracing run together. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 5 +++++ arch/x86/kernel/ftrace.c | 48 ++++++++++++++++++++++++++++++++++++++++++++-- include/linux/ftrace.h | 5 +++++ kernel/trace/ftrace.c | 35 ++++++++++++++++----------------- 4 files changed, 72 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 7def9fd5c1e6..958af86186c4 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1174,6 +1174,11 @@ ftrace_call: popl %edx popl %ecx popl %eax +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +.globl ftrace_graph_call +ftrace_graph_call: + jmp ftrace_stub +#endif .globl ftrace_stub ftrace_stub: diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 26b2d92d48b3..7ef914e6a2f6 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -111,7 +111,6 @@ static void ftrace_mod_code(void) */ mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, MCOUNT_INSN_SIZE); - } void ftrace_nmi_enter(void) @@ -325,7 +324,51 @@ int __init ftrace_dyn_arch_init(void *data) #ifdef CONFIG_FUNCTION_GRAPH_TRACER -#ifndef CONFIG_DYNAMIC_FTRACE +#ifdef CONFIG_DYNAMIC_FTRACE +extern void ftrace_graph_call(void); + +static int ftrace_mod_jmp(unsigned long ip, + int old_offset, int new_offset) +{ + unsigned char code[MCOUNT_INSN_SIZE]; + + if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE)) + return -EFAULT; + + if (code[0] != 0xe9 || old_offset != *(int *)(&code[1])) + return -EINVAL; + + *(int *)(&code[1]) = new_offset; + + if (do_ftrace_mod_code(ip, &code)) + return -EPERM; + + return 0; +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + unsigned long ip = (unsigned long)(&ftrace_graph_call); + int old_offset, new_offset; + + old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE); + new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE); + + return ftrace_mod_jmp(ip, old_offset, new_offset); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + unsigned long ip = (unsigned long)(&ftrace_graph_call); + int old_offset, new_offset; + + old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE); + new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE); + + return ftrace_mod_jmp(ip, old_offset, new_offset); +} + +#else /* CONFIG_DYNAMIC_FTRACE */ /* * These functions are picked from those used on @@ -343,6 +386,7 @@ void ftrace_nmi_exit(void) { atomic_dec(&in_nmi); } + #endif /* !CONFIG_DYNAMIC_FTRACE */ /* Add a function return address to the trace stack on thread info.*/ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index fc2d54987198..f9792c0d73f6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -117,6 +117,11 @@ extern void ftrace_call(void); extern void mcount_call(void); #ifdef CONFIG_FUNCTION_GRAPH_TRACER extern void ftrace_graph_caller(void); +extern int ftrace_enable_ftrace_graph_caller(void); +extern int ftrace_disable_ftrace_graph_caller(void); +#else +static inline int ftrace_enable_ftrace_graph_caller(void) { return 0; } +static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; } #endif /** diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 00d98c65fad0..5f7c8642d58b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -281,6 +281,8 @@ enum { FTRACE_UPDATE_TRACE_FUNC = (1 << 2), FTRACE_ENABLE_MCOUNT = (1 << 3), FTRACE_DISABLE_MCOUNT = (1 << 4), + FTRACE_START_FUNC_RET = (1 << 5), + FTRACE_STOP_FUNC_RET = (1 << 6), }; static int ftrace_filtered; @@ -465,14 +467,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) unsigned long ip, fl; unsigned long ftrace_addr; -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - if (ftrace_tracing_type == FTRACE_TYPE_ENTER) - ftrace_addr = (unsigned long)ftrace_caller; - else - ftrace_addr = (unsigned long)ftrace_graph_caller; -#else ftrace_addr = (unsigned long)ftrace_caller; -#endif ip = rec->ip; @@ -605,6 +600,11 @@ static int __ftrace_modify_code(void *data) if (*command & FTRACE_UPDATE_TRACE_FUNC) ftrace_update_ftrace_func(ftrace_trace_function); + if (*command & FTRACE_START_FUNC_RET) + ftrace_enable_ftrace_graph_caller(); + else if (*command & FTRACE_STOP_FUNC_RET) + ftrace_disable_ftrace_graph_caller(); + return 0; } @@ -629,10 +629,8 @@ static void ftrace_startup_enable(int command) ftrace_run_update_code(command); } -static void ftrace_startup(void) +static void ftrace_startup(int command) { - int command = 0; - if (unlikely(ftrace_disabled)) return; @@ -645,10 +643,8 @@ static void ftrace_startup(void) mutex_unlock(&ftrace_start_lock); } -static void ftrace_shutdown(void) +static void ftrace_shutdown(int command) { - int command = 0; - if (unlikely(ftrace_disabled)) return; @@ -1453,8 +1449,9 @@ device_initcall(ftrace_nodyn_init); static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } static inline void ftrace_startup_enable(int command) { } -# define ftrace_startup() do { } while (0) -# define ftrace_shutdown() do { } while (0) +/* Keep as macros so we do not need to define the commands */ +# define ftrace_startup(command) do { } while (0) +# define ftrace_shutdown(command) do { } while (0) # define ftrace_startup_sysctl() do { } while (0) # define ftrace_shutdown_sysctl() do { } while (0) #endif /* CONFIG_DYNAMIC_FTRACE */ @@ -1585,7 +1582,7 @@ int register_ftrace_function(struct ftrace_ops *ops) } ret = __register_ftrace_function(ops); - ftrace_startup(); + ftrace_startup(0); out: mutex_unlock(&ftrace_sysctl_lock); @@ -1604,7 +1601,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) mutex_lock(&ftrace_sysctl_lock); ret = __unregister_ftrace_function(ops); - ftrace_shutdown(); + ftrace_shutdown(0); mutex_unlock(&ftrace_sysctl_lock); return ret; @@ -1751,7 +1748,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, ftrace_tracing_type = FTRACE_TYPE_RETURN; ftrace_graph_return = retfunc; ftrace_graph_entry = entryfunc; - ftrace_startup(); + ftrace_startup(FTRACE_START_FUNC_RET); out: mutex_unlock(&ftrace_sysctl_lock); @@ -1765,7 +1762,7 @@ void unregister_ftrace_graph(void) atomic_dec(&ftrace_graph_active); ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; ftrace_graph_entry = (trace_func_graph_ent_t)ftrace_stub; - ftrace_shutdown(); + ftrace_shutdown(FTRACE_STOP_FUNC_RET); /* Restore normal tracing type */ ftrace_tracing_type = FTRACE_TYPE_ENTER; -- cgit v1.2.3 From f3f47a6768a29448866da4422b6f6bee485c947f Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 23 Nov 2008 16:49:58 -0800 Subject: tracing: add "power-tracer": C/P state tracer to help power optimization Impact: new "power-tracer" ftrace plugin This patch adds a C/P-state ftrace plugin that will generate detailed statistics about the C/P-states that are being used, so that we can look at detailed decisions that the C/P-state code is making, rather than the too high level "average" that we have today. An example way of using this is: mount -t debugfs none /sys/kernel/debug echo cstate > /sys/kernel/debug/tracing/current_tracer echo 1 > /sys/kernel/debug/tracing/tracing_enabled sleep 1 echo 0 > /sys/kernel/debug/tracing/tracing_enabled cat /sys/kernel/debug/tracing/trace | perl scripts/trace/cstate.pl > out.svg Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 4 + arch/x86/kernel/process.c | 16 +++ include/linux/ftrace.h | 29 +++++ kernel/trace/Kconfig | 11 ++ kernel/trace/Makefile | 1 + kernel/trace/trace.h | 7 ++ kernel/trace/trace_power.c | 179 +++++++++++++++++++++++++++++ scripts/trace/power.pl | 108 +++++++++++++++++ 8 files changed, 355 insertions(+) create mode 100644 kernel/trace/trace_power.c create mode 100644 scripts/trace/power.pl (limited to 'include') diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 8e48c5d4467d..88ea02dcb622 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -391,6 +392,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, unsigned int next_perf_state = 0; /* Index into perf table */ unsigned int i; int result = 0; + struct power_trace it; dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); @@ -427,6 +429,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, } } + trace_power_mark(&it, POWER_PSTATE, next_perf_state); + switch (data->cpu_feature) { case SYSTEM_INTEL_MSR_CAPABLE: cmd.type = SYSTEM_INTEL_MSR_CAPABLE; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c622772744d8..c27af49a4ede 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -7,6 +7,7 @@ #include #include #include +#include #include unsigned long idle_halt; @@ -100,6 +101,9 @@ static inline int hlt_use_halt(void) void default_idle(void) { if (hlt_use_halt()) { + struct power_trace it; + + trace_power_start(&it, POWER_CSTATE, 1); current_thread_info()->status &= ~TS_POLLING; /* * TS_POLLING-cleared state must be visible before we @@ -112,6 +116,7 @@ void default_idle(void) else local_irq_enable(); current_thread_info()->status |= TS_POLLING; + trace_power_end(&it); } else { local_irq_enable(); /* loop is done by the caller */ @@ -154,24 +159,31 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); */ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) { + struct power_trace it; + + trace_power_start(&it, POWER_CSTATE, (ax>>4)+1); if (!need_resched()) { __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) __mwait(ax, cx); } + trace_power_end(&it); } /* Default MONITOR/MWAIT with no hints, used for default C1 state */ static void mwait_idle(void) { + struct power_trace it; if (!need_resched()) { + trace_power_start(&it, POWER_CSTATE, 1); __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) __sti_mwait(0, 0); else local_irq_enable(); + trace_power_end(&it); } else local_irq_enable(); } @@ -183,9 +195,13 @@ static void mwait_idle(void) */ static void poll_idle(void) { + struct power_trace it; + + trace_power_start(&it, POWER_CSTATE, 0); local_irq_enable(); while (!need_resched()) cpu_relax(); + trace_power_end(&it); } /* diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 7854d87b97b2..0df288666201 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -311,6 +311,35 @@ ftrace_init_module(struct module *mod, unsigned long *start, unsigned long *end) { } #endif +enum { + POWER_NONE = 0, + POWER_CSTATE = 1, + POWER_PSTATE = 2, +}; + +struct power_trace { +#ifdef CONFIG_POWER_TRACER + ktime_t stamp; + ktime_t end; + int type; + int state; +#endif +}; + +#ifdef CONFIG_POWER_TRACER +extern void trace_power_start(struct power_trace *it, unsigned int type, + unsigned int state); +extern void trace_power_mark(struct power_trace *it, unsigned int type, + unsigned int state); +extern void trace_power_end(struct power_trace *it); +#else +static inline void trace_power_start(struct power_trace *it, unsigned int type, + unsigned int state) { } +static inline void trace_power_mark(struct power_trace *it, unsigned int type, + unsigned int state) { } +static inline void trace_power_end(struct power_trace *it) { } +#endif + /* * Structure that defines a return function trace. diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 620feadff67a..d151aab48ed6 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -217,6 +217,17 @@ config BRANCH_TRACER Say N if unsure. +config POWER_TRACER + bool "Trace power consumption behavior" + depends on DEBUG_KERNEL + depends on X86 + select TRACING + help + This tracer helps developers to analyze and optimize the kernels + power management decisions, specifically the C-state and P-state + behavior. + + config STACK_TRACER bool "Trace max stack" depends on HAVE_FUNCTION_TRACER diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index cef4bcb4e822..acaa06553eca 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -32,5 +32,6 @@ obj-$(CONFIG_BOOT_TRACER) += trace_boot.o obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o obj-$(CONFIG_BTS_TRACER) += trace_bts.o +obj-$(CONFIG_POWER_TRACER) += trace_power.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3abd645e8af2..4c453778a6ab 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -28,6 +28,7 @@ enum trace_type { TRACE_FN_RET, TRACE_USER_STACK, TRACE_BTS, + TRACE_POWER, __TRACE_LAST_TYPE }; @@ -160,6 +161,11 @@ struct bts_entry { unsigned long to; }; +struct trace_power { + struct trace_entry ent; + struct power_trace state_data; +}; + /* * trace_flag_type is an enumeration that holds different * states when a trace occurs. These are: @@ -266,6 +272,7 @@ extern void __ftrace_bad_type(void); IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \ IF_ASSIGN(var, ent, struct ftrace_ret_entry, TRACE_FN_RET);\ IF_ASSIGN(var, ent, struct bts_entry, TRACE_BTS);\ + IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ __ftrace_bad_type(); \ } while (0) diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c new file mode 100644 index 000000000000..a7172a352f62 --- /dev/null +++ b/kernel/trace/trace_power.c @@ -0,0 +1,179 @@ +/* + * ring buffer based C-state tracer + * + * Arjan van de Ven + * Copyright (C) 2008 Intel Corporation + * + * Much is borrowed from trace_boot.c which is + * Copyright (C) 2008 Frederic Weisbecker + * + */ + +#include +#include +#include +#include +#include + +#include "trace.h" + +static struct trace_array *power_trace; +static int __read_mostly trace_power_enabled; + + +static void start_power_trace(struct trace_array *tr) +{ + trace_power_enabled = 1; +} + +static void stop_power_trace(struct trace_array *tr) +{ + trace_power_enabled = 0; +} + + +static int power_trace_init(struct trace_array *tr) +{ + int cpu; + power_trace = tr; + + trace_power_enabled = 1; + + for_each_cpu_mask(cpu, cpu_possible_map) + tracing_reset(tr, cpu); + return 0; +} + +static enum print_line_t power_print_line(struct trace_iterator *iter) +{ + int ret = 0; + struct trace_entry *entry = iter->ent; + struct trace_power *field ; + struct power_trace *it; + struct trace_seq *s = &iter->seq; + struct timespec stamp; + struct timespec duration; + + trace_assign_type(field, entry); + it = &field->state_data; + stamp = ktime_to_timespec(it->stamp); + duration = ktime_to_timespec(ktime_sub(it->end, it->stamp)); + + if (entry->type == TRACE_POWER) { + if (it->type == POWER_CSTATE) + ret = trace_seq_printf(s, "[%5ld.%09ld] CSTATE: Going to C%i on cpu %i for %ld.%09ld\n", + stamp.tv_sec, + stamp.tv_nsec, + it->state, iter->cpu, + duration.tv_sec, + duration.tv_nsec); + if (it->type == POWER_PSTATE) + ret = trace_seq_printf(s, "[%5ld.%09ld] PSTATE: Going to P%i on cpu %i\n", + stamp.tv_sec, + stamp.tv_nsec, + it->state, iter->cpu); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + return TRACE_TYPE_HANDLED; + } + return TRACE_TYPE_UNHANDLED; +} + +static struct tracer power_tracer __read_mostly = +{ + .name = "power", + .init = power_trace_init, + .start = start_power_trace, + .stop = stop_power_trace, + .reset = stop_power_trace, + .print_line = power_print_line, +}; + +static int init_power_trace(void) +{ + return register_tracer(&power_tracer); +} +device_initcall(init_power_trace); + +void trace_power_start(struct power_trace *it, unsigned int type, + unsigned int level) +{ + if (!trace_power_enabled) + return; + + memset(it, 0, sizeof(struct power_trace)); + it->state = level; + it->type = type; + it->stamp = ktime_get(); +} +EXPORT_SYMBOL_GPL(trace_power_start); + + +void trace_power_end(struct power_trace *it) +{ + struct ring_buffer_event *event; + struct trace_power *entry; + struct trace_array_cpu *data; + unsigned long irq_flags; + struct trace_array *tr = power_trace; + + if (!trace_power_enabled) + return; + + preempt_disable(); + it->end = ktime_get(); + data = tr->data[smp_processor_id()]; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + goto out; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, 0, 0); + entry->ent.type = TRACE_POWER; + entry->state_data = *it; + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + + trace_wake_up(); + + out: + preempt_enable(); +} +EXPORT_SYMBOL_GPL(trace_power_end); + +void trace_power_mark(struct power_trace *it, unsigned int type, + unsigned int level) +{ + struct ring_buffer_event *event; + struct trace_power *entry; + struct trace_array_cpu *data; + unsigned long irq_flags; + struct trace_array *tr = power_trace; + + if (!trace_power_enabled) + return; + + memset(it, 0, sizeof(struct power_trace)); + it->state = level; + it->type = type; + it->stamp = ktime_get(); + preempt_disable(); + it->end = it->stamp; + data = tr->data[smp_processor_id()]; + + event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), + &irq_flags); + if (!event) + goto out; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, 0, 0); + entry->ent.type = TRACE_POWER; + entry->state_data = *it; + ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + + trace_wake_up(); + + out: + preempt_enable(); +} +EXPORT_SYMBOL_GPL(trace_power_mark); diff --git a/scripts/trace/power.pl b/scripts/trace/power.pl new file mode 100644 index 000000000000..4f729b3501e0 --- /dev/null +++ b/scripts/trace/power.pl @@ -0,0 +1,108 @@ +#!/usr/bin/perl + +# Copyright 2008, Intel Corporation +# +# This file is part of the Linux kernel +# +# This program file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program in a file named COPYING; if not, write to the +# Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, +# Boston, MA 02110-1301 USA +# +# Authors: +# Arjan van de Ven + + +# +# This script turns a cstate ftrace output into a SVG graphic that shows +# historic C-state information +# +# +# cat /sys/kernel/debug/tracing/trace | perl power.pl > out.svg +# + +my @styles; +my $base = 0; + +my @pstate_last; +my @pstate_level; + +$styles[0] = "fill:rgb(0,0,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; +$styles[1] = "fill:rgb(0,255,0);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; +$styles[2] = "fill:rgb(255,0,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; +$styles[3] = "fill:rgb(255,255,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; +$styles[4] = "fill:rgb(255,0,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; +$styles[5] = "fill:rgb(0,255,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; +$styles[6] = "fill:rgb(0,128,255);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; +$styles[7] = "fill:rgb(0,255,128);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; +$styles[8] = "fill:rgb(0,25,20);fill-opacity:0.5;stroke-width:1;stroke:rgb(0,0,0)"; + + +print " \n"; +print "\n"; + +my $scale = 30000.0; +while (<>) { + my $line = $_; + if ($line =~ /([0-9\.]+)\] CSTATE: Going to C([0-9]) on cpu ([0-9]+) for ([0-9\.]+)/) { + if ($base == 0) { + $base = $1; + } + my $time = $1 - $base; + $time = $time * $scale; + my $C = $2; + my $cpu = $3; + my $y = 400 * $cpu; + my $duration = $4 * $scale; + my $msec = int($4 * 100000)/100.0; + my $height = $C * 20; + $style = $styles[$C]; + + $y = $y + 140 - $height; + + $x2 = $time + 4; + $y2 = $y + 4; + + + print "\n"; + print "C$C $msec\n"; + } + if ($line =~ /([0-9\.]+)\] PSTATE: Going to P([0-9]) on cpu ([0-9]+)/) { + my $time = $1 - $base; + my $state = $2; + my $cpu = $3; + + if (defined($pstate_last[$cpu])) { + my $from = $pstate_last[$cpu]; + my $oldstate = $pstate_state[$cpu]; + my $duration = ($time-$from) * $scale; + + $from = $from * $scale; + my $to = $from + $duration; + my $height = 140 - ($oldstate * (140/8)); + + my $y = 400 * $cpu + 200 + $height; + my $y2 = $y+4; + my $style = $styles[8]; + + print "\n"; + print "P$oldstate (cpu $cpu)\n"; + }; + + $pstate_last[$cpu] = $time; + $pstate_state[$cpu] = $state; + } +} + + +print "\n"; -- cgit v1.2.3 From 5f3ea37c7716db4e894a480e0c18b24399595b6b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 30 Oct 2008 08:34:33 +0100 Subject: blktrace: port to tracepoints This was a forward port of work done by Mathieu Desnoyers, I changed it to encode the 'what' parameter on the tracepoint name, so that one can register interest in specific events and not on classes of events to then check the 'what' parameter. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Jens Axboe Signed-off-by: Ingo Molnar --- block/Kconfig | 1 + block/blk-core.c | 33 ++--- block/blktrace.c | 332 ++++++++++++++++++++++++++++++++++++++++++- block/elevator.c | 7 +- drivers/md/dm.c | 6 +- fs/bio.c | 3 +- include/linux/blktrace_api.h | 172 +--------------------- include/trace/block.h | 60 ++++++++ mm/bounce.c | 3 +- 9 files changed, 418 insertions(+), 199 deletions(-) create mode 100644 include/trace/block.h (limited to 'include') diff --git a/block/Kconfig b/block/Kconfig index 1ab7c15c8d7a..290b219fad9c 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -47,6 +47,7 @@ config BLK_DEV_IO_TRACE depends on SYSFS select RELAY select DEBUG_FS + select TRACEPOINTS help Say Y here if you want to be able to trace the block layer actions on a given queue. Tracing allows you to see any traffic happening diff --git a/block/blk-core.c b/block/blk-core.c index 10e8a64a5a5b..04267d66a2b9 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "blk.h" @@ -205,7 +206,7 @@ void blk_plug_device(struct request_queue *q) if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) { mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); - blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); + trace_block_plug(q); } } EXPORT_SYMBOL(blk_plug_device); @@ -292,9 +293,7 @@ void blk_unplug_work(struct work_struct *work) struct request_queue *q = container_of(work, struct request_queue, unplug_work); - blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, - q->rq.count[READ] + q->rq.count[WRITE]); - + trace_block_unplug_io(q); q->unplug_fn(q); } @@ -302,9 +301,7 @@ void blk_unplug_timeout(unsigned long data) { struct request_queue *q = (struct request_queue *)data; - blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, - q->rq.count[READ] + q->rq.count[WRITE]); - + trace_block_unplug_timer(q); kblockd_schedule_work(q, &q->unplug_work); } @@ -314,9 +311,7 @@ void blk_unplug(struct request_queue *q) * devices don't necessarily have an ->unplug_fn defined */ if (q->unplug_fn) { - blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, - q->rq.count[READ] + q->rq.count[WRITE]); - + trace_block_unplug_io(q); q->unplug_fn(q); } } @@ -822,7 +817,7 @@ rq_starved: if (ioc_batching(q, ioc)) ioc->nr_batch_requests--; - blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); + trace_block_getrq(q, bio, rw); out: return rq; } @@ -848,7 +843,7 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags, prepare_to_wait_exclusive(&rl->wait[rw], &wait, TASK_UNINTERRUPTIBLE); - blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ); + trace_block_sleeprq(q, bio, rw); __generic_unplug_device(q); spin_unlock_irq(q->queue_lock); @@ -928,7 +923,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq) { blk_delete_timer(rq); blk_clear_rq_complete(rq); - blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); + trace_block_rq_requeue(q, rq); if (blk_rq_tagged(rq)) blk_queue_end_tag(q, rq); @@ -1167,7 +1162,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) if (!ll_back_merge_fn(q, req, bio)) break; - blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); + trace_block_bio_backmerge(q, bio); req->biotail->bi_next = bio; req->biotail = bio; @@ -1186,7 +1181,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) if (!ll_front_merge_fn(q, req, bio)) break; - blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); + trace_block_bio_frontmerge(q, bio); bio->bi_next = req->bio; req->bio = bio; @@ -1269,7 +1264,7 @@ static inline void blk_partition_remap(struct bio *bio) bio->bi_sector += p->start_sect; bio->bi_bdev = bdev->bd_contains; - blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio, + trace_block_remap(bdev_get_queue(bio->bi_bdev), bio, bdev->bd_dev, bio->bi_sector, bio->bi_sector - p->start_sect); } @@ -1441,10 +1436,10 @@ end_io: goto end_io; if (old_sector != -1) - blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, + trace_block_remap(q, bio, old_dev, bio->bi_sector, old_sector); - blk_add_trace_bio(q, bio, BLK_TA_QUEUE); + trace_block_bio_queue(q, bio); old_sector = bio->bi_sector; old_dev = bio->bi_bdev->bd_dev; @@ -1656,7 +1651,7 @@ static int __end_that_request_first(struct request *req, int error, int total_bytes, bio_nbytes, next_idx = 0; struct bio *bio; - blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); + trace_block_rq_complete(req->q, req); /* * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual diff --git a/block/blktrace.c b/block/blktrace.c index 85049a7e7a17..b0a2cae886db 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -23,10 +23,18 @@ #include #include #include +#include #include static unsigned int blktrace_seq __read_mostly = 1; +/* Global reference count of probes */ +static DEFINE_MUTEX(blk_probe_mutex); +static atomic_t blk_probes_ref = ATOMIC_INIT(0); + +static int blk_register_tracepoints(void); +static void blk_unregister_tracepoints(void); + /* * Send out a notify message. */ @@ -119,7 +127,7 @@ static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK * The worker for the various blk_add_trace*() types. Fills out a * blk_io_trace structure and places it in a per-cpu subbuffer. */ -void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, +static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, int rw, u32 what, int error, int pdu_len, void *pdu_data) { struct task_struct *tsk = current; @@ -177,8 +185,6 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, local_irq_restore(flags); } -EXPORT_SYMBOL_GPL(__blk_add_trace); - static struct dentry *blk_tree_root; static DEFINE_MUTEX(blk_tree_mutex); static unsigned int root_users; @@ -237,6 +243,10 @@ static void blk_trace_cleanup(struct blk_trace *bt) free_percpu(bt->sequence); free_percpu(bt->msg_data); kfree(bt); + mutex_lock(&blk_probe_mutex); + if (atomic_dec_and_test(&blk_probes_ref)) + blk_unregister_tracepoints(); + mutex_unlock(&blk_probe_mutex); } int blk_trace_remove(struct request_queue *q) @@ -428,6 +438,14 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, bt->pid = buts->pid; bt->trace_state = Blktrace_setup; + mutex_lock(&blk_probe_mutex); + if (atomic_add_return(1, &blk_probes_ref) == 1) { + ret = blk_register_tracepoints(); + if (ret) + goto probe_err; + } + mutex_unlock(&blk_probe_mutex); + ret = -EBUSY; old_bt = xchg(&q->blk_trace, bt); if (old_bt) { @@ -436,6 +454,9 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, } return 0; +probe_err: + atomic_dec(&blk_probes_ref); + mutex_unlock(&blk_probe_mutex); err: if (dir) blk_remove_tree(dir); @@ -562,3 +583,308 @@ void blk_trace_shutdown(struct request_queue *q) blk_trace_remove(q); } } + +/* + * blktrace probes + */ + +/** + * blk_add_trace_rq - Add a trace for a request oriented action + * @q: queue the io is for + * @rq: the source request + * @what: the action + * + * Description: + * Records an action against a request. Will log the bio offset + size. + * + **/ +static void blk_add_trace_rq(struct request_queue *q, struct request *rq, + u32 what) +{ + struct blk_trace *bt = q->blk_trace; + int rw = rq->cmd_flags & 0x03; + + if (likely(!bt)) + return; + + if (blk_discard_rq(rq)) + rw |= (1 << BIO_RW_DISCARD); + + if (blk_pc_request(rq)) { + what |= BLK_TC_ACT(BLK_TC_PC); + __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, + sizeof(rq->cmd), rq->cmd); + } else { + what |= BLK_TC_ACT(BLK_TC_FS); + __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, + rw, what, rq->errors, 0, NULL); + } +} + +static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq) +{ + blk_add_trace_rq(q, rq, BLK_TA_ABORT); +} + +static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq) +{ + blk_add_trace_rq(q, rq, BLK_TA_INSERT); +} + +static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq) +{ + blk_add_trace_rq(q, rq, BLK_TA_ISSUE); +} + +static void blk_add_trace_rq_requeue(struct request_queue *q, struct request *rq) +{ + blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); +} + +static void blk_add_trace_rq_complete(struct request_queue *q, struct request *rq) +{ + blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); +} + +/** + * blk_add_trace_bio - Add a trace for a bio oriented action + * @q: queue the io is for + * @bio: the source bio + * @what: the action + * + * Description: + * Records an action against a bio. Will log the bio offset + size. + * + **/ +static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, + u32 what) +{ + struct blk_trace *bt = q->blk_trace; + + if (likely(!bt)) + return; + + __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, + !bio_flagged(bio, BIO_UPTODATE), 0, NULL); +} + +static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio) +{ + blk_add_trace_bio(q, bio, BLK_TA_BOUNCE); +} + +static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio) +{ + blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); +} + +static void blk_add_trace_bio_backmerge(struct request_queue *q, struct bio *bio) +{ + blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); +} + +static void blk_add_trace_bio_frontmerge(struct request_queue *q, struct bio *bio) +{ + blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); +} + +static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio) +{ + blk_add_trace_bio(q, bio, BLK_TA_QUEUE); +} + +static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw) +{ + if (bio) + blk_add_trace_bio(q, bio, BLK_TA_GETRQ); + else { + struct blk_trace *bt = q->blk_trace; + + if (bt) + __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL); + } +} + + +static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int rw) +{ + if (bio) + blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ); + else { + struct blk_trace *bt = q->blk_trace; + + if (bt) + __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, 0, 0, NULL); + } +} + +static void blk_add_trace_plug(struct request_queue *q) +{ + struct blk_trace *bt = q->blk_trace; + + if (bt) + __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); +} + +static void blk_add_trace_unplug_io(struct request_queue *q) +{ + struct blk_trace *bt = q->blk_trace; + + if (bt) { + unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; + __be64 rpdu = cpu_to_be64(pdu); + + __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0, + sizeof(rpdu), &rpdu); + } +} + +static void blk_add_trace_unplug_timer(struct request_queue *q) +{ + struct blk_trace *bt = q->blk_trace; + + if (bt) { + unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; + __be64 rpdu = cpu_to_be64(pdu); + + __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0, + sizeof(rpdu), &rpdu); + } +} + +static void blk_add_trace_split(struct request_queue *q, struct bio *bio, + unsigned int pdu) +{ + struct blk_trace *bt = q->blk_trace; + + if (bt) { + __be64 rpdu = cpu_to_be64(pdu); + + __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, + BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE), + sizeof(rpdu), &rpdu); + } +} + +/** + * blk_add_trace_remap - Add a trace for a remap operation + * @q: queue the io is for + * @bio: the source bio + * @dev: target device + * @from: source sector + * @to: target sector + * + * Description: + * Device mapper or raid target sometimes need to split a bio because + * it spans a stripe (or similar). Add a trace for that action. + * + **/ +static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, + dev_t dev, sector_t from, sector_t to) +{ + struct blk_trace *bt = q->blk_trace; + struct blk_io_trace_remap r; + + if (likely(!bt)) + return; + + r.device = cpu_to_be32(dev); + r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev); + r.sector = cpu_to_be64(to); + + __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, + !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); +} + +/** + * blk_add_driver_data - Add binary message with driver-specific data + * @q: queue the io is for + * @rq: io request + * @data: driver-specific data + * @len: length of driver-specific data + * + * Description: + * Some drivers might want to write driver-specific data per request. + * + **/ +void blk_add_driver_data(struct request_queue *q, + struct request *rq, + void *data, size_t len) +{ + struct blk_trace *bt = q->blk_trace; + + if (likely(!bt)) + return; + + if (blk_pc_request(rq)) + __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA, + rq->errors, len, data); + else + __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, + 0, BLK_TA_DRV_DATA, rq->errors, len, data); +} +EXPORT_SYMBOL_GPL(blk_add_driver_data); + +static int blk_register_tracepoints(void) +{ + int ret; + + ret = register_trace_block_rq_abort(blk_add_trace_rq_abort); + WARN_ON(ret); + ret = register_trace_block_rq_insert(blk_add_trace_rq_insert); + WARN_ON(ret); + ret = register_trace_block_rq_issue(blk_add_trace_rq_issue); + WARN_ON(ret); + ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue); + WARN_ON(ret); + ret = register_trace_block_rq_complete(blk_add_trace_rq_complete); + WARN_ON(ret); + ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce); + WARN_ON(ret); + ret = register_trace_block_bio_complete(blk_add_trace_bio_complete); + WARN_ON(ret); + ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); + WARN_ON(ret); + ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); + WARN_ON(ret); + ret = register_trace_block_bio_queue(blk_add_trace_bio_queue); + WARN_ON(ret); + ret = register_trace_block_getrq(blk_add_trace_getrq); + WARN_ON(ret); + ret = register_trace_block_sleeprq(blk_add_trace_sleeprq); + WARN_ON(ret); + ret = register_trace_block_plug(blk_add_trace_plug); + WARN_ON(ret); + ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer); + WARN_ON(ret); + ret = register_trace_block_unplug_io(blk_add_trace_unplug_io); + WARN_ON(ret); + ret = register_trace_block_split(blk_add_trace_split); + WARN_ON(ret); + ret = register_trace_block_remap(blk_add_trace_remap); + WARN_ON(ret); + return 0; +} + +static void blk_unregister_tracepoints(void) +{ + unregister_trace_block_remap(blk_add_trace_remap); + unregister_trace_block_split(blk_add_trace_split); + unregister_trace_block_unplug_io(blk_add_trace_unplug_io); + unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer); + unregister_trace_block_plug(blk_add_trace_plug); + unregister_trace_block_sleeprq(blk_add_trace_sleeprq); + unregister_trace_block_getrq(blk_add_trace_getrq); + unregister_trace_block_bio_queue(blk_add_trace_bio_queue); + unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); + unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); + unregister_trace_block_bio_complete(blk_add_trace_bio_complete); + unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce); + unregister_trace_block_rq_complete(blk_add_trace_rq_complete); + unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue); + unregister_trace_block_rq_issue(blk_add_trace_rq_issue); + unregister_trace_block_rq_insert(blk_add_trace_rq_insert); + unregister_trace_block_rq_abort(blk_add_trace_rq_abort); + + tracepoint_synchronize_unregister(); +} diff --git a/block/elevator.c b/block/elevator.c index 9ac82dde99dd..530fcfe2ef07 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -586,7 +587,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) unsigned ordseq; int unplug_it = 1; - blk_add_trace_rq(q, rq, BLK_TA_INSERT); + trace_block_rq_insert(q, rq); rq->q = q; @@ -772,7 +773,7 @@ struct request *elv_next_request(struct request_queue *q) * not be passed by new incoming requests */ rq->cmd_flags |= REQ_STARTED; - blk_add_trace_rq(q, rq, BLK_TA_ISSUE); + trace_block_rq_issue(q, rq); } if (!q->boundary_rq || q->boundary_rq == rq) { @@ -921,7 +922,7 @@ void elv_abort_queue(struct request_queue *q) while (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); rq->cmd_flags |= REQ_QUIET; - blk_add_trace_rq(q, rq, BLK_TA_ABORT); + trace_block_rq_abort(q, rq); __blk_end_request(rq, -EIO, blk_rq_bytes(rq)); } } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index c99e4728ff41..d23fda178163 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -21,6 +21,7 @@ #include #include #include +#include #define DM_MSG_PREFIX "core" @@ -504,8 +505,7 @@ static void dec_pending(struct dm_io *io, int error) end_io_acct(io); if (io->error != DM_ENDIO_REQUEUE) { - blk_add_trace_bio(io->md->queue, io->bio, - BLK_TA_COMPLETE); + trace_block_bio_complete(io->md->queue, io->bio); bio_endio(io->bio, io->error); } @@ -598,7 +598,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, if (r == DM_MAPIO_REMAPPED) { /* the bio has been remapped so dispatch it */ - blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone, + trace_block_remap(bdev_get_queue(clone->bi_bdev), clone, tio->io->bio->bi_bdev->bd_dev, clone->bi_sector, sector); diff --git a/fs/bio.c b/fs/bio.c index 77a55bcceedb..060859c69092 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -26,6 +26,7 @@ #include #include #include +#include #include /* for struct sg_iovec */ static struct kmem_cache *bio_slab __read_mostly; @@ -1263,7 +1264,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) if (!bp) return bp; - blk_add_trace_pdu_int(bdev_get_queue(bi->bi_bdev), BLK_TA_SPLIT, bi, + trace_block_split(bdev_get_queue(bi->bi_bdev), bi, bi->bi_sector + first_sectors); BUG_ON(bi->bi_vcnt != 1); diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index bdf505d33e77..1dba3493d520 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -160,7 +160,6 @@ struct blk_trace { extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); extern void blk_trace_shutdown(struct request_queue *); -extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *); extern int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct blk_user_trace_setup *buts); extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); @@ -186,168 +185,8 @@ extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); } while (0) #define BLK_TN_MAX_MSG 128 -/** - * blk_add_trace_rq - Add a trace for a request oriented action - * @q: queue the io is for - * @rq: the source request - * @what: the action - * - * Description: - * Records an action against a request. Will log the bio offset + size. - * - **/ -static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq, - u32 what) -{ - struct blk_trace *bt = q->blk_trace; - int rw = rq->cmd_flags & 0x03; - - if (likely(!bt)) - return; - - if (blk_discard_rq(rq)) - rw |= (1 << BIO_RW_DISCARD); - - if (blk_pc_request(rq)) { - what |= BLK_TC_ACT(BLK_TC_PC); - __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd); - } else { - what |= BLK_TC_ACT(BLK_TC_FS); - __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, rw, what, rq->errors, 0, NULL); - } -} - -/** - * blk_add_trace_bio - Add a trace for a bio oriented action - * @q: queue the io is for - * @bio: the source bio - * @what: the action - * - * Description: - * Records an action against a bio. Will log the bio offset + size. - * - **/ -static inline void blk_add_trace_bio(struct request_queue *q, struct bio *bio, - u32 what) -{ - struct blk_trace *bt = q->blk_trace; - - if (likely(!bt)) - return; - - __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), 0, NULL); -} - -/** - * blk_add_trace_generic - Add a trace for a generic action - * @q: queue the io is for - * @bio: the source bio - * @rw: the data direction - * @what: the action - * - * Description: - * Records a simple trace - * - **/ -static inline void blk_add_trace_generic(struct request_queue *q, - struct bio *bio, int rw, u32 what) -{ - struct blk_trace *bt = q->blk_trace; - - if (likely(!bt)) - return; - - if (bio) - blk_add_trace_bio(q, bio, what); - else - __blk_add_trace(bt, 0, 0, rw, what, 0, 0, NULL); -} - -/** - * blk_add_trace_pdu_int - Add a trace for a bio with an integer payload - * @q: queue the io is for - * @what: the action - * @bio: the source bio - * @pdu: the integer payload - * - * Description: - * Adds a trace with some integer payload. This might be an unplug - * option given as the action, with the depth at unplug time given - * as the payload - * - **/ -static inline void blk_add_trace_pdu_int(struct request_queue *q, u32 what, - struct bio *bio, unsigned int pdu) -{ - struct blk_trace *bt = q->blk_trace; - __be64 rpdu = cpu_to_be64(pdu); - - if (likely(!bt)) - return; - - if (bio) - __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), sizeof(rpdu), &rpdu); - else - __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu); -} - -/** - * blk_add_trace_remap - Add a trace for a remap operation - * @q: queue the io is for - * @bio: the source bio - * @dev: target device - * @from: source sector - * @to: target sector - * - * Description: - * Device mapper or raid target sometimes need to split a bio because - * it spans a stripe (or similar). Add a trace for that action. - * - **/ -static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio, - dev_t dev, sector_t from, sector_t to) -{ - struct blk_trace *bt = q->blk_trace; - struct blk_io_trace_remap r; - - if (likely(!bt)) - return; - - r.device = cpu_to_be32(dev); - r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev); - r.sector = cpu_to_be64(to); - - __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); -} - -/** - * blk_add_driver_data - Add binary message with driver-specific data - * @q: queue the io is for - * @rq: io request - * @data: driver-specific data - * @len: length of driver-specific data - * - * Description: - * Some drivers might want to write driver-specific data per request. - * - **/ -static inline void blk_add_driver_data(struct request_queue *q, - struct request *rq, - void *data, size_t len) -{ - struct blk_trace *bt = q->blk_trace; - - if (likely(!bt)) - return; - - if (blk_pc_request(rq)) - __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA, - rq->errors, len, data); - else - __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, - 0, BLK_TA_DRV_DATA, rq->errors, len, data); -} - +extern void blk_add_driver_data(struct request_queue *q, struct request *rq, + void *data, size_t len); extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, char __user *arg); extern int blk_trace_startstop(struct request_queue *q, int start); @@ -356,13 +195,8 @@ extern int blk_trace_remove(struct request_queue *q); #else /* !CONFIG_BLK_DEV_IO_TRACE */ #define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) #define blk_trace_shutdown(q) do { } while (0) -#define blk_add_trace_rq(q, rq, what) do { } while (0) -#define blk_add_trace_bio(q, rq, what) do { } while (0) -#define blk_add_trace_generic(q, rq, rw, what) do { } while (0) -#define blk_add_trace_pdu_int(q, what, bio, pdu) do { } while (0) -#define blk_add_trace_remap(q, bio, dev, f, t) do {} while (0) -#define blk_add_driver_data(q, rq, data, len) do {} while (0) #define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY) +#define blk_add_driver_data(q, rq, data, len) do {} while (0) #define blk_trace_setup(q, name, dev, arg) (-ENOTTY) #define blk_trace_startstop(q, start) (-ENOTTY) #define blk_trace_remove(q) (-ENOTTY) diff --git a/include/trace/block.h b/include/trace/block.h new file mode 100644 index 000000000000..3cc2675ebf01 --- /dev/null +++ b/include/trace/block.h @@ -0,0 +1,60 @@ +#ifndef _TRACE_BLOCK_H +#define _TRACE_BLOCK_H + +#include +#include + +DEFINE_TRACE(block_rq_abort, + TPPROTO(struct request_queue *q, struct request *rq), + TPARGS(q, rq)); +DEFINE_TRACE(block_rq_insert, + TPPROTO(struct request_queue *q, struct request *rq), + TPARGS(q, rq)); +DEFINE_TRACE(block_rq_issue, + TPPROTO(struct request_queue *q, struct request *rq), + TPARGS(q, rq)); +DEFINE_TRACE(block_rq_requeue, + TPPROTO(struct request_queue *q, struct request *rq), + TPARGS(q, rq)); +DEFINE_TRACE(block_rq_complete, + TPPROTO(struct request_queue *q, struct request *rq), + TPARGS(q, rq)); +DEFINE_TRACE(block_bio_bounce, + TPPROTO(struct request_queue *q, struct bio *bio), + TPARGS(q, bio)); +DEFINE_TRACE(block_bio_complete, + TPPROTO(struct request_queue *q, struct bio *bio), + TPARGS(q, bio)); +DEFINE_TRACE(block_bio_backmerge, + TPPROTO(struct request_queue *q, struct bio *bio), + TPARGS(q, bio)); +DEFINE_TRACE(block_bio_frontmerge, + TPPROTO(struct request_queue *q, struct bio *bio), + TPARGS(q, bio)); +DEFINE_TRACE(block_bio_queue, + TPPROTO(struct request_queue *q, struct bio *bio), + TPARGS(q, bio)); +DEFINE_TRACE(block_getrq, + TPPROTO(struct request_queue *q, struct bio *bio, int rw), + TPARGS(q, bio, rw)); +DEFINE_TRACE(block_sleeprq, + TPPROTO(struct request_queue *q, struct bio *bio, int rw), + TPARGS(q, bio, rw)); +DEFINE_TRACE(block_plug, + TPPROTO(struct request_queue *q), + TPARGS(q)); +DEFINE_TRACE(block_unplug_timer, + TPPROTO(struct request_queue *q), + TPARGS(q)); +DEFINE_TRACE(block_unplug_io, + TPPROTO(struct request_queue *q), + TPARGS(q)); +DEFINE_TRACE(block_split, + TPPROTO(struct request_queue *q, struct bio *bio, unsigned int pdu), + TPARGS(q, bio, pdu)); +DEFINE_TRACE(block_remap, + TPPROTO(struct request_queue *q, struct bio *bio, dev_t dev, + sector_t from, sector_t to), + TPARGS(q, bio, dev, from, to)); + +#endif diff --git a/mm/bounce.c b/mm/bounce.c index 06722c403058..bd1caaa582b8 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #define POOL_SIZE 64 @@ -222,7 +223,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, if (!bio) return; - blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE); + trace_block_bio_bounce(q, *bio_orig); /* * at least one page was bounced, fill in possible non-highmem -- cgit v1.2.3 From 0bfc24559d7945506184d86739fe365a181f06b7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 26 Nov 2008 11:59:56 +0100 Subject: blktrace: port to tracepoints, update Port to the new tracepoints API: split DEFINE_TRACE() and DECLARE_TRACE() sites. Spread them out to the usage sites, as suggested by Mathieu Desnoyers. Signed-off-by: Ingo Molnar Acked-by: Mathieu Desnoyers --- block/blk-core.c | 13 ++++++++ block/elevator.c | 5 +++ drivers/md/dm.c | 2 ++ fs/bio.c | 2 ++ include/trace/block.h | 84 ++++++++++++++++++++++++++++++--------------------- mm/bounce.c | 2 ++ 6 files changed, 74 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 04267d66a2b9..0c06cf5aaaf8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -32,6 +32,19 @@ #include "blk.h" +DEFINE_TRACE(block_plug); +DEFINE_TRACE(block_unplug_io); +DEFINE_TRACE(block_unplug_timer); +DEFINE_TRACE(block_getrq); +DEFINE_TRACE(block_sleeprq); +DEFINE_TRACE(block_rq_requeue); +DEFINE_TRACE(block_bio_backmerge); +DEFINE_TRACE(block_bio_frontmerge); +DEFINE_TRACE(block_bio_queue); +DEFINE_TRACE(block_rq_complete); +DEFINE_TRACE(block_remap); /* Also used in drivers/md/dm.c */ +EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); + static int __make_request(struct request_queue *q, struct bio *bio); /* diff --git a/block/elevator.c b/block/elevator.c index 530fcfe2ef07..e5677fe4f412 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -42,6 +42,8 @@ static DEFINE_SPINLOCK(elv_list_lock); static LIST_HEAD(elv_list); +DEFINE_TRACE(block_rq_abort); + /* * Merge hash stuff. */ @@ -53,6 +55,9 @@ static const int elv_hash_shift = 6; #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) +DEFINE_TRACE(block_rq_insert); +DEFINE_TRACE(block_rq_issue); + /* * Query io scheduler to see if the current process issuing bio may be * merged with rq. diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d23fda178163..343094c3feeb 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -52,6 +52,8 @@ struct dm_target_io { union map_info info; }; +DEFINE_TRACE(block_bio_complete); + union map_info *dm_get_mapinfo(struct bio *bio) { if (bio && bio->bi_private) diff --git a/fs/bio.c b/fs/bio.c index 060859c69092..df99c882b807 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -29,6 +29,8 @@ #include #include /* for struct sg_iovec */ +DEFINE_TRACE(block_split); + static struct kmem_cache *bio_slab __read_mostly; static mempool_t *bio_split_pool __read_mostly; diff --git a/include/trace/block.h b/include/trace/block.h index 3cc2675ebf01..25c6a1fd5b77 100644 --- a/include/trace/block.h +++ b/include/trace/block.h @@ -4,57 +4,73 @@ #include #include -DEFINE_TRACE(block_rq_abort, +DECLARE_TRACE(block_rq_abort, TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); -DEFINE_TRACE(block_rq_insert, + TPARGS(q, rq)); + +DECLARE_TRACE(block_rq_insert, TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); -DEFINE_TRACE(block_rq_issue, + TPARGS(q, rq)); + +DECLARE_TRACE(block_rq_issue, TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); -DEFINE_TRACE(block_rq_requeue, + TPARGS(q, rq)); + +DECLARE_TRACE(block_rq_requeue, TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); -DEFINE_TRACE(block_rq_complete, + TPARGS(q, rq)); + +DECLARE_TRACE(block_rq_complete, TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); -DEFINE_TRACE(block_bio_bounce, + TPARGS(q, rq)); + +DECLARE_TRACE(block_bio_bounce, TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); -DEFINE_TRACE(block_bio_complete, + TPARGS(q, bio)); + +DECLARE_TRACE(block_bio_complete, TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); -DEFINE_TRACE(block_bio_backmerge, + TPARGS(q, bio)); + +DECLARE_TRACE(block_bio_backmerge, TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); -DEFINE_TRACE(block_bio_frontmerge, + TPARGS(q, bio)); + +DECLARE_TRACE(block_bio_frontmerge, TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); -DEFINE_TRACE(block_bio_queue, + TPARGS(q, bio)); + +DECLARE_TRACE(block_bio_queue, TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); -DEFINE_TRACE(block_getrq, + TPARGS(q, bio)); + +DECLARE_TRACE(block_getrq, TPPROTO(struct request_queue *q, struct bio *bio, int rw), - TPARGS(q, bio, rw)); -DEFINE_TRACE(block_sleeprq, + TPARGS(q, bio, rw)); + +DECLARE_TRACE(block_sleeprq, TPPROTO(struct request_queue *q, struct bio *bio, int rw), - TPARGS(q, bio, rw)); -DEFINE_TRACE(block_plug, + TPARGS(q, bio, rw)); + +DECLARE_TRACE(block_plug, TPPROTO(struct request_queue *q), - TPARGS(q)); -DEFINE_TRACE(block_unplug_timer, + TPARGS(q)); + +DECLARE_TRACE(block_unplug_timer, TPPROTO(struct request_queue *q), - TPARGS(q)); -DEFINE_TRACE(block_unplug_io, + TPARGS(q)); + +DECLARE_TRACE(block_unplug_io, TPPROTO(struct request_queue *q), - TPARGS(q)); -DEFINE_TRACE(block_split, + TPARGS(q)); + +DECLARE_TRACE(block_split, TPPROTO(struct request_queue *q, struct bio *bio, unsigned int pdu), - TPARGS(q, bio, pdu)); -DEFINE_TRACE(block_remap, + TPARGS(q, bio, pdu)); + +DECLARE_TRACE(block_remap, TPPROTO(struct request_queue *q, struct bio *bio, dev_t dev, sector_t from, sector_t to), - TPARGS(q, bio, dev, from, to)); + TPARGS(q, bio, dev, from, to)); #endif diff --git a/mm/bounce.c b/mm/bounce.c index bd1caaa582b8..bf0cf7c8387b 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -22,6 +22,8 @@ static mempool_t *page_pool, *isa_page_pool; +DEFINE_TRACE(block_bio_bounce); + #ifdef CONFIG_HIGHMEM static __init int init_emergency_pool(void) { -- cgit v1.2.3 From ce71e27c6fdc43c29f36d307b9100bde70c947fc Mon Sep 17 00:00:00 2001 From: Eduard - Gabriel Munteanu Date: Tue, 19 Aug 2008 20:43:25 +0300 Subject: SLUB: Replace __builtin_return_address(0) with _RET_IP_. This patch replaces __builtin_return_address(0) with _RET_IP_, since a previous patch moved _RET_IP_ and _THIS_IP_ to include/linux/kernel.h and they're widely available now. This makes for shorter and easier to read code. [penberg@cs.helsinki.fi: remove _RET_IP_ casts to void pointer] Signed-off-by: Eduard - Gabriel Munteanu Signed-off-by: Pekka Enberg --- include/linux/slab.h | 8 ++++---- mm/slab.c | 8 ++++---- mm/slub.c | 48 ++++++++++++++++++++++++------------------------ 3 files changed, 32 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 000da12b5cf0..c97ed28559ec 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -253,9 +253,9 @@ static inline void *kmem_cache_alloc_node(struct kmem_cache *cachep, * request comes from. */ #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB) -extern void *__kmalloc_track_caller(size_t, gfp_t, void*); +extern void *__kmalloc_track_caller(size_t, gfp_t, unsigned long); #define kmalloc_track_caller(size, flags) \ - __kmalloc_track_caller(size, flags, __builtin_return_address(0)) + __kmalloc_track_caller(size, flags, _RET_IP_) #else #define kmalloc_track_caller(size, flags) \ __kmalloc(size, flags) @@ -271,10 +271,10 @@ extern void *__kmalloc_track_caller(size_t, gfp_t, void*); * allocation request comes from. */ #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB) -extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, void *); +extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long); #define kmalloc_node_track_caller(size, flags, node) \ __kmalloc_node_track_caller(size, flags, node, \ - __builtin_return_address(0)) + _RET_IP_) #else #define kmalloc_node_track_caller(size, flags, node) \ __kmalloc_node(size, flags, node) diff --git a/mm/slab.c b/mm/slab.c index 09187517f9dc..a14787799014 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3686,9 +3686,9 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) EXPORT_SYMBOL(__kmalloc_node); void *__kmalloc_node_track_caller(size_t size, gfp_t flags, - int node, void *caller) + int node, unsigned long caller) { - return __do_kmalloc_node(size, flags, node, caller); + return __do_kmalloc_node(size, flags, node, (void *)caller); } EXPORT_SYMBOL(__kmalloc_node_track_caller); #else @@ -3730,9 +3730,9 @@ void *__kmalloc(size_t size, gfp_t flags) } EXPORT_SYMBOL(__kmalloc); -void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller) +void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller) { - return __do_kmalloc(size, flags, caller); + return __do_kmalloc(size, flags, (void *)caller); } EXPORT_SYMBOL(__kmalloc_track_caller); diff --git a/mm/slub.c b/mm/slub.c index 7ec2888bffc1..68ab260583d0 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -182,7 +182,7 @@ static LIST_HEAD(slab_caches); * Tracking user of a slab. */ struct track { - void *addr; /* Called from address */ + unsigned long addr; /* Called from address */ int cpu; /* Was running on cpu */ int pid; /* Pid context */ unsigned long when; /* When did the operation occur */ @@ -371,7 +371,7 @@ static struct track *get_track(struct kmem_cache *s, void *object, } static void set_track(struct kmem_cache *s, void *object, - enum track_item alloc, void *addr) + enum track_item alloc, unsigned long addr) { struct track *p; @@ -395,8 +395,8 @@ static void init_tracking(struct kmem_cache *s, void *object) if (!(s->flags & SLAB_STORE_USER)) return; - set_track(s, object, TRACK_FREE, NULL); - set_track(s, object, TRACK_ALLOC, NULL); + set_track(s, object, TRACK_FREE, 0UL); + set_track(s, object, TRACK_ALLOC, 0UL); } static void print_track(const char *s, struct track *t) @@ -405,7 +405,7 @@ static void print_track(const char *s, struct track *t) return; printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n", - s, t->addr, jiffies - t->when, t->cpu, t->pid); + s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid); } static void print_tracking(struct kmem_cache *s, void *object) @@ -870,7 +870,7 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page, } static int alloc_debug_processing(struct kmem_cache *s, struct page *page, - void *object, void *addr) + void *object, unsigned long addr) { if (!check_slab(s, page)) goto bad; @@ -910,7 +910,7 @@ bad: } static int free_debug_processing(struct kmem_cache *s, struct page *page, - void *object, void *addr) + void *object, unsigned long addr) { if (!check_slab(s, page)) goto fail; @@ -1033,10 +1033,10 @@ static inline void setup_object_debug(struct kmem_cache *s, struct page *page, void *object) {} static inline int alloc_debug_processing(struct kmem_cache *s, - struct page *page, void *object, void *addr) { return 0; } + struct page *page, void *object, unsigned long addr) { return 0; } static inline int free_debug_processing(struct kmem_cache *s, - struct page *page, void *object, void *addr) { return 0; } + struct page *page, void *object, unsigned long addr) { return 0; } static inline int slab_pad_check(struct kmem_cache *s, struct page *page) { return 1; } @@ -1503,8 +1503,8 @@ static inline int node_match(struct kmem_cache_cpu *c, int node) * we need to allocate a new slab. This is the slowest path since it involves * a call to the page allocator and the setup of a new slab. */ -static void *__slab_alloc(struct kmem_cache *s, - gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c) +static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, + unsigned long addr, struct kmem_cache_cpu *c) { void **object; struct page *new; @@ -1588,7 +1588,7 @@ debug: * Otherwise we can simply pick the next object from the lockless free list. */ static __always_inline void *slab_alloc(struct kmem_cache *s, - gfp_t gfpflags, int node, void *addr) + gfp_t gfpflags, int node, unsigned long addr) { void **object; struct kmem_cache_cpu *c; @@ -1617,14 +1617,14 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) { - return slab_alloc(s, gfpflags, -1, __builtin_return_address(0)); + return slab_alloc(s, gfpflags, -1, _RET_IP_); } EXPORT_SYMBOL(kmem_cache_alloc); #ifdef CONFIG_NUMA void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) { - return slab_alloc(s, gfpflags, node, __builtin_return_address(0)); + return slab_alloc(s, gfpflags, node, _RET_IP_); } EXPORT_SYMBOL(kmem_cache_alloc_node); #endif @@ -1638,7 +1638,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node); * handling required then we can return immediately. */ static void __slab_free(struct kmem_cache *s, struct page *page, - void *x, void *addr, unsigned int offset) + void *x, unsigned long addr, unsigned int offset) { void *prior; void **object = (void *)x; @@ -1708,7 +1708,7 @@ debug: * with all sorts of special processing. */ static __always_inline void slab_free(struct kmem_cache *s, - struct page *page, void *x, void *addr) + struct page *page, void *x, unsigned long addr) { void **object = (void *)x; struct kmem_cache_cpu *c; @@ -1735,7 +1735,7 @@ void kmem_cache_free(struct kmem_cache *s, void *x) page = virt_to_head_page(x); - slab_free(s, page, x, __builtin_return_address(0)); + slab_free(s, page, x, _RET_IP_); } EXPORT_SYMBOL(kmem_cache_free); @@ -2663,7 +2663,7 @@ void *__kmalloc(size_t size, gfp_t flags) if (unlikely(ZERO_OR_NULL_PTR(s))) return s; - return slab_alloc(s, flags, -1, __builtin_return_address(0)); + return slab_alloc(s, flags, -1, _RET_IP_); } EXPORT_SYMBOL(__kmalloc); @@ -2691,7 +2691,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) if (unlikely(ZERO_OR_NULL_PTR(s))) return s; - return slab_alloc(s, flags, node, __builtin_return_address(0)); + return slab_alloc(s, flags, node, _RET_IP_); } EXPORT_SYMBOL(__kmalloc_node); #endif @@ -2748,7 +2748,7 @@ void kfree(const void *x) put_page(page); return; } - slab_free(page->slab, page, object, __builtin_return_address(0)); + slab_free(page->slab, page, object, _RET_IP_); } EXPORT_SYMBOL(kfree); @@ -3204,7 +3204,7 @@ static struct notifier_block __cpuinitdata slab_notifier = { #endif -void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller) +void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) { struct kmem_cache *s; @@ -3220,7 +3220,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller) } void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, - int node, void *caller) + int node, unsigned long caller) { struct kmem_cache *s; @@ -3431,7 +3431,7 @@ static void resiliency_test(void) {}; struct location { unsigned long count; - void *addr; + unsigned long addr; long long sum_time; long min_time; long max_time; @@ -3479,7 +3479,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, { long start, end, pos; struct location *l; - void *caddr; + unsigned long caddr; unsigned long age = jiffies - track->when; start = -1; -- cgit v1.2.3 From e2f367f269fe19375f10e63efe0f2a6d3ddef8e6 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Fri, 21 Nov 2008 19:01:30 +0200 Subject: nl80211: Report max TX power in NL80211_BAND_ATTR_FREQS This is useful information to provide for userspace (e.g., hostapd needs this to generate Country IE). Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 4 ++++ net/wireless/nl80211.c | 3 +++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 79827345351d..54d6ebe38e39 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -508,6 +508,7 @@ enum nl80211_band_attr { * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_RADAR: Radar detection is mandatory * on this channel in current regulatory domain. + * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in dBm. */ enum nl80211_frequency_attr { __NL80211_FREQUENCY_ATTR_INVALID, @@ -516,12 +517,15 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_PASSIVE_SCAN, NL80211_FREQUENCY_ATTR_NO_IBSS, NL80211_FREQUENCY_ATTR_RADAR, + NL80211_FREQUENCY_ATTR_MAX_TX_POWER, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, NL80211_FREQUENCY_ATTR_MAX = __NL80211_FREQUENCY_ATTR_AFTER_LAST - 1 }; +#define NL80211_FREQUENCY_ATTR_MAX_TX_POWER NL80211_FREQUENCY_ATTR_MAX_TX_POWER + /** * enum nl80211_bitrate_attr - bitrate attributes * @NL80211_BITRATE_ATTR_RATE: Bitrate in units of 100 kbps diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 00121ceddb14..2e8464eaaaa2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -198,6 +198,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (chan->flags & IEEE80211_CHAN_RADAR) NLA_PUT_FLAG(msg, NL80211_FREQUENCY_ATTR_RADAR); + NLA_PUT_U8(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, + chan->max_power); + nla_nest_end(msg, nl_freq); } -- cgit v1.2.3 From f80b5e99c7dac5a9a0d72496cec5075a12cd1476 Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Fri, 21 Nov 2008 20:40:09 -0200 Subject: rfkill: preserve state across suspend The rfkill class API requires that the driver connected to a class call rfkill_force_state() on resume to update the real state of the rfkill controller, OR that it provides a get_state() hook. This means there is potentially a hidden call in the resume code flow that changes rfkill->state (i.e. rfkill_force_state()), so the previous state of the transmitter was being lost. The simplest and most future-proof way to fix this is to explicitly store the pre-sleep state on the rfkill structure, and restore from that on resume. Signed-off-by: Henrique de Moraes Holschuh Acked-by: Ivo van Doorn Cc: Matthew Garrett Cc: Alan Jenkins Signed-off-by: John W. Linville --- include/linux/rfkill.h | 1 + net/rfkill/rfkill.c | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 4cd64b0d9825..f376a93927f7 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -108,6 +108,7 @@ struct rfkill { struct device dev; struct list_head node; + enum rfkill_state state_for_resume; }; #define to_rfkill(d) container_of(d, struct rfkill, dev) diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index ec26eae8004d..5ad411d3e8f8 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -565,10 +565,15 @@ static void rfkill_release(struct device *dev) #ifdef CONFIG_PM static int rfkill_suspend(struct device *dev, pm_message_t state) { + struct rfkill *rfkill = to_rfkill(dev); + /* mark class device as suspended */ if (dev->power.power_state.event != state.event) dev->power.power_state = state; + /* store state for the resume handler */ + rfkill->state_for_resume = rfkill->state; + return 0; } @@ -590,7 +595,7 @@ static int rfkill_resume(struct device *dev) rfkill_toggle_radio(rfkill, rfkill_epo_lock_active ? RFKILL_STATE_SOFT_BLOCKED : - rfkill->state, + rfkill->state_for_resume, 1); mutex_unlock(&rfkill->mutex); -- cgit v1.2.3 From bf8c1ac6d81ba8c0e4dc2215f84f5e2a3c8227e8 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 22 Nov 2008 22:00:31 +0200 Subject: nl80211: Change max TX power to be in mBm instead of dBm In order to be consistent with NL80211_ATTR_POWER_RULE_MAX_EIRP, change NL80211_FREQUENCY_ATTR_MAX_TX_POWER to use mBm and U32 instead of dBm and U8. This is a userspace interface change, but the previous version had not yet been pushed upstream and there are no userspace programs using this yet, so there is justification to get this change in as long as it goes in before the previous version gets out. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 3 ++- net/wireless/nl80211.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 54d6ebe38e39..e08c8bcfb78d 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -508,7 +508,8 @@ enum nl80211_band_attr { * on this channel in current regulatory domain. * @NL80211_FREQUENCY_ATTR_RADAR: Radar detection is mandatory * on this channel in current regulatory domain. - * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in dBm. + * @NL80211_FREQUENCY_ATTR_MAX_TX_POWER: Maximum transmission power in mBm + * (100 * dBm). */ enum nl80211_frequency_attr { __NL80211_FREQUENCY_ATTR_INVALID, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2e8464eaaaa2..c9141e3df9ba 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -198,8 +198,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (chan->flags & IEEE80211_CHAN_RADAR) NLA_PUT_FLAG(msg, NL80211_FREQUENCY_ATTR_RADAR); - NLA_PUT_U8(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, - chan->max_power); + NLA_PUT_U32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, + DBM_TO_MBM(chan->max_power)); nla_nest_end(msg, nl_freq); } -- cgit v1.2.3 From 244e6c2d0724bc4908a1995804704bdee3b31528 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Wed, 26 Nov 2008 15:24:32 -0800 Subject: pkt_sched: gen_estimator: Optimize gen_estimator_active() Since all other gen_estimator functions use bstats and rate_est params together, and searching for them is optimized now, let's use this also in gen_estimator_active(). The return type of gen_estimator_active() is changed to bool, and gen_find_node() parameters to const, btw. In tcf_act_police_locate() a check for ACT_P_CREATED is added before calling gen_estimator_active(). Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/gen_stats.h | 4 ++-- net/core/gen_estimator.c | 25 ++++++++----------------- net/sched/act_police.c | 4 +++- 3 files changed, 13 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index dcf5bfa7d4f1..d136b5240ef2 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -45,6 +45,6 @@ extern void gen_kill_estimator(struct gnet_stats_basic *bstats, extern int gen_replace_estimator(struct gnet_stats_basic *bstats, struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct nlattr *opt); -extern int gen_estimator_active(const struct gnet_stats_rate_est *rate_est); - +extern bool gen_estimator_active(const struct gnet_stats_basic *bstats, + const struct gnet_stats_rate_est *rate_est); #endif diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 3885550f0187..9cc9f95b109e 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -163,8 +163,9 @@ static void gen_add_node(struct gen_estimator *est) rb_insert_color(&est->node, &est_root); } -static struct gen_estimator *gen_find_node(struct gnet_stats_basic *bstats, - struct gnet_stats_rate_est *rate_est) +static +struct gen_estimator *gen_find_node(const struct gnet_stats_basic *bstats, + const struct gnet_stats_rate_est *rate_est) { struct rb_node *p = est_root.rb_node; @@ -301,26 +302,16 @@ EXPORT_SYMBOL(gen_replace_estimator); /** * gen_estimator_active - test if estimator is currently in use + * @bstats: basic statistics * @rate_est: rate estimator statistics * - * Returns 1 if estimator is active, and 0 if not. + * Returns true if estimator is active, and false if not. */ -int gen_estimator_active(const struct gnet_stats_rate_est *rate_est) +bool gen_estimator_active(const struct gnet_stats_basic *bstats, + const struct gnet_stats_rate_est *rate_est) { - int idx; - struct gen_estimator *e; - ASSERT_RTNL(); - for (idx=0; idx <= EST_MAX_INTERVAL; idx++) { - if (!elist[idx].timer.function) - continue; - - list_for_each_entry(e, &elist[idx].list, list) { - if (e->rate_est == rate_est) - return 1; - } - } - return 0; + return gen_find_node(bstats, rate_est) != NULL; } EXPORT_SYMBOL(gen_estimator_active); diff --git a/net/sched/act_police.c b/net/sched/act_police.c index c39f60cea6ee..5c72a116b1a4 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -183,7 +183,9 @@ override: if (R_tab == NULL) goto failure; - if (!est && !gen_estimator_active(&police->tcf_rate_est)) { + if (!est && (ret == ACT_P_CREATED || + !gen_estimator_active(&police->tcf_bstats, + &police->tcf_rate_est))) { err = -EINVAL; goto failure; } -- cgit v1.2.3 From c4106aa88a440430d387e022f2ad6dc1e0d52e98 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Thu, 27 Nov 2008 00:12:47 -0800 Subject: decnet: remove private wrappers of endian helpers Signed-off-by: Harvey Harrison Reviewed-by: Steven Whitehouse Signed-off-by: David S. Miller --- include/net/dn.h | 8 ++---- include/net/dn_fib.h | 6 ++-- net/decnet/af_decnet.c | 62 +++++++++++++++++++++--------------------- net/decnet/dn_dev.c | 20 +++++++------- net/decnet/dn_neigh.c | 18 ++++++------ net/decnet/dn_nsp_in.c | 28 ++++++++++--------- net/decnet/dn_nsp_out.c | 22 +++++++-------- net/decnet/dn_route.c | 18 ++++++------ net/decnet/dn_table.c | 2 +- net/decnet/sysctl_net_decnet.c | 4 +-- 10 files changed, 94 insertions(+), 94 deletions(-) (limited to 'include') diff --git a/include/net/dn.h b/include/net/dn.h index 627778384c84..7cd6bb83f7ab 100644 --- a/include/net/dn.h +++ b/include/net/dn.h @@ -4,9 +4,7 @@ #include #include #include - -#define dn_ntohs(x) le16_to_cpu(x) -#define dn_htons(x) cpu_to_le16(x) +#include struct dn_scp /* Session Control Port */ { @@ -175,7 +173,7 @@ struct dn_skb_cb { static inline __le16 dn_eth2dn(unsigned char *ethaddr) { - return dn_htons(ethaddr[4] | (ethaddr[5] << 8)); + return get_unaligned((__le16 *)(ethaddr + 4)); } static inline __le16 dn_saddr2dn(struct sockaddr_dn *saddr) @@ -185,7 +183,7 @@ static inline __le16 dn_saddr2dn(struct sockaddr_dn *saddr) static inline void dn_dn2eth(unsigned char *ethaddr, __le16 addr) { - __u16 a = dn_ntohs(addr); + __u16 a = le16_to_cpu(addr); ethaddr[0] = 0xAA; ethaddr[1] = 0x00; ethaddr[2] = 0x04; diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index 30125119c950..c378be7bf960 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -181,9 +181,9 @@ static inline void dn_fib_res_put(struct dn_fib_res *res) static inline __le16 dnet_make_mask(int n) { - if (n) - return dn_htons(~((1<<(16-n))-1)); - return 0; + if (n) + return cpu_to_le16(~((1 << (16 - n)) - 1)); + return cpu_to_le16(0); } #endif /* _NET_DN_FIB_H */ diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 3c23ab33dbc0..cf0e18499297 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -167,7 +167,7 @@ static struct hlist_head *dn_find_list(struct sock *sk) if (scp->addr.sdn_flags & SDF_WILD) return hlist_empty(&dn_wild_sk) ? &dn_wild_sk : NULL; - return &dn_sk_hash[dn_ntohs(scp->addrloc) & DN_SK_HASH_MASK]; + return &dn_sk_hash[le16_to_cpu(scp->addrloc) & DN_SK_HASH_MASK]; } /* @@ -181,7 +181,7 @@ static int check_port(__le16 port) if (port == 0) return -1; - sk_for_each(sk, node, &dn_sk_hash[dn_ntohs(port) & DN_SK_HASH_MASK]) { + sk_for_each(sk, node, &dn_sk_hash[le16_to_cpu(port) & DN_SK_HASH_MASK]) { struct dn_scp *scp = DN_SK(sk); if (scp->addrloc == port) return -1; @@ -195,12 +195,12 @@ static unsigned short port_alloc(struct sock *sk) static unsigned short port = 0x2000; unsigned short i_port = port; - while(check_port(dn_htons(++port)) != 0) { + while(check_port(cpu_to_le16(++port)) != 0) { if (port == i_port) return 0; } - scp->addrloc = dn_htons(port); + scp->addrloc = cpu_to_le16(port); return 1; } @@ -255,7 +255,7 @@ static struct hlist_head *listen_hash(struct sockaddr_dn *addr) if (hash == 0) { hash = addr->sdn_objnamel; - for(i = 0; i < dn_ntohs(addr->sdn_objnamel); i++) { + for(i = 0; i < le16_to_cpu(addr->sdn_objnamel); i++) { hash ^= addr->sdn_objname[i]; hash ^= (hash << 3); } @@ -297,16 +297,16 @@ int dn_sockaddr2username(struct sockaddr_dn *sdn, unsigned char *buf, unsigned c break; case 1: *buf++ = 0; - *buf++ = dn_ntohs(sdn->sdn_objnamel); - memcpy(buf, sdn->sdn_objname, dn_ntohs(sdn->sdn_objnamel)); - len = 3 + dn_ntohs(sdn->sdn_objnamel); + *buf++ = le16_to_cpu(sdn->sdn_objnamel); + memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel)); + len = 3 + le16_to_cpu(sdn->sdn_objnamel); break; case 2: memset(buf, 0, 5); buf += 5; - *buf++ = dn_ntohs(sdn->sdn_objnamel); - memcpy(buf, sdn->sdn_objname, dn_ntohs(sdn->sdn_objnamel)); - len = 7 + dn_ntohs(sdn->sdn_objnamel); + *buf++ = le16_to_cpu(sdn->sdn_objnamel); + memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel)); + len = 7 + le16_to_cpu(sdn->sdn_objnamel); break; } @@ -327,7 +327,7 @@ int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *sdn, int namel = 12; sdn->sdn_objnum = 0; - sdn->sdn_objnamel = dn_htons(0); + sdn->sdn_objnamel = cpu_to_le16(0); memset(sdn->sdn_objname, 0, DN_MAXOBJL); if (len < 2) @@ -361,13 +361,13 @@ int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *sdn, if (len < 0) return -1; - sdn->sdn_objnamel = dn_htons(*data++); - len -= dn_ntohs(sdn->sdn_objnamel); + sdn->sdn_objnamel = cpu_to_le16(*data++); + len -= le16_to_cpu(sdn->sdn_objnamel); - if ((len < 0) || (dn_ntohs(sdn->sdn_objnamel) > namel)) + if ((len < 0) || (le16_to_cpu(sdn->sdn_objnamel) > namel)) return -1; - memcpy(sdn->sdn_objname, data, dn_ntohs(sdn->sdn_objnamel)); + memcpy(sdn->sdn_objname, data, le16_to_cpu(sdn->sdn_objnamel)); return size - len; } @@ -391,7 +391,7 @@ struct sock *dn_sklist_find_listener(struct sockaddr_dn *addr) continue; if (scp->addr.sdn_objnamel != addr->sdn_objnamel) continue; - if (memcmp(scp->addr.sdn_objname, addr->sdn_objname, dn_ntohs(addr->sdn_objnamel)) != 0) + if (memcmp(scp->addr.sdn_objname, addr->sdn_objname, le16_to_cpu(addr->sdn_objnamel)) != 0) continue; } sock_hold(sk); @@ -419,7 +419,7 @@ struct sock *dn_find_by_skb(struct sk_buff *skb) struct dn_scp *scp; read_lock(&dn_hash_lock); - sk_for_each(sk, node, &dn_sk_hash[dn_ntohs(cb->dst_port) & DN_SK_HASH_MASK]) { + sk_for_each(sk, node, &dn_sk_hash[le16_to_cpu(cb->dst_port) & DN_SK_HASH_MASK]) { scp = DN_SK(sk); if (cb->src != dn_saddr2dn(&scp->peer)) continue; @@ -734,10 +734,10 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (saddr->sdn_family != AF_DECnet) return -EINVAL; - if (dn_ntohs(saddr->sdn_nodeaddrl) && (dn_ntohs(saddr->sdn_nodeaddrl) != 2)) + if (le16_to_cpu(saddr->sdn_nodeaddrl) && (le16_to_cpu(saddr->sdn_nodeaddrl) != 2)) return -EINVAL; - if (dn_ntohs(saddr->sdn_objnamel) > DN_MAXOBJL) + if (le16_to_cpu(saddr->sdn_objnamel) > DN_MAXOBJL) return -EINVAL; if (saddr->sdn_flags & ~SDF_WILD) @@ -748,7 +748,7 @@ static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) return -EACCES; if (!(saddr->sdn_flags & SDF_WILD)) { - if (dn_ntohs(saddr->sdn_nodeaddrl)) { + if (le16_to_cpu(saddr->sdn_nodeaddrl)) { read_lock(&dev_base_lock); ldev = NULL; for_each_netdev(&init_net, dev) { @@ -799,15 +799,15 @@ static int dn_auto_bind(struct socket *sock) if ((scp->accessdata.acc_accl != 0) && (scp->accessdata.acc_accl <= 12)) { - scp->addr.sdn_objnamel = dn_htons(scp->accessdata.acc_accl); - memcpy(scp->addr.sdn_objname, scp->accessdata.acc_acc, dn_ntohs(scp->addr.sdn_objnamel)); + scp->addr.sdn_objnamel = cpu_to_le16(scp->accessdata.acc_accl); + memcpy(scp->addr.sdn_objname, scp->accessdata.acc_acc, le16_to_cpu(scp->addr.sdn_objnamel)); scp->accessdata.acc_accl = 0; memset(scp->accessdata.acc_acc, 0, 40); } /* End of compatibility stuff */ - scp->addr.sdn_add.a_len = dn_htons(2); + scp->addr.sdn_add.a_len = cpu_to_le16(2); rv = dn_dev_bind_default((__le16 *)scp->addr.sdn_add.a_addr); if (rv == 0) { rv = dn_hash_sock(sk); @@ -1027,7 +1027,7 @@ static void dn_user_copy(struct sk_buff *skb, struct optdata_dn *opt) u16 len = *ptr++; /* yes, it's 8bit on the wire */ BUG_ON(len > 16); /* we've checked the contents earlier */ - opt->opt_optl = dn_htons(len); + opt->opt_optl = cpu_to_le16(len); opt->opt_status = 0; memcpy(opt->opt_data, ptr, len); skb_pull(skb, len + 1); @@ -1375,7 +1375,7 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us if (optlen != sizeof(struct optdata_dn)) return -EINVAL; - if (dn_ntohs(u.opt.opt_optl) > 16) + if (le16_to_cpu(u.opt.opt_optl) > 16) return -EINVAL; memcpy(&scp->conndata_out, &u.opt, optlen); @@ -1388,7 +1388,7 @@ static int __dn_setsockopt(struct socket *sock, int level,int optname, char __us if (optlen != sizeof(struct optdata_dn)) return -EINVAL; - if (dn_ntohs(u.opt.opt_optl) > 16) + if (le16_to_cpu(u.opt.opt_optl) > 16) return -EINVAL; memcpy(&scp->discdata_out, &u.opt, optlen); @@ -2213,12 +2213,12 @@ static void dn_printable_object(struct sockaddr_dn *dn, unsigned char *buf) { int i; - switch (dn_ntohs(dn->sdn_objnamel)) { + switch (le16_to_cpu(dn->sdn_objnamel)) { case 0: sprintf(buf, "%d", dn->sdn_objnum); break; default: - for (i = 0; i < dn_ntohs(dn->sdn_objnamel); i++) { + for (i = 0; i < le16_to_cpu(dn->sdn_objnamel); i++) { buf[i] = dn->sdn_objname[i]; if (IS_NOT_PRINTABLE(buf[i])) buf[i] = '.'; @@ -2281,7 +2281,7 @@ static inline void dn_socket_format_entry(struct seq_file *seq, struct sock *sk) seq_printf(seq, "%6s/%04X %04d:%04d %04d:%04d %01d %-16s " "%6s/%04X %04d:%04d %04d:%04d %01d %-16s %4s %s\n", - dn_addr2asc(dn_ntohs(dn_saddr2dn(&scp->addr)), buf1), + dn_addr2asc(le16_to_cpu(dn_saddr2dn(&scp->addr)), buf1), scp->addrloc, scp->numdat, scp->numoth, @@ -2289,7 +2289,7 @@ static inline void dn_socket_format_entry(struct seq_file *seq, struct sock *sk) scp->ackxmt_oth, scp->flowloc_sw, local_object, - dn_addr2asc(dn_ntohs(dn_saddr2dn(&scp->peer)), buf2), + dn_addr2asc(le16_to_cpu(dn_saddr2dn(&scp->peer)), buf2), scp->addrrem, scp->numdat_rcv, scp->numoth_rcv, diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 28e26bd08e24..424d86ac12fe 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -885,7 +885,7 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa) memcpy(msg->tiver, dn_eco_version, 3); dn_dn2eth(msg->id, ifa->ifa_local); msg->iinfo = DN_RT_INFO_ENDN; - msg->blksize = dn_htons(mtu2blksize(dev)); + msg->blksize = cpu_to_le16(mtu2blksize(dev)); msg->area = 0x00; memset(msg->seed, 0, 8); memcpy(msg->neighbor, dn_hiord, ETH_ALEN); @@ -895,13 +895,13 @@ static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa) dn_dn2eth(msg->neighbor, dn->addr); } - msg->timer = dn_htons((unsigned short)dn_db->parms.t3); + msg->timer = cpu_to_le16((unsigned short)dn_db->parms.t3); msg->mpd = 0x00; msg->datalen = 0x02; memset(msg->data, 0xAA, 2); pktlen = (__le16 *)skb_push(skb,2); - *pktlen = dn_htons(skb->len - 2); + *pktlen = cpu_to_le16(skb->len - 2); skb_reset_network_header(skb); @@ -929,7 +929,7 @@ static int dn_am_i_a_router(struct dn_neigh *dn, struct dn_dev *dn_db, struct dn if (dn->priority != dn_db->parms.priority) return 0; - if (dn_ntohs(dn->addr) < dn_ntohs(ifa->ifa_local)) + if (le16_to_cpu(dn->addr) < le16_to_cpu(ifa->ifa_local)) return 1; return 0; @@ -973,11 +973,11 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa) ptr += ETH_ALEN; *ptr++ = dn_db->parms.forwarding == 1 ? DN_RT_INFO_L1RT : DN_RT_INFO_L2RT; - *((__le16 *)ptr) = dn_htons(mtu2blksize(dev)); + *((__le16 *)ptr) = cpu_to_le16(mtu2blksize(dev)); ptr += 2; *ptr++ = dn_db->parms.priority; /* Priority */ *ptr++ = 0; /* Area: Reserved */ - *((__le16 *)ptr) = dn_htons((unsigned short)dn_db->parms.t3); + *((__le16 *)ptr) = cpu_to_le16((unsigned short)dn_db->parms.t3); ptr += 2; *ptr++ = 0; /* MPD: Reserved */ i1 = ptr++; @@ -993,7 +993,7 @@ static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa) skb_trim(skb, (27 + *i2)); pktlen = (__le16 *)skb_push(skb, 2); - *pktlen = dn_htons(skb->len - 2); + *pktlen = cpu_to_le16(skb->len - 2); skb_reset_network_header(skb); @@ -1401,8 +1401,8 @@ static int dn_dev_seq_show(struct seq_file *seq, void *v) mtu2blksize(dev), dn_db->parms.priority, dn_db->parms.state, dn_db->parms.name, - dn_db->router ? dn_addr2asc(dn_ntohs(*(__le16 *)dn_db->router->primary_key), router_buf) : "", - dn_db->peer ? dn_addr2asc(dn_ntohs(*(__le16 *)dn_db->peer->primary_key), peer_buf) : ""); + dn_db->router ? dn_addr2asc(le16_to_cpu(*(__le16 *)dn_db->router->primary_key), router_buf) : "", + dn_db->peer ? dn_addr2asc(le16_to_cpu(*(__le16 *)dn_db->peer->primary_key), peer_buf) : ""); } return 0; } @@ -1445,7 +1445,7 @@ void __init dn_dev_init(void) return; } - decnet_address = dn_htons((addr[0] << 10) | addr[1]); + decnet_address = cpu_to_le16((addr[0] << 10) | addr[1]); dn_dev_devices_on(); diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 1ca13b17974d..05b5aa05e50e 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -250,7 +250,7 @@ static int dn_long_output(struct sk_buff *skb) data = skb_push(skb, sizeof(struct dn_long_packet) + 3); lp = (struct dn_long_packet *)(data+3); - *((__le16 *)data) = dn_htons(skb->len - 2); + *((__le16 *)data) = cpu_to_le16(skb->len - 2); *(data + 2) = 1 | DN_RT_F_PF; /* Padding */ lp->msgflg = DN_RT_PKT_LONG|(cb->rt_flags&(DN_RT_F_IE|DN_RT_F_RQR|DN_RT_F_RTS)); @@ -294,7 +294,7 @@ static int dn_short_output(struct sk_buff *skb) } data = skb_push(skb, sizeof(struct dn_short_packet) + 2); - *((__le16 *)data) = dn_htons(skb->len - 2); + *((__le16 *)data) = cpu_to_le16(skb->len - 2); sp = (struct dn_short_packet *)(data+2); sp->msgflg = DN_RT_PKT_SHORT|(cb->rt_flags&(DN_RT_F_RQR|DN_RT_F_RTS)); @@ -336,12 +336,12 @@ static int dn_phase3_output(struct sk_buff *skb) } data = skb_push(skb, sizeof(struct dn_short_packet) + 2); - *((__le16 *)data) = dn_htons(skb->len - 2); + *((__le16 *)data) = cpu_to_le16(skb->len - 2); sp = (struct dn_short_packet *)(data + 2); sp->msgflg = DN_RT_PKT_SHORT|(cb->rt_flags&(DN_RT_F_RQR|DN_RT_F_RTS)); - sp->dstnode = cb->dst & dn_htons(0x03ff); - sp->srcnode = cb->src & dn_htons(0x03ff); + sp->dstnode = cb->dst & cpu_to_le16(0x03ff); + sp->srcnode = cb->src & cpu_to_le16(0x03ff); sp->forward = cb->hops & 0x3f; skb_reset_network_header(skb); @@ -394,7 +394,7 @@ int dn_neigh_router_hello(struct sk_buff *skb) if (neigh->dev->type == ARPHRD_ETHER) memcpy(neigh->ha, ð_hdr(skb)->h_source, ETH_ALEN); - dn->blksize = dn_ntohs(msg->blksize); + dn->blksize = le16_to_cpu(msg->blksize); dn->priority = msg->priority; dn->flags &= ~DN_NDFLAG_P3; @@ -410,7 +410,7 @@ int dn_neigh_router_hello(struct sk_buff *skb) } /* Only use routers in our area */ - if ((dn_ntohs(src)>>10) == (dn_ntohs((decnet_address))>>10)) { + if ((le16_to_cpu(src)>>10) == (le16_to_cpu((decnet_address))>>10)) { if (!dn_db->router) { dn_db->router = neigh_clone(neigh); } else { @@ -453,7 +453,7 @@ int dn_neigh_endnode_hello(struct sk_buff *skb) if (neigh->dev->type == ARPHRD_ETHER) memcpy(neigh->ha, ð_hdr(skb)->h_source, ETH_ALEN); dn->flags &= ~(DN_NDFLAG_R1 | DN_NDFLAG_R2); - dn->blksize = dn_ntohs(msg->blksize); + dn->blksize = le16_to_cpu(msg->blksize); dn->priority = 0; } @@ -543,7 +543,7 @@ static inline void dn_neigh_format_entry(struct seq_file *seq, read_lock(&n->lock); seq_printf(seq, "%-7s %s%s%s %02x %02d %07ld %-8s\n", - dn_addr2asc(dn_ntohs(dn->addr), buf), + dn_addr2asc(le16_to_cpu(dn->addr), buf), (dn->flags&DN_NDFLAG_R1) ? "1" : "-", (dn->flags&DN_NDFLAG_R2) ? "2" : "-", (dn->flags&DN_NDFLAG_P3) ? "3" : "-", diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 4074a6e5d0de..5d8a2a56fd39 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -83,7 +83,9 @@ static void dn_log_martian(struct sk_buff *skb, const char *msg) if (decnet_log_martians && net_ratelimit()) { char *devname = skb->dev ? skb->dev->name : "???"; struct dn_skb_cb *cb = DN_SKB_CB(skb); - printk(KERN_INFO "DECnet: Martian packet (%s) dev=%s src=0x%04hx dst=0x%04hx srcport=0x%04hx dstport=0x%04hx\n", msg, devname, dn_ntohs(cb->src), dn_ntohs(cb->dst), dn_ntohs(cb->src_port), dn_ntohs(cb->dst_port)); + printk(KERN_INFO "DECnet: Martian packet (%s) dev=%s src=0x%04hx dst=0x%04hx srcport=0x%04hx dstport=0x%04hx\n", + msg, devname, le16_to_cpu(cb->src), le16_to_cpu(cb->dst), + le16_to_cpu(cb->src_port), le16_to_cpu(cb->dst_port)); } } @@ -133,7 +135,7 @@ static int dn_process_ack(struct sock *sk, struct sk_buff *skb, int oth) if (skb->len < 2) return len; - if ((ack = dn_ntohs(*ptr)) & 0x8000) { + if ((ack = le16_to_cpu(*ptr)) & 0x8000) { skb_pull(skb, 2); ptr++; len += 2; @@ -147,7 +149,7 @@ static int dn_process_ack(struct sock *sk, struct sk_buff *skb, int oth) if (skb->len < 2) return len; - if ((ack = dn_ntohs(*ptr)) & 0x8000) { + if ((ack = le16_to_cpu(*ptr)) & 0x8000) { skb_pull(skb, 2); len += 2; if ((ack & 0x4000) == 0) { @@ -237,7 +239,7 @@ static struct sock *dn_find_listener(struct sk_buff *skb, unsigned short *reason cb->dst_port = msg->dstaddr; cb->services = msg->services; cb->info = msg->info; - cb->segsize = dn_ntohs(msg->segsize); + cb->segsize = le16_to_cpu(msg->segsize); if (!pskb_may_pull(skb, sizeof(*msg))) goto err_out; @@ -344,7 +346,7 @@ static void dn_nsp_conn_conf(struct sock *sk, struct sk_buff *skb) ptr = skb->data; cb->services = *ptr++; cb->info = *ptr++; - cb->segsize = dn_ntohs(*(__le16 *)ptr); + cb->segsize = le16_to_cpu(*(__le16 *)ptr); if ((scp->state == DN_CI) || (scp->state == DN_CD)) { scp->persist = 0; @@ -361,7 +363,7 @@ static void dn_nsp_conn_conf(struct sock *sk, struct sk_buff *skb) if (skb->len > 0) { u16 dlen = *skb->data; if ((dlen <= 16) && (dlen <= skb->len)) { - scp->conndata_in.opt_optl = dn_htons(dlen); + scp->conndata_in.opt_optl = cpu_to_le16(dlen); skb_copy_from_linear_data_offset(skb, 1, scp->conndata_in.opt_data, dlen); } @@ -396,17 +398,17 @@ static void dn_nsp_disc_init(struct sock *sk, struct sk_buff *skb) if (skb->len < 2) goto out; - reason = dn_ntohs(*(__le16 *)skb->data); + reason = le16_to_cpu(*(__le16 *)skb->data); skb_pull(skb, 2); - scp->discdata_in.opt_status = dn_htons(reason); + scp->discdata_in.opt_status = cpu_to_le16(reason); scp->discdata_in.opt_optl = 0; memset(scp->discdata_in.opt_data, 0, 16); if (skb->len > 0) { u16 dlen = *skb->data; if ((dlen <= 16) && (dlen <= skb->len)) { - scp->discdata_in.opt_optl = dn_htons(dlen); + scp->discdata_in.opt_optl = cpu_to_le16(dlen); skb_copy_from_linear_data_offset(skb, 1, scp->discdata_in.opt_data, dlen); } } @@ -463,7 +465,7 @@ static void dn_nsp_disc_conf(struct sock *sk, struct sk_buff *skb) if (skb->len != 2) goto out; - reason = dn_ntohs(*(__le16 *)skb->data); + reason = le16_to_cpu(*(__le16 *)skb->data); sk->sk_state = TCP_CLOSE; @@ -512,7 +514,7 @@ static void dn_nsp_linkservice(struct sock *sk, struct sk_buff *skb) if (skb->len != 4) goto out; - segnum = dn_ntohs(*(__le16 *)ptr); + segnum = le16_to_cpu(*(__le16 *)ptr); ptr += 2; lsflags = *(unsigned char *)ptr++; fcval = *ptr; @@ -620,7 +622,7 @@ static void dn_nsp_otherdata(struct sock *sk, struct sk_buff *skb) if (skb->len < 2) goto out; - cb->segnum = segnum = dn_ntohs(*(__le16 *)skb->data); + cb->segnum = segnum = le16_to_cpu(*(__le16 *)skb->data); skb_pull(skb, 2); if (seq_next(scp->numoth_rcv, segnum)) { @@ -648,7 +650,7 @@ static void dn_nsp_data(struct sock *sk, struct sk_buff *skb) if (skb->len < 2) goto out; - cb->segnum = segnum = dn_ntohs(*(__le16 *)skb->data); + cb->segnum = segnum = le16_to_cpu(*(__le16 *)skb->data); skb_pull(skb, 2); if (seq_next(scp->numdat_rcv, segnum)) { diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index 81a40ff10088..2013c25b7f5a 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -325,8 +325,8 @@ static __le16 *dn_mk_ack_header(struct sock *sk, struct sk_buff *skb, unsigned c ptr = (__le16 *)dn_mk_common_header(scp, skb, msgflag, hlen); - *ptr++ = dn_htons(acknum); - *ptr++ = dn_htons(ackcrs); + *ptr++ = cpu_to_le16(acknum); + *ptr++ = cpu_to_le16(ackcrs); return ptr; } @@ -344,7 +344,7 @@ static __le16 *dn_nsp_mk_data_header(struct sock *sk, struct sk_buff *skb, int o cb->segnum = scp->numdat; seq_add(&scp->numdat, 1); } - *(ptr++) = dn_htons(cb->segnum); + *(ptr++) = cpu_to_le16(cb->segnum); return ptr; } @@ -522,7 +522,7 @@ void dn_send_conn_conf(struct sock *sk, gfp_t gfp) struct dn_scp *scp = DN_SK(sk); struct sk_buff *skb = NULL; struct nsp_conn_init_msg *msg; - __u8 len = (__u8)dn_ntohs(scp->conndata_out.opt_optl); + __u8 len = (__u8)le16_to_cpu(scp->conndata_out.opt_optl); if ((skb = dn_alloc_skb(sk, 50 + len, gfp)) == NULL) return; @@ -533,7 +533,7 @@ void dn_send_conn_conf(struct sock *sk, gfp_t gfp) msg->srcaddr = scp->addrloc; msg->services = scp->services_loc; msg->info = scp->info_loc; - msg->segsize = dn_htons(scp->segsize_loc); + msg->segsize = cpu_to_le16(scp->segsize_loc); *skb_put(skb,1) = len; @@ -559,7 +559,7 @@ static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg, if ((dst == NULL) || (rem == 0)) { if (net_ratelimit()) - printk(KERN_DEBUG "DECnet: dn_nsp_do_disc: BUG! Please report this to SteveW@ACM.org rem=%u dst=%p\n", dn_ntohs(rem), dst); + printk(KERN_DEBUG "DECnet: dn_nsp_do_disc: BUG! Please report this to SteveW@ACM.org rem=%u dst=%p\n", le16_to_cpu(rem), dst); return; } @@ -572,7 +572,7 @@ static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg, msg += 2; *(__le16 *)msg = loc; msg += 2; - *(__le16 *)msg = dn_htons(reason); + *(__le16 *)msg = cpu_to_le16(reason); msg += 2; if (msgflg == NSP_DISCINIT) *msg++ = ddl; @@ -598,10 +598,10 @@ void dn_nsp_send_disc(struct sock *sk, unsigned char msgflg, int ddl = 0; if (msgflg == NSP_DISCINIT) - ddl = dn_ntohs(scp->discdata_out.opt_optl); + ddl = le16_to_cpu(scp->discdata_out.opt_optl); if (reason == 0) - reason = dn_ntohs(scp->discdata_out.opt_status); + reason = le16_to_cpu(scp->discdata_out.opt_status); dn_nsp_do_disc(sk, msgflg, reason, gfp, sk->sk_dst_cache, ddl, scp->discdata_out.opt_data, scp->addrrem, scp->addrloc); @@ -675,7 +675,7 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg) msg->srcaddr = scp->addrloc; msg->services = scp->services_loc; /* Requested flow control */ msg->info = scp->info_loc; /* Version Number */ - msg->segsize = dn_htons(scp->segsize_loc); /* Max segment size */ + msg->segsize = cpu_to_le16(scp->segsize_loc); /* Max segment size */ if (scp->peer.sdn_objnum) type = 0; @@ -708,7 +708,7 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg) if (aux > 0) memcpy(skb_put(skb, aux), scp->accessdata.acc_acc, aux); - aux = (__u8)dn_ntohs(scp->conndata_out.opt_optl); + aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl); *skb_put(skb, 1) = aux; if (aux > 0) memcpy(skb_put(skb,aux), scp->conndata_out.opt_data, aux); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index eeaa3d819f9c..b33b254c52cb 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -475,7 +475,7 @@ static int dn_route_rx_packet(struct sk_buff *skb) printk(KERN_DEBUG "DECnet: dn_route_rx_packet: rt_flags=0x%02x dev=%s len=%d src=0x%04hx dst=0x%04hx err=%d type=%d\n", (int)cb->rt_flags, devname, skb->len, - dn_ntohs(cb->src), dn_ntohs(cb->dst), + le16_to_cpu(cb->src), le16_to_cpu(cb->dst), err, skb->pkt_type); } @@ -575,7 +575,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type { struct dn_skb_cb *cb; unsigned char flags = 0; - __u16 len = dn_ntohs(*(__le16 *)skb->data); + __u16 len = le16_to_cpu(*(__le16 *)skb->data); struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr; unsigned char padlen = 0; @@ -773,7 +773,7 @@ static int dn_rt_bug(struct sk_buff *skb) struct dn_skb_cb *cb = DN_SKB_CB(skb); printk(KERN_DEBUG "dn_rt_bug: skb from:%04x to:%04x\n", - dn_ntohs(cb->src), dn_ntohs(cb->dst)); + le16_to_cpu(cb->src), le16_to_cpu(cb->dst)); } kfree_skb(skb); @@ -816,7 +816,7 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) static inline int dn_match_addr(__le16 addr1, __le16 addr2) { - __u16 tmp = dn_ntohs(addr1) ^ dn_ntohs(addr2); + __u16 tmp = le16_to_cpu(addr1) ^ le16_to_cpu(addr2); int match = 16; while(tmp) { tmp >>= 1; @@ -886,8 +886,8 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *old if (decnet_debug_level & 16) printk(KERN_DEBUG "dn_route_output_slow: dst=%04x src=%04x mark=%d" - " iif=%d oif=%d\n", dn_ntohs(oldflp->fld_dst), - dn_ntohs(oldflp->fld_src), + " iif=%d oif=%d\n", le16_to_cpu(oldflp->fld_dst), + le16_to_cpu(oldflp->fld_src), oldflp->mark, init_net.loopback_dev->ifindex, oldflp->oif); /* If we have an output interface, verify its a DECnet device */ @@ -959,7 +959,7 @@ source_ok: printk(KERN_DEBUG "dn_route_output_slow: initial checks complete." " dst=%o4x src=%04x oif=%d try_hard=%d\n", - dn_ntohs(fl.fld_dst), dn_ntohs(fl.fld_src), + le16_to_cpu(fl.fld_dst), le16_to_cpu(fl.fld_src), fl.oif, try_hard); /* @@ -1711,8 +1711,8 @@ static int dn_rt_cache_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n", rt->u.dst.dev ? rt->u.dst.dev->name : "*", - dn_addr2asc(dn_ntohs(rt->rt_daddr), buf1), - dn_addr2asc(dn_ntohs(rt->rt_saddr), buf2), + dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1), + dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2), atomic_read(&rt->u.dst.__refcnt), rt->u.dst.__use, (int) dst_metric(&rt->u.dst, RTAX_RTT)); diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 3a2830ac89c2..69ad9280c693 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -85,7 +85,7 @@ static int dn_fib_hash_zombies; static inline dn_fib_idx_t dn_hash(dn_fib_key_t key, struct dn_zone *dz) { - u16 h = dn_ntohs(key.datum)>>(16 - dz->dz_order); + u16 h = le16_to_cpu(key.datum)>>(16 - dz->dz_order); h ^= (h >> 10); h ^= (h >> 6); h &= DZ_HASHMASK(dz); diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 2f360a1e5e49..965397af9a80 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -126,7 +126,7 @@ static int parse_addr(__le16 *addr, char *str) if (INVALID_END_CHAR(*str)) return -1; - *addr = dn_htons((area << 10) | node); + *addr = cpu_to_le16((area << 10) | node); return 0; } @@ -201,7 +201,7 @@ static int dn_node_address_handler(ctl_table *table, int write, return 0; } - dn_addr2asc(dn_ntohs(decnet_address), addr); + dn_addr2asc(le16_to_cpu(decnet_address), addr); len = strlen(addr); addr[len++] = '\n'; -- cgit v1.2.3 From d211af055d0c12dc3416c2886e6fbdc6eb74a381 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Mon, 24 Nov 2008 15:38:45 +0100 Subject: i386: get rid of the use of KPROBE_ENTRY / KPROBE_END entry_32.S is now the only user of KPROBE_ENTRY / KPROBE_END, treewide. This patch reorders entry_64.S and explicitly generates a separate section for functions that need the protection. The generated code before and after the patch is equal. The KPROBE_ENTRY and KPROBE_END macro's are removed too. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 438 +++++++++++++++++++++++---------------------- include/linux/linkage.h | 8 - 2 files changed, 224 insertions(+), 222 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index bd02ec77edc4..6e96028d1a9c 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -688,65 +688,6 @@ ENDPROC(name) /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" -KPROBE_ENTRY(page_fault) - RING0_EC_FRAME - pushl $do_page_fault - CFI_ADJUST_CFA_OFFSET 4 - ALIGN -error_code: - /* the function address is in %fs's slot on the stack */ - pushl %es - CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET es, 0*/ - pushl %ds - CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET ds, 0*/ - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET eax, 0 - pushl %ebp - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ebp, 0 - pushl %edi - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET edi, 0 - pushl %esi - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET esi, 0 - pushl %edx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET edx, 0 - pushl %ecx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ecx, 0 - pushl %ebx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ebx, 0 - cld - pushl %fs - CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET fs, 0*/ - movl $(__KERNEL_PERCPU), %ecx - movl %ecx, %fs - UNWIND_ESPFIX_STACK - popl %ecx - CFI_ADJUST_CFA_OFFSET -4 - /*CFI_REGISTER es, ecx*/ - movl PT_FS(%esp), %edi # get the function address - movl PT_ORIG_EAX(%esp), %edx # get the error code - movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart - mov %ecx, PT_FS(%esp) - /*CFI_REL_OFFSET fs, ES*/ - movl $(__USER_DS), %ecx - movl %ecx, %ds - movl %ecx, %es - TRACE_IRQS_OFF - movl %esp,%eax # pt_regs pointer - call *%edi - jmp ret_from_exception - CFI_ENDPROC -KPROBE_END(page_fault) - ENTRY(coprocessor_error) RING0_INT_FRAME pushl $0 @@ -777,140 +718,6 @@ ENTRY(device_not_available) CFI_ENDPROC END(device_not_available) -/* - * Debug traps and NMI can happen at the one SYSENTER instruction - * that sets up the real kernel stack. Check here, since we can't - * allow the wrong stack to be used. - * - * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have - * already pushed 3 words if it hits on the sysenter instruction: - * eflags, cs and eip. - * - * We just load the right stack, and push the three (known) values - * by hand onto the new stack - while updating the return eip past - * the instruction that would have done it for sysenter. - */ -#define FIX_STACK(offset, ok, label) \ - cmpw $__KERNEL_CS,4(%esp); \ - jne ok; \ -label: \ - movl TSS_sysenter_sp0+offset(%esp),%esp; \ - CFI_DEF_CFA esp, 0; \ - CFI_UNDEFINED eip; \ - pushfl; \ - CFI_ADJUST_CFA_OFFSET 4; \ - pushl $__KERNEL_CS; \ - CFI_ADJUST_CFA_OFFSET 4; \ - pushl $sysenter_past_esp; \ - CFI_ADJUST_CFA_OFFSET 4; \ - CFI_REL_OFFSET eip, 0 - -KPROBE_ENTRY(debug) - RING0_INT_FRAME - cmpl $ia32_sysenter_target,(%esp) - jne debug_stack_correct - FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) -debug_stack_correct: - pushl $-1 # mark this as an int - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - TRACE_IRQS_OFF - xorl %edx,%edx # error code 0 - movl %esp,%eax # pt_regs pointer - call do_debug - jmp ret_from_exception - CFI_ENDPROC -KPROBE_END(debug) - -/* - * NMI is doubly nasty. It can happen _while_ we're handling - * a debug fault, and the debug fault hasn't yet been able to - * clear up the stack. So we first check whether we got an - * NMI on the sysenter entry path, but after that we need to - * check whether we got an NMI on the debug path where the debug - * fault happened on the sysenter path. - */ -KPROBE_ENTRY(nmi) - RING0_INT_FRAME - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - movl %ss, %eax - cmpw $__ESPFIX_SS, %ax - popl %eax - CFI_ADJUST_CFA_OFFSET -4 - je nmi_espfix_stack - cmpl $ia32_sysenter_target,(%esp) - je nmi_stack_fixup - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - movl %esp,%eax - /* Do not access memory above the end of our stack page, - * it might not exist. - */ - andl $(THREAD_SIZE-1),%eax - cmpl $(THREAD_SIZE-20),%eax - popl %eax - CFI_ADJUST_CFA_OFFSET -4 - jae nmi_stack_correct - cmpl $ia32_sysenter_target,12(%esp) - je nmi_debug_stack_check -nmi_stack_correct: - /* We have a RING0_INT_FRAME here */ - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - TRACE_IRQS_OFF - xorl %edx,%edx # zero error code - movl %esp,%eax # pt_regs pointer - call do_nmi - jmp restore_nocheck_notrace - CFI_ENDPROC - -nmi_stack_fixup: - RING0_INT_FRAME - FIX_STACK(12,nmi_stack_correct, 1) - jmp nmi_stack_correct - -nmi_debug_stack_check: - /* We have a RING0_INT_FRAME here */ - cmpw $__KERNEL_CS,16(%esp) - jne nmi_stack_correct - cmpl $debug,(%esp) - jb nmi_stack_correct - cmpl $debug_esp_fix_insn,(%esp) - ja nmi_stack_correct - FIX_STACK(24,nmi_stack_correct, 1) - jmp nmi_stack_correct - -nmi_espfix_stack: - /* We have a RING0_INT_FRAME here. - * - * create the pointer to lss back - */ - pushl %ss - CFI_ADJUST_CFA_OFFSET 4 - pushl %esp - CFI_ADJUST_CFA_OFFSET 4 - addw $4, (%esp) - /* copy the iret frame of 12 bytes */ - .rept 3 - pushl 16(%esp) - CFI_ADJUST_CFA_OFFSET 4 - .endr - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - TRACE_IRQS_OFF - FIXUP_ESPFIX_STACK # %eax == %esp - xorl %edx,%edx # zero error code - call do_nmi - RESTORE_REGS - lss 12+4(%esp), %esp # back to espfix stack - CFI_ADJUST_CFA_OFFSET -24 - jmp irq_return - CFI_ENDPROC -KPROBE_END(nmi) - #ifdef CONFIG_PARAVIRT ENTRY(native_iret) iret @@ -926,19 +733,6 @@ ENTRY(native_irq_enable_sysexit) END(native_irq_enable_sysexit) #endif -KPROBE_ENTRY(int3) - RING0_INT_FRAME - pushl $-1 # mark this as an int - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - TRACE_IRQS_OFF - xorl %edx,%edx # zero error code - movl %esp,%eax # pt_regs pointer - call do_int3 - jmp ret_from_exception - CFI_ENDPROC -KPROBE_END(int3) - ENTRY(overflow) RING0_INT_FRAME pushl $0 @@ -1003,14 +797,6 @@ ENTRY(stack_segment) CFI_ENDPROC END(stack_segment) -KPROBE_ENTRY(general_protection) - RING0_EC_FRAME - pushl $do_general_protection - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -KPROBE_END(general_protection) - ENTRY(alignment_check) RING0_EC_FRAME pushl $do_alignment_check @@ -1220,3 +1006,227 @@ END(mcount) #include "syscall_table_32.S" syscall_table_size=(.-sys_call_table) + +/* + * Some functions should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" + +ENTRY(page_fault) + RING0_EC_FRAME + pushl $do_page_fault + CFI_ADJUST_CFA_OFFSET 4 + ALIGN +error_code: + /* the function address is in %fs's slot on the stack */ + pushl %es + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET es, 0*/ + pushl %ds + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET ds, 0*/ + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET eax, 0 + pushl %ebp + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebp, 0 + pushl %edi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edi, 0 + pushl %esi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET esi, 0 + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx, 0 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx, 0 + pushl %ebx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebx, 0 + cld + pushl %fs + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET fs, 0*/ + movl $(__KERNEL_PERCPU), %ecx + movl %ecx, %fs + UNWIND_ESPFIX_STACK + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + /*CFI_REGISTER es, ecx*/ + movl PT_FS(%esp), %edi # get the function address + movl PT_ORIG_EAX(%esp), %edx # get the error code + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart + mov %ecx, PT_FS(%esp) + /*CFI_REL_OFFSET fs, ES*/ + movl $(__USER_DS), %ecx + movl %ecx, %ds + movl %ecx, %es + TRACE_IRQS_OFF + movl %esp,%eax # pt_regs pointer + call *%edi + jmp ret_from_exception + CFI_ENDPROC +END(page_fault) + +/* + * Debug traps and NMI can happen at the one SYSENTER instruction + * that sets up the real kernel stack. Check here, since we can't + * allow the wrong stack to be used. + * + * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have + * already pushed 3 words if it hits on the sysenter instruction: + * eflags, cs and eip. + * + * We just load the right stack, and push the three (known) values + * by hand onto the new stack - while updating the return eip past + * the instruction that would have done it for sysenter. + */ +#define FIX_STACK(offset, ok, label) \ + cmpw $__KERNEL_CS,4(%esp); \ + jne ok; \ +label: \ + movl TSS_sysenter_sp0+offset(%esp),%esp; \ + CFI_DEF_CFA esp, 0; \ + CFI_UNDEFINED eip; \ + pushfl; \ + CFI_ADJUST_CFA_OFFSET 4; \ + pushl $__KERNEL_CS; \ + CFI_ADJUST_CFA_OFFSET 4; \ + pushl $sysenter_past_esp; \ + CFI_ADJUST_CFA_OFFSET 4; \ + CFI_REL_OFFSET eip, 0 + +ENTRY(debug) + RING0_INT_FRAME + cmpl $ia32_sysenter_target,(%esp) + jne debug_stack_correct + FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) +debug_stack_correct: + pushl $-1 # mark this as an int + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + xorl %edx,%edx # error code 0 + movl %esp,%eax # pt_regs pointer + call do_debug + jmp ret_from_exception + CFI_ENDPROC +END(debug) + +/* + * NMI is doubly nasty. It can happen _while_ we're handling + * a debug fault, and the debug fault hasn't yet been able to + * clear up the stack. So we first check whether we got an + * NMI on the sysenter entry path, but after that we need to + * check whether we got an NMI on the debug path where the debug + * fault happened on the sysenter path. + */ +ENTRY(nmi) + RING0_INT_FRAME + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + movl %ss, %eax + cmpw $__ESPFIX_SS, %ax + popl %eax + CFI_ADJUST_CFA_OFFSET -4 + je nmi_espfix_stack + cmpl $ia32_sysenter_target,(%esp) + je nmi_stack_fixup + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + movl %esp,%eax + /* Do not access memory above the end of our stack page, + * it might not exist. + */ + andl $(THREAD_SIZE-1),%eax + cmpl $(THREAD_SIZE-20),%eax + popl %eax + CFI_ADJUST_CFA_OFFSET -4 + jae nmi_stack_correct + cmpl $ia32_sysenter_target,12(%esp) + je nmi_debug_stack_check +nmi_stack_correct: + /* We have a RING0_INT_FRAME here */ + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + xorl %edx,%edx # zero error code + movl %esp,%eax # pt_regs pointer + call do_nmi + jmp restore_nocheck_notrace + CFI_ENDPROC + +nmi_stack_fixup: + RING0_INT_FRAME + FIX_STACK(12,nmi_stack_correct, 1) + jmp nmi_stack_correct + +nmi_debug_stack_check: + /* We have a RING0_INT_FRAME here */ + cmpw $__KERNEL_CS,16(%esp) + jne nmi_stack_correct + cmpl $debug,(%esp) + jb nmi_stack_correct + cmpl $debug_esp_fix_insn,(%esp) + ja nmi_stack_correct + FIX_STACK(24,nmi_stack_correct, 1) + jmp nmi_stack_correct + +nmi_espfix_stack: + /* We have a RING0_INT_FRAME here. + * + * create the pointer to lss back + */ + pushl %ss + CFI_ADJUST_CFA_OFFSET 4 + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + addw $4, (%esp) + /* copy the iret frame of 12 bytes */ + .rept 3 + pushl 16(%esp) + CFI_ADJUST_CFA_OFFSET 4 + .endr + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + FIXUP_ESPFIX_STACK # %eax == %esp + xorl %edx,%edx # zero error code + call do_nmi + RESTORE_REGS + lss 12+4(%esp), %esp # back to espfix stack + CFI_ADJUST_CFA_OFFSET -24 + jmp irq_return + CFI_ENDPROC +END(nmi) + +ENTRY(int3) + RING0_INT_FRAME + pushl $-1 # mark this as an int + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + xorl %edx,%edx # zero error code + movl %esp,%eax # pt_regs pointer + call do_int3 + jmp ret_from_exception + CFI_ENDPROC +END(int3) + +ENTRY(general_protection) + RING0_EC_FRAME + pushl $do_general_protection + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +END(general_protection) + +/* + * End of kprobes section + */ + .popsection diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 9fd1f859021b..fee9e59649c1 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -64,14 +64,6 @@ name: #endif -#define KPROBE_ENTRY(name) \ - .pushsection .kprobes.text, "ax"; \ - ENTRY(name) - -#define KPROBE_END(name) \ - END(name); \ - .popsection - #ifndef END #define END(name) \ .size name, .-name -- cgit v1.2.3 From 475ad8e2172d7f8b73af5532a8dad265b51339c2 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Thu, 27 Nov 2008 23:04:13 -0800 Subject: decnet: compile fix for removal of byteorder wrapper Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- include/net/dn.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/dn.h b/include/net/dn.h index 7cd6bb83f7ab..e5469f7b67a3 100644 --- a/include/net/dn.h +++ b/include/net/dn.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include struct dn_scp /* Session Control Port */ { -- cgit v1.2.3 From 8b752e3ef6e3f5cde87afc649dd51d92b1e549c1 Mon Sep 17 00:00:00 2001 From: Liming Wang Date: Fri, 28 Nov 2008 09:52:40 +0800 Subject: softirq: remove useless function __local_bh_enable Impact: remove unused code __local_bh_enable has been replaced with _local_bh_enable. As comments says "it always nests inside local_bh_enable() sections" has not been valid now. Also there is no reason to use __local_bh_enable anywhere, so we can remove this useless function. Signed-off-by: Liming Wang Signed-off-by: Ingo Molnar --- include/linux/bottom_half.h | 1 - kernel/softirq.c | 14 -------------- 2 files changed, 15 deletions(-) (limited to 'include') diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h index 777dbf695d44..27b1bcffe408 100644 --- a/include/linux/bottom_half.h +++ b/include/linux/bottom_half.h @@ -2,7 +2,6 @@ #define _LINUX_BH_H extern void local_bh_disable(void); -extern void __local_bh_enable(void); extern void _local_bh_enable(void); extern void local_bh_enable(void); extern void local_bh_enable_ip(unsigned long ip); diff --git a/kernel/softirq.c b/kernel/softirq.c index e7c69a720d69..8d9934b4162a 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -102,20 +102,6 @@ void local_bh_disable(void) EXPORT_SYMBOL(local_bh_disable); -void __local_bh_enable(void) -{ - WARN_ON_ONCE(in_irq()); - - /* - * softirqs should never be enabled by __local_bh_enable(), - * it always nests inside local_bh_enable() sections: - */ - WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET); - - sub_preempt_count(SOFTIRQ_OFFSET); -} -EXPORT_SYMBOL_GPL(__local_bh_enable); - /* * Special-case - softirqs can safely be enabled in * cond_resched_softirq(), or by __do_softirq(), -- cgit v1.2.3 From a838c2ec6ea1f18431da74dfe4978c57355b95f3 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Thu, 27 Nov 2008 16:14:44 +0800 Subject: markers: comment marker_synchronize_unregister() on data dependency Add document and comments on marker_synchronize_unregister(): it should be called before freeing resources that the probes depend on. Based on comments from Lai Jiangshan and Mathieu Desnoyers. Signed-off-by: Wu Fengguang Reviewed-by: Mathieu Desnoyers Reviewed-by: Lai Jiangshan Signed-off-by: Ingo Molnar --- Documentation/markers.txt | 15 ++++++++++----- include/linux/marker.h | 6 ++++-- 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/Documentation/markers.txt b/Documentation/markers.txt index 6d275e4ef385..d2b3d0e91b26 100644 --- a/Documentation/markers.txt +++ b/Documentation/markers.txt @@ -51,11 +51,16 @@ to call) for the specific marker through marker_probe_register() and can be activated by calling marker_arm(). Marker deactivation can be done by calling marker_disarm() as many times as marker_arm() has been called. Removing a probe is done through marker_probe_unregister(); it will disarm the probe. -marker_synchronize_unregister() must be called before the end of the module exit -function to make sure there is no caller left using the probe. This, and the -fact that preemption is disabled around the probe call, make sure that probe -removal and module unload are safe. See the "Probe example" section below for a -sample probe module. + +marker_synchronize_unregister() must be called between probe unregistration and +the first occurrence of +- the end of module exit function, + to make sure there is no caller left using the probe; +- the free of any resource used by the probes, + to make sure the probes wont be accessing invalid data. +This, and the fact that preemption is disabled around the probe call, make sure +that probe removal and module unload are safe. See the "Probe example" section +below for a sample probe module. The marker mechanism supports inserting multiple instances of the same marker. Markers can be put in inline functions, inlined static functions, and diff --git a/include/linux/marker.h b/include/linux/marker.h index 34c14bc957f5..b85e74ca782f 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -211,8 +211,10 @@ extern void *marker_get_private_data(const char *name, marker_probe_func *probe, /* * marker_synchronize_unregister must be called between the last marker probe - * unregistration and the end of module exit to make sure there is no caller - * executing a probe when it is freed. + * unregistration and the first one of + * - the end of module exit function + * - the free of any resource used by the probes + * to ensure the code and data are valid for any possibly running probes. */ #define marker_synchronize_unregister() synchronize_sched() -- cgit v1.2.3 From ec5679e513305f1411753e5f5489935bd638af23 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 28 Nov 2008 17:56:14 +0100 Subject: debug warnings: eliminate warn_on_slowpath() Impact: cleanup, eliminate code now that warn_on_slowpath() uses warn_slowpath(...,NULL), we can eliminate warn_on_slowpath() altogether and use warn_slowpath(). Signed-off-by: Ingo Molnar --- include/asm-generic/bug.h | 7 +++---- kernel/panic.c | 6 ------ 2 files changed, 3 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 12c07c1866b2..b8ba6941f587 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -33,15 +33,14 @@ struct bug_entry { #ifndef __WARN #ifndef __ASSEMBLY__ -extern void warn_on_slowpath(const char *file, const int line); extern void warn_slowpath(const char *file, const int line, const char *fmt, ...) __attribute__((format(printf, 3, 4))); #define WANT_WARN_ON_SLOWPATH #endif -#define __WARN() warn_on_slowpath(__FILE__, __LINE__) -#define __WARN_printf(arg...) warn_slowpath(__FILE__, __LINE__, arg) +#define __WARN() warn_slowpath(__FILE__, __LINE__, NULL) +#define __WARN_printf(arg...) warn_slowpath(__FILE__, __LINE__, arg) #else -#define __WARN_printf(arg...) do { printk(arg); __WARN(); } while (0) +#define __WARN_printf(arg...) do { printk(arg); __WARN(); } while (0) #endif #ifndef WARN_ON diff --git a/kernel/panic.c b/kernel/panic.c index 73d365199c3f..50349a41fba7 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -349,12 +349,6 @@ void warn_slowpath(const char *file, int line, const char *fmt, ...) add_taint(TAINT_WARN); } EXPORT_SYMBOL(warn_slowpath); - -void warn_on_slowpath(const char *file, int line) -{ - warn_slowpath(file, line, NULL); -} -EXPORT_SYMBOL(warn_on_slowpath); #endif #ifdef CONFIG_CC_STACKPROTECTOR -- cgit v1.2.3 From 0f0ca340e57bd7446855fefd07a64249acf81223 Mon Sep 17 00:00:00 2001 From: Giuseppe Cavallaro Date: Fri, 28 Nov 2008 16:24:56 -0800 Subject: phy: power management support This patch adds the power management support into the physical abstraction layer. Suspend and resume functions respectively turns on/off the bit 11 into the PHY Basic mode control register. Generic PHY device starts supporting PM. In order to support the wake-on LAN and avoid to put in power down the PHY device, the MDIO is aware of what the Ethernet device wants to do. Voluntary, no CONFIG_PM defines were added into the sources. Also generic suspend/resume functions are exported to allow other drivers use them (such as genphy_config_aneg etc.). Within the phy_driver_register function, we need to remove the memset. It overrides the device driver owner and it is not good. Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 14 ++++++++++---- drivers/net/phy/phy_device.c | 31 ++++++++++++++++++++++++++++++- include/linux/phy.h | 2 ++ 3 files changed, 42 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 868812ff6de8..8755d8cd4166 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -284,9 +284,12 @@ static int mdio_bus_suspend(struct device * dev, pm_message_t state) { int ret = 0; struct device_driver *drv = dev->driver; + struct phy_driver *phydrv = to_phy_driver(drv); + struct phy_device *phydev = to_phy_device(dev); - if (drv && drv->suspend) - ret = drv->suspend(dev, state); + if ((!device_may_wakeup(phydev->dev.parent)) && + (phydrv && phydrv->suspend)) + ret = phydrv->suspend(phydev); return ret; } @@ -295,9 +298,12 @@ static int mdio_bus_resume(struct device * dev) { int ret = 0; struct device_driver *drv = dev->driver; + struct phy_driver *phydrv = to_phy_driver(drv); + struct phy_device *phydev = to_phy_device(dev); - if (drv && drv->resume) - ret = drv->resume(dev); + if ((!device_may_wakeup(phydev->dev.parent)) && + (phydrv && phydrv->resume)) + ret = phydrv->resume(phydev); return ret; } diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 29546a206045..4cc75a290c06 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -779,7 +779,35 @@ static int genphy_config_init(struct phy_device *phydev) return 0; } +int genphy_suspend(struct phy_device *phydev) +{ + int value; + + mutex_lock(&phydev->lock); + + value = phy_read(phydev, MII_BMCR); + phy_write(phydev, MII_BMCR, (value | BMCR_PDOWN)); + + mutex_unlock(&phydev->lock); + + return 0; +} +EXPORT_SYMBOL(genphy_suspend); +int genphy_resume(struct phy_device *phydev) +{ + int value; + + mutex_lock(&phydev->lock); + + value = phy_read(phydev, MII_BMCR); + phy_write(phydev, MII_BMCR, (value & ~BMCR_PDOWN)); + + mutex_unlock(&phydev->lock); + + return 0; +} +EXPORT_SYMBOL(genphy_resume); /** * phy_probe - probe and init a PHY device @@ -855,7 +883,6 @@ int phy_driver_register(struct phy_driver *new_driver) { int retval; - memset(&new_driver->driver, 0, sizeof(new_driver->driver)); new_driver->driver.name = new_driver->name; new_driver->driver.bus = &mdio_bus_type; new_driver->driver.probe = phy_probe; @@ -890,6 +917,8 @@ static struct phy_driver genphy_driver = { .features = 0, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, + .suspend = genphy_suspend, + .resume = genphy_resume, .driver = {.owner= THIS_MODULE, }, }; diff --git a/include/linux/phy.h b/include/linux/phy.h index 77c4ed60b982..d7e54d98869f 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -467,6 +467,8 @@ int genphy_restart_aneg(struct phy_device *phydev); int genphy_config_aneg(struct phy_device *phydev); int genphy_update_link(struct phy_device *phydev); int genphy_read_status(struct phy_device *phydev); +int genphy_suspend(struct phy_device *phydev); +int genphy_resume(struct phy_device *phydev); void phy_driver_unregister(struct phy_driver *drv); int phy_driver_register(struct phy_driver *new_driver); void phy_prepare_link(struct phy_device *phydev, -- cgit v1.2.3 From 7a9d4020533b5c0c615b6de3be154c9ff30b8cc9 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 30 Nov 2008 12:17:26 +0100 Subject: Bluetooth: Send HCI Reset command by default on device initialization The Bluetooth subsystem was not using the HCI Reset command when doing device initialization. The Bluetooth 1.0b specification was ambiguous on how the device firmware was suppose to handle it. Almost every device was triggering a transport reset at the same time. In case of USB this ended up in disconnects from the bus. All modern Bluetooth dongles handle this perfectly fine and a lot of them actually require that HCI Reset is sent. If not then they are either stuck in their HID Proxy mode or their internal structures for inquiry and paging are not correctly setup. To handle old and new devices smoothly the Bluetooth subsystem contains a quirk to force the HCI Reset on initialization. However maintaining such a quirk becomes more and more complicated. This patch turns the logic around and lets the old devices disable the HCI Reset command. The only device where the HCI_QUIRK_NO_RESET is still needed are the original Digianswer devices and dongles with an early CSR firmware. CSR reported that they fixed this for version 12 firmware. The last official release of version 11 firmware is build ID 115. The first version 12 candidate was build ID 117. Signed-off-by: Marcel Holtmann --- drivers/bluetooth/bpa10x.c | 2 + drivers/bluetooth/btusb.c | 99 ++++++++++++++++++++----------------------- drivers/bluetooth/hci_ldisc.c | 4 +- include/net/bluetooth/hci.h | 2 +- net/bluetooth/af_bluetooth.c | 2 +- net/bluetooth/hci_core.c | 2 +- 6 files changed, 54 insertions(+), 57 deletions(-) (limited to 'include') diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c index b936d8ce2728..4f0a57236d23 100644 --- a/drivers/bluetooth/bpa10x.c +++ b/drivers/bluetooth/bpa10x.c @@ -489,6 +489,8 @@ static int bpa10x_probe(struct usb_interface *intf, const struct usb_device_id * hdev->owner = THIS_MODULE; + set_bit(HCI_QUIRK_NO_RESET, &hdev->quirks); + err = hci_register_dev(hdev); if (err < 0) { hci_free_dev(hdev); diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 0cd4a55dd5c4..5d97bedb83e0 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -41,25 +41,25 @@ #define BT_DBG(D...) #endif -#define VERSION "0.3" +#define VERSION "0.4" static int ignore_dga; static int ignore_csr; static int ignore_sniffer; static int disable_scofix; static int force_scofix; -static int reset; + +static int reset = 1; static struct usb_driver btusb_driver; #define BTUSB_IGNORE 0x01 -#define BTUSB_RESET 0x02 -#define BTUSB_DIGIANSWER 0x04 -#define BTUSB_CSR 0x08 -#define BTUSB_SNIFFER 0x10 -#define BTUSB_BCM92035 0x20 -#define BTUSB_BROKEN_ISOC 0x40 -#define BTUSB_WRONG_SCO_MTU 0x80 +#define BTUSB_DIGIANSWER 0x02 +#define BTUSB_CSR 0x04 +#define BTUSB_SNIFFER 0x08 +#define BTUSB_BCM92035 0x10 +#define BTUSB_BROKEN_ISOC 0x20 +#define BTUSB_WRONG_SCO_MTU 0x40 static struct usb_device_id btusb_table[] = { /* Generic Bluetooth USB device */ @@ -79,7 +79,7 @@ static struct usb_device_id btusb_table[] = { { USB_DEVICE(0x0bdb, 0x1002) }, /* Canyon CN-BTU1 with HID interfaces */ - { USB_DEVICE(0x0c10, 0x0000), .driver_info = BTUSB_RESET }, + { USB_DEVICE(0x0c10, 0x0000) }, { } /* Terminating entry */ }; @@ -94,52 +94,36 @@ static struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0a5c, 0x2033), .driver_info = BTUSB_IGNORE }, /* Broadcom BCM2035 */ - { USB_DEVICE(0x0a5c, 0x2035), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU }, - { USB_DEVICE(0x0a5c, 0x200a), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU }, + { USB_DEVICE(0x0a5c, 0x2035), .driver_info = BTUSB_WRONG_SCO_MTU }, + { USB_DEVICE(0x0a5c, 0x200a), .driver_info = BTUSB_WRONG_SCO_MTU }, + { USB_DEVICE(0x0a5c, 0x2009), .driver_info = BTUSB_BCM92035 }, /* Broadcom BCM2045 */ - { USB_DEVICE(0x0a5c, 0x2039), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU }, - { USB_DEVICE(0x0a5c, 0x2101), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU }, - - /* Broadcom BCM2046 */ - { USB_DEVICE(0x0a5c, 0x2146), .driver_info = BTUSB_RESET }, - { USB_DEVICE(0x0a5c, 0x2151), .driver_info = BTUSB_RESET }, - - /* Apple MacBook Pro with Broadcom chip */ - { USB_DEVICE(0x05ac, 0x820f), .driver_info = BTUSB_RESET }, + { USB_DEVICE(0x0a5c, 0x2039), .driver_info = BTUSB_WRONG_SCO_MTU }, + { USB_DEVICE(0x0a5c, 0x2101), .driver_info = BTUSB_WRONG_SCO_MTU }, /* IBM/Lenovo ThinkPad with Broadcom chip */ - { USB_DEVICE(0x0a5c, 0x201e), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU }, - { USB_DEVICE(0x0a5c, 0x2110), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU }, - - /* Targus ACB10US */ - { USB_DEVICE(0x0a5c, 0x2100), .driver_info = BTUSB_RESET }, - { USB_DEVICE(0x0a5c, 0x2154), .driver_info = BTUSB_RESET }, - - /* ANYCOM Bluetooth USB-200 and USB-250 */ - { USB_DEVICE(0x0a5c, 0x2111), .driver_info = BTUSB_RESET }, + { USB_DEVICE(0x0a5c, 0x201e), .driver_info = BTUSB_WRONG_SCO_MTU }, + { USB_DEVICE(0x0a5c, 0x2110), .driver_info = BTUSB_WRONG_SCO_MTU }, /* HP laptop with Broadcom chip */ - { USB_DEVICE(0x03f0, 0x171d), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU }, + { USB_DEVICE(0x03f0, 0x171d), .driver_info = BTUSB_WRONG_SCO_MTU }, /* Dell laptop with Broadcom chip */ - { USB_DEVICE(0x413c, 0x8126), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU }, + { USB_DEVICE(0x413c, 0x8126), .driver_info = BTUSB_WRONG_SCO_MTU }, /* Dell Wireless 370 */ - { USB_DEVICE(0x413c, 0x8156), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU }, + { USB_DEVICE(0x413c, 0x8156), .driver_info = BTUSB_WRONG_SCO_MTU }, /* Dell Wireless 410 */ - { USB_DEVICE(0x413c, 0x8152), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU }, - - /* Microsoft Wireless Transceiver for Bluetooth 2.0 */ - { USB_DEVICE(0x045e, 0x009c), .driver_info = BTUSB_RESET }, + { USB_DEVICE(0x413c, 0x8152), .driver_info = BTUSB_WRONG_SCO_MTU }, /* Kensington Bluetooth USB adapter */ - { USB_DEVICE(0x047d, 0x105d), .driver_info = BTUSB_RESET }, - { USB_DEVICE(0x047d, 0x105e), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU }, + { USB_DEVICE(0x047d, 0x105e), .driver_info = BTUSB_WRONG_SCO_MTU }, - /* ISSC Bluetooth Adapter v3.1 */ - { USB_DEVICE(0x1131, 0x1001), .driver_info = BTUSB_RESET }, + /* Belkin F8T012 and F8T013 devices */ + { USB_DEVICE(0x050d, 0x0012), .driver_info = BTUSB_WRONG_SCO_MTU }, + { USB_DEVICE(0x050d, 0x0013), .driver_info = BTUSB_WRONG_SCO_MTU }, /* RTX Telecom based adapters with buggy SCO support */ { USB_DEVICE(0x0400, 0x0807), .driver_info = BTUSB_BROKEN_ISOC }, @@ -148,13 +132,6 @@ static struct usb_device_id blacklist_table[] = { /* CONWISE Technology based adapters with buggy SCO support */ { USB_DEVICE(0x0e5e, 0x6622), .driver_info = BTUSB_BROKEN_ISOC }, - /* Belkin F8T012 and F8T013 devices */ - { USB_DEVICE(0x050d, 0x0012), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU }, - { USB_DEVICE(0x050d, 0x0013), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU }, - - /* Belkin F8T016 device */ - { USB_DEVICE(0x050d, 0x016a), .driver_info = BTUSB_RESET }, - /* Digianswer devices */ { USB_DEVICE(0x08fd, 0x0001), .driver_info = BTUSB_DIGIANSWER }, { USB_DEVICE(0x08fd, 0x0002), .driver_info = BTUSB_IGNORE }, @@ -197,6 +174,8 @@ struct btusb_data { struct usb_endpoint_descriptor *isoc_tx_ep; struct usb_endpoint_descriptor *isoc_rx_ep; + __u8 cmdreq_type; + int isoc_altsetting; int suspend_count; }; @@ -590,7 +569,7 @@ static int btusb_send_frame(struct sk_buff *skb) return -ENOMEM; } - dr->bRequestType = USB_TYPE_CLASS; + dr->bRequestType = data->cmdreq_type; dr->bRequest = 0; dr->wIndex = 0; dr->wValue = 0; @@ -828,6 +807,8 @@ static int btusb_probe(struct usb_interface *intf, return -ENODEV; } + data->cmdreq_type = USB_TYPE_CLASS; + data->udev = interface_to_usbdev(intf); data->intf = intf; @@ -862,11 +843,11 @@ static int btusb_probe(struct usb_interface *intf, hdev->owner = THIS_MODULE; - /* interface numbers are hardcoded in the spec */ + /* Interface numbers are hardcoded in the specification */ data->isoc = usb_ifnum_to_if(data->udev, 1); - if (reset || id->driver_info & BTUSB_RESET) - set_bit(HCI_QUIRK_RESET_ON_INIT, &hdev->quirks); + if (!reset) + set_bit(HCI_QUIRK_NO_RESET, &hdev->quirks); if (force_scofix || id->driver_info & BTUSB_WRONG_SCO_MTU) { if (!disable_scofix) @@ -876,9 +857,23 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_BROKEN_ISOC) data->isoc = NULL; + if (id->driver_info & BTUSB_DIGIANSWER) { + data->cmdreq_type = USB_TYPE_VENDOR; + set_bit(HCI_QUIRK_NO_RESET, &hdev->quirks); + } + + if (id->driver_info & BTUSB_CSR) { + struct usb_device *udev = data->udev; + + /* Old firmware would otherwise execute USB reset */ + if (le16_to_cpu(udev->descriptor.bcdDevice) < 0x117) + set_bit(HCI_QUIRK_NO_RESET, &hdev->quirks); + } + if (id->driver_info & BTUSB_SNIFFER) { struct usb_device *udev = data->udev; + /* New sniffer firmware has crippled HCI interface */ if (le16_to_cpu(udev->descriptor.bcdDevice) > 0x997) set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index 4426bb552bd9..cb46bf52a19a 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -399,8 +399,8 @@ static int hci_uart_register_dev(struct hci_uart *hu) hdev->owner = THIS_MODULE; - if (reset) - set_bit(HCI_QUIRK_RESET_ON_INIT, &hdev->quirks); + if (!reset) + set_bit(HCI_QUIRK_NO_RESET, &hdev->quirks); if (hci_register_dev(hdev) < 0) { BT_ERR("Can't register HCI device"); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 3cc294919312..3645139e68c7 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -54,7 +54,7 @@ /* HCI device quirks */ enum { - HCI_QUIRK_RESET_ON_INIT, + HCI_QUIRK_NO_RESET, HCI_QUIRK_RAW_DEVICE, HCI_QUIRK_FIXUP_BUFFER_SIZE }; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 41f07f549ba7..f6f216e57aa3 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -46,7 +46,7 @@ #define BT_DBG(D...) #endif -#define VERSION "2.13" +#define VERSION "2.14" /* Bluetooth sockets */ #define BT_MAX_PROTO 8 diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 7bb0f1cb7f26..fa7c5370b556 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -205,7 +205,7 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) /* Mandatory initialization */ /* Reset */ - if (test_bit(HCI_QUIRK_RESET_ON_INIT, &hdev->quirks)) + if (!test_bit(HCI_QUIRK_NO_RESET, &hdev->quirks)) hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); /* Read Local Supported Features */ -- cgit v1.2.3 From a418b893a6af11ae73c762ed5b76c1bad6dc19d8 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 30 Nov 2008 12:17:28 +0100 Subject: Bluetooth: Enable per-module dynamic debug messages With the introduction of CONFIG_DYNAMIC_PRINTK_DEBUG it is possible to allow debugging without having to recompile the kernel. This patch turns all BT_DBG() calls into pr_debug() to support dynamic debug messages. As a side effect all CONFIG_BT_*_DEBUG statements are now removed and some broken debug entries have been fixed. Signed-off-by: Marcel Holtmann --- drivers/bluetooth/bcm203x.c | 9 ++------- drivers/bluetooth/bfusb.c | 11 +++-------- drivers/bluetooth/bpa10x.c | 5 ----- drivers/bluetooth/btsdio.c | 5 ----- drivers/bluetooth/btusb.c | 6 ------ drivers/bluetooth/hci_bcsp.c | 5 ----- drivers/bluetooth/hci_h4.c | 5 ----- drivers/bluetooth/hci_ldisc.c | 5 ----- drivers/bluetooth/hci_ll.c | 5 ----- drivers/bluetooth/hci_vhci.c | 5 ----- include/net/bluetooth/bluetooth.h | 4 ++-- net/bluetooth/af_bluetooth.c | 7 +------ net/bluetooth/bnep/bnep.h | 2 +- net/bluetooth/bnep/core.c | 5 ----- net/bluetooth/bnep/netdev.c | 5 ----- net/bluetooth/bnep/sock.c | 5 ----- net/bluetooth/cmtp/capi.c | 5 ----- net/bluetooth/cmtp/core.c | 5 ----- net/bluetooth/cmtp/sock.c | 5 ----- net/bluetooth/hci_conn.c | 5 ----- net/bluetooth/hci_core.c | 7 +------ net/bluetooth/hci_event.c | 5 ----- net/bluetooth/hci_sock.c | 5 ----- net/bluetooth/hci_sysfs.c | 5 ----- net/bluetooth/hidp/core.c | 5 ----- net/bluetooth/hidp/sock.c | 5 ----- net/bluetooth/l2cap.c | 5 ----- net/bluetooth/rfcomm/core.c | 5 ----- net/bluetooth/rfcomm/sock.c | 7 +------ net/bluetooth/rfcomm/tty.c | 5 ----- net/bluetooth/sco.c | 5 ----- 31 files changed, 11 insertions(+), 157 deletions(-) (limited to 'include') diff --git a/drivers/bluetooth/bcm203x.c b/drivers/bluetooth/bcm203x.c index ee40201c7278..eafd4af0746e 100644 --- a/drivers/bluetooth/bcm203x.c +++ b/drivers/bluetooth/bcm203x.c @@ -37,11 +37,6 @@ #include -#ifndef CONFIG_BT_HCIBCM203X_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - #define VERSION "1.2" static struct usb_device_id bcm203x_table[] = { @@ -199,7 +194,7 @@ static int bcm203x_probe(struct usb_interface *intf, const struct usb_device_id return -EIO; } - BT_DBG("minidrv data %p size %d", firmware->data, firmware->size); + BT_DBG("minidrv data %p size %zu", firmware->data, firmware->size); size = max_t(uint, firmware->size, 4096); @@ -227,7 +222,7 @@ static int bcm203x_probe(struct usb_interface *intf, const struct usb_device_id return -EIO; } - BT_DBG("firmware data %p size %d", firmware->data, firmware->size); + BT_DBG("firmware data %p size %zu", firmware->data, firmware->size); data->fw_data = kmalloc(firmware->size, GFP_KERNEL); if (!data->fw_data) { diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c index 90a094634630..d3f14bee0f19 100644 --- a/drivers/bluetooth/bfusb.c +++ b/drivers/bluetooth/bfusb.c @@ -38,11 +38,6 @@ #include #include -#ifndef CONFIG_BT_HCIBFUSB_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - #define VERSION "1.2" static struct usb_driver bfusb_driver; @@ -221,7 +216,7 @@ static int bfusb_rx_submit(struct bfusb_data *data, struct urb *urb) struct sk_buff *skb; int err, pipe, size = HCI_MAX_FRAME_SIZE + 32; - BT_DBG("bfusb %p urb %p", bfusb, urb); + BT_DBG("bfusb %p urb %p", data, urb); if (!urb && !(urb = usb_alloc_urb(0, GFP_ATOMIC))) return -ENOMEM; @@ -354,7 +349,7 @@ static void bfusb_rx_complete(struct urb *urb) int count = urb->actual_length; int err, hdr, len; - BT_DBG("bfusb %p urb %p skb %p len %d", bfusb, urb, skb, skb->len); + BT_DBG("bfusb %p urb %p skb %p len %d", data, urb, skb, skb->len); read_lock(&data->lock); @@ -691,7 +686,7 @@ static int bfusb_probe(struct usb_interface *intf, const struct usb_device_id *i goto error; } - BT_DBG("firmware data %p size %d", firmware->data, firmware->size); + BT_DBG("firmware data %p size %zu", firmware->data, firmware->size); if (bfusb_load_firmware(data, firmware->data, firmware->size) < 0) { BT_ERR("Firmware loading failed"); diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c index 4f0a57236d23..c115285867c3 100644 --- a/drivers/bluetooth/bpa10x.c +++ b/drivers/bluetooth/bpa10x.c @@ -35,11 +35,6 @@ #include #include -#ifndef CONFIG_BT_HCIBPA10X_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - #define VERSION "0.10" static struct usb_device_id bpa10x_table[] = { diff --git a/drivers/bluetooth/btsdio.c b/drivers/bluetooth/btsdio.c index f2ada0c65486..7e298275c8f6 100644 --- a/drivers/bluetooth/btsdio.c +++ b/drivers/bluetooth/btsdio.c @@ -37,11 +37,6 @@ #include #include -#ifndef CONFIG_BT_HCIBTSDIO_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - #define VERSION "0.1" static const struct sdio_device_id btsdio_table[] = { diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 7f7526c186d0..b5fbda6d490a 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -35,12 +35,6 @@ #include #include -//#define CONFIG_BT_HCIBTUSB_DEBUG -#ifndef CONFIG_BT_HCIBTUSB_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - #define VERSION "0.4" static int ignore_dga; diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c index 7938062c1cc7..894b2cb11ea6 100644 --- a/drivers/bluetooth/hci_bcsp.c +++ b/drivers/bluetooth/hci_bcsp.c @@ -47,11 +47,6 @@ #include "hci_uart.h" -#ifndef CONFIG_BT_HCIUART_DEBUG -#undef BT_DBG -#define BT_DBG( A... ) -#endif - #define VERSION "0.3" static int txcrc = 1; diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c index bfbae14cf93d..b0fafb055996 100644 --- a/drivers/bluetooth/hci_h4.c +++ b/drivers/bluetooth/hci_h4.c @@ -46,11 +46,6 @@ #include "hci_uart.h" -#ifndef CONFIG_BT_HCIUART_DEBUG -#undef BT_DBG -#define BT_DBG( A... ) -#endif - #define VERSION "1.2" struct h4_struct { diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index cb46bf52a19a..af761dc434f6 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -46,11 +46,6 @@ #include "hci_uart.h" -#ifndef CONFIG_BT_HCIUART_DEBUG -#undef BT_DBG -#define BT_DBG( A... ) -#endif - #define VERSION "2.2" static int reset = 0; diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c index 2d2f66e17fea..b91d45a41b2f 100644 --- a/drivers/bluetooth/hci_ll.c +++ b/drivers/bluetooth/hci_ll.c @@ -51,11 +51,6 @@ #include "hci_uart.h" -#ifndef CONFIG_BT_HCIUART_DEBUG -#undef BT_DBG -#define BT_DBG( A... ) -#endif - /* HCILL commands */ #define HCILL_GO_TO_SLEEP_IND 0x30 #define HCILL_GO_TO_SLEEP_ACK 0x31 diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index 7320a71b6368..0bbefba6469c 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -40,11 +40,6 @@ #include #include -#ifndef CONFIG_BT_HCIVHCI_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - #define VERSION "1.2" static int minor = MISC_DYNAMIC_MINOR; diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 996d12df7594..a04f8463ac7e 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -54,8 +54,8 @@ #define SOL_RFCOMM 18 #define BT_INFO(fmt, arg...) printk(KERN_INFO "Bluetooth: " fmt "\n" , ## arg) -#define BT_DBG(fmt, arg...) printk(KERN_INFO "%s: " fmt "\n" , __func__ , ## arg) -#define BT_ERR(fmt, arg...) printk(KERN_ERR "%s: " fmt "\n" , __func__ , ## arg) +#define BT_ERR(fmt, arg...) printk(KERN_ERR "%s: " fmt "\n" , __func__ , ## arg) +#define BT_DBG(fmt, arg...) pr_debug("%s: " fmt "\n" , __func__ , ## arg) /* Connection and socket states */ enum { diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index f6f216e57aa3..744ed3f07ef3 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -41,11 +41,6 @@ #include -#ifndef CONFIG_BT_SOCK_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - #define VERSION "2.14" /* Bluetooth sockets */ @@ -245,7 +240,7 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, size_t copied; int err; - BT_DBG("sock %p sk %p len %d", sock, sk, len); + BT_DBG("sock %p sk %p len %zu", sock, sk, len); if (flags & (MSG_OOB)) return -EOPNOTSUPP; diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h index b69bf4e7c48b..d20f8a40f36e 100644 --- a/net/bluetooth/bnep/bnep.h +++ b/net/bluetooth/bnep/bnep.h @@ -161,7 +161,7 @@ struct bnep_session { struct msghdr msg; struct bnep_proto_filter proto_filter[BNEP_MAX_PROTO_FILTERS]; - u64 mc_filter; + unsigned long long mc_filter; struct socket *sock; struct net_device *dev; diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index f8efaf35293c..70fea8bdb4e5 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -52,11 +52,6 @@ #include "bnep.h" -#ifndef CONFIG_BT_BNEP_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - #define VERSION "1.3" static int compress_src = 1; diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index 47e179f62e82..f897da6e0444 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -41,11 +41,6 @@ #include "bnep.h" -#ifndef CONFIG_BT_BNEP_DEBUG -#undef BT_DBG -#define BT_DBG( A... ) -#endif - #define BNEP_TX_QUEUE_LEN 20 static int bnep_net_open(struct net_device *dev) diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index 8ffb57f2303a..e857628b0b27 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -46,11 +46,6 @@ #include "bnep.h" -#ifndef CONFIG_BT_BNEP_DEBUG -#undef BT_DBG -#define BT_DBG( A... ) -#endif - static int bnep_sock_release(struct socket *sock) { struct sock *sk = sock->sk; diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index 3e9d5bb3fefb..78958c0f9a40 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -42,11 +42,6 @@ #include "cmtp.h" -#ifndef CONFIG_BT_CMTP_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - #define CAPI_INTEROPERABILITY 0x20 #define CAPI_INTEROPERABILITY_REQ CAPICMD(CAPI_INTEROPERABILITY, CAPI_REQ) diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index ca60a4517fd3..c9cac7719efe 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -44,11 +44,6 @@ #include "cmtp.h" -#ifndef CONFIG_BT_CMTP_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - #define VERSION "1.0" static DECLARE_RWSEM(cmtp_session_sem); diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index 8c7f7bc4e0ba..16b0fad74f6e 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -43,11 +43,6 @@ #include "cmtp.h" -#ifndef CONFIG_BT_CMTP_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - static int cmtp_sock_release(struct socket *sock) { struct sock *sk = sock->sk; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index b7002429f152..a4a789f24c8d 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -45,11 +45,6 @@ #include #include -#ifndef CONFIG_BT_HCI_CORE_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - void hci_acl_connect(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index fa7c5370b556..ba78cc1eb8d9 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -48,11 +48,6 @@ #include #include -#ifndef CONFIG_BT_HCI_CORE_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - static void hci_cmd_task(unsigned long arg); static void hci_rx_task(unsigned long arg); static void hci_tx_task(unsigned long arg); @@ -290,7 +285,7 @@ static void hci_linkpol_req(struct hci_dev *hdev, unsigned long opt) { __le16 policy = cpu_to_le16(opt); - BT_DBG("%s %x", hdev->name, opt); + BT_DBG("%s %x", hdev->name, policy); /* Default link policy */ hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index ad7a553d7713..f91ba690f5d2 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -45,11 +45,6 @@ #include #include -#ifndef CONFIG_BT_HCI_CORE_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - /* Handle HCI Event packets */ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index d62579b67959..4f9621f759a0 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -49,11 +49,6 @@ #include #include -#ifndef CONFIG_BT_HCI_SOCK_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - /* ----- HCI socket interface ----- */ static inline int hci_test_bit(int nr, void *addr) diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index f2bbb2f65434..6490bf8402f3 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -6,11 +6,6 @@ #include #include -#ifndef CONFIG_BT_HCI_CORE_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - struct class *bt_class = NULL; EXPORT_SYMBOL_GPL(bt_class); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index acdeab3d9807..b18676870d55 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -47,11 +47,6 @@ #include "hidp.h" -#ifndef CONFIG_BT_HIDP_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - #define VERSION "1.2" static DECLARE_RWSEM(hidp_session_sem); diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index f4dd02ca9a96..37c9d7d2e688 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -39,11 +39,6 @@ #include "hidp.h" -#ifndef CONFIG_BT_HIDP_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - static int hidp_sock_release(struct socket *sock) { struct sock *sk = sock->sk; diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 9610a9c85b98..b93748e224ff 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -50,11 +50,6 @@ #include #include -#ifndef CONFIG_BT_L2CAP_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - #define VERSION "2.11" static u32 l2cap_feat_mask = 0x0000; diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index ba537fae0a4c..37c640d1c3fd 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -46,11 +46,6 @@ #include #include -#ifndef CONFIG_BT_RFCOMM_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - #define VERSION "1.10" static int disable_cfc = 0; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index bc0d4a7ce6ae..ad00cbf449cb 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -50,11 +50,6 @@ #include #include -#ifndef CONFIG_BT_RFCOMM_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - static const struct proto_ops rfcomm_sock_ops; static struct bt_sock_list rfcomm_sk_list = { @@ -644,7 +639,7 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_namelen = 0; - BT_DBG("sk %p size %d", sk, size); + BT_DBG("sk %p size %zu", sk, size); lock_sock(sk); diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index d3340dd52bcf..1e4100bb0b65 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -39,11 +39,6 @@ #include #include -#ifndef CONFIG_BT_RFCOMM_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - #define RFCOMM_TTY_MAGIC 0x6d02 /* magic number for rfcomm struct */ #define RFCOMM_TTY_PORTS RFCOMM_MAX_DEV /* whole lotta rfcomm devices */ #define RFCOMM_TTY_MAJOR 216 /* device node major id of the usb/bluetooth.c driver */ diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 0cc91e6da76d..46fd8bf9a690 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -48,11 +48,6 @@ #include #include -#ifndef CONFIG_BT_SCO_DEBUG -#undef BT_DBG -#define BT_DBG(D...) -#endif - #define VERSION "0.6" static int disable_esco = 0; -- cgit v1.2.3 From 6c415b9234a8c71f290e5d4fddc467f103f32719 Mon Sep 17 00:00:00 2001 From: Arun R Bharadwaj Date: Mon, 1 Dec 2008 20:49:05 +0530 Subject: sched: add uid information to sched_debug for CONFIG_USER_SCHED Impact: extend information in /proc/sched_debug This patch adds uid information in sched_debug for CONFIG_USER_SCHED Signed-off-by: Arun R Bharadwaj Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + kernel/sched.c | 10 ++++++++++ kernel/sched_debug.c | 6 +++++- kernel/user.c | 2 ++ 4 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 7a69c4d224ee..d8733f07d80b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2218,6 +2218,7 @@ extern void normalize_rt_tasks(void); extern struct task_group init_task_group; #ifdef CONFIG_USER_SCHED extern struct task_group root_task_group; +extern void set_tg_uid(struct user_struct *user); #endif extern struct task_group *sched_create_group(struct task_group *parent); diff --git a/kernel/sched.c b/kernel/sched.c index 6a99703e0eb0..4c7388ef5be7 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -261,6 +261,10 @@ struct task_group { struct cgroup_subsys_state css; #endif +#ifdef CONFIG_USER_SCHED + uid_t uid; +#endif + #ifdef CONFIG_FAIR_GROUP_SCHED /* schedulable entities of this group on each cpu */ struct sched_entity **se; @@ -286,6 +290,12 @@ struct task_group { #ifdef CONFIG_USER_SCHED +/* Helper function to pass uid information to create_sched_user() */ +void set_tg_uid(struct user_struct *user) +{ + user->tg->uid = user->uid; +} + /* * Root task group. * Every UID task group (including init_task_group aka UID-0) will diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index baf2f17af462..4293cfa9681d 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -160,10 +160,14 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) cgroup_path(tg->css.cgroup, path, sizeof(path)); SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path); +#elif defined(CONFIG_USER_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED) + { + uid_t uid = cfs_rq->tg->uid; + SEQ_printf(m, "\ncfs_rq[%d] for UID: %u\n", cpu, uid); + } #else SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); #endif - SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", SPLIT_NS(cfs_rq->exec_clock)); diff --git a/kernel/user.c b/kernel/user.c index 39d6159fae43..cec2224bc9f5 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -101,6 +101,8 @@ static int sched_create_user(struct user_struct *up) if (IS_ERR(up->tg)) rc = -ENOMEM; + set_tg_uid(up); + return rc; } -- cgit v1.2.3 From 968a6025aa9f909d487988efb542217a126023a0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 28 Nov 2008 11:49:07 +0000 Subject: ASoC: Rename snd_soc_register_card() to snd_soc_init_card() Currently ASoC card initialisation is completed by a function called snd_soc_register_card(). As part of the work to allow independant registration of cards, codecs and machines in ASoC v2 a new function of the same name has been added so rename the existing function to facilitate the merge of v2. Signed-off-by: Mark Brown --- include/sound/soc.h | 2 +- sound/soc/codecs/ac97.c | 2 +- sound/soc/codecs/ad1980.c | 2 +- sound/soc/codecs/ad73311.c | 2 +- sound/soc/codecs/ak4535.c | 2 +- sound/soc/codecs/cs4270.c | 2 +- sound/soc/codecs/pcm3008.c | 2 +- sound/soc/codecs/ssm2602.c | 2 +- sound/soc/codecs/tlv320aic23.c | 2 +- sound/soc/codecs/tlv320aic26.c | 2 +- sound/soc/codecs/tlv320aic3x.c | 2 +- sound/soc/codecs/twl4030.c | 2 +- sound/soc/codecs/uda134x.c | 2 +- sound/soc/codecs/uda1380.c | 2 +- sound/soc/codecs/wm8510.c | 2 +- sound/soc/codecs/wm8580.c | 2 +- sound/soc/codecs/wm8728.c | 2 +- sound/soc/codecs/wm8731.c | 2 +- sound/soc/codecs/wm8750.c | 2 +- sound/soc/codecs/wm8753.c | 2 +- sound/soc/codecs/wm8900.c | 2 +- sound/soc/codecs/wm8903.c | 2 +- sound/soc/codecs/wm8971.c | 2 +- sound/soc/codecs/wm8990.c | 2 +- sound/soc/codecs/wm9712.c | 2 +- sound/soc/codecs/wm9713.c | 2 +- sound/soc/soc-core.c | 6 +++--- 27 files changed, 29 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 444f9c211379..9356c1ce98c1 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -162,7 +162,7 @@ extern struct snd_ac97_bus_ops soc_ac97_ops; /* pcm <-> DAI connect */ void snd_soc_free_pcms(struct snd_soc_device *socdev); int snd_soc_new_pcms(struct snd_soc_device *socdev, int idx, const char *xid); -int snd_soc_register_card(struct snd_soc_device *socdev); +int snd_soc_init_card(struct snd_soc_device *socdev); /* set runtime hw params */ int snd_soc_set_runtime_hwparams(struct snd_pcm_substream *substream, diff --git a/sound/soc/codecs/ac97.c b/sound/soc/codecs/ac97.c index c4208c8210c8..fb53e6511af2 100644 --- a/sound/soc/codecs/ac97.c +++ b/sound/soc/codecs/ac97.c @@ -114,7 +114,7 @@ static int ac97_soc_probe(struct platform_device *pdev) if (ret < 0) goto bus_err; - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) goto bus_err; return 0; diff --git a/sound/soc/codecs/ad1980.c b/sound/soc/codecs/ad1980.c index a9a268112d3f..73fdbb4d4a3d 100644 --- a/sound/soc/codecs/ad1980.c +++ b/sound/soc/codecs/ad1980.c @@ -270,7 +270,7 @@ static int ad1980_soc_probe(struct platform_device *pdev) ac97_write(codec, AC97_EXTENDED_STATUS, ext_status&~0x3800); ad1980_add_controls(codec); - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) { printk(KERN_ERR "ad1980: failed to register card\n"); goto reset_err; diff --git a/sound/soc/codecs/ad73311.c b/sound/soc/codecs/ad73311.c index 59c4c8f18cbb..0f4110f4bceb 100644 --- a/sound/soc/codecs/ad73311.c +++ b/sound/soc/codecs/ad73311.c @@ -67,7 +67,7 @@ static int ad73311_soc_probe(struct platform_device *pdev) goto pcm_err; } - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) { printk(KERN_ERR "ad73311: failed to register card\n"); goto register_err; diff --git a/sound/soc/codecs/ak4535.c b/sound/soc/codecs/ak4535.c index c742290e5533..23062c952e85 100644 --- a/sound/soc/codecs/ak4535.c +++ b/sound/soc/codecs/ak4535.c @@ -512,7 +512,7 @@ static int ak4535_init(struct snd_soc_device *socdev) ak4535_add_controls(codec); ak4535_add_widgets(codec); - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) { printk(KERN_ERR "ak4535: failed to register card\n"); goto card_err; diff --git a/sound/soc/codecs/cs4270.c b/sound/soc/codecs/cs4270.c index 7507d468b200..4667a07b566c 100644 --- a/sound/soc/codecs/cs4270.c +++ b/sound/soc/codecs/cs4270.c @@ -723,7 +723,7 @@ static int cs4270_probe(struct platform_device *pdev) printk(KERN_INFO "cs4270: I2C disabled, using stand-alone mode\n"); #endif - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) { printk(KERN_ERR "cs4270: failed to register card\n"); goto error_del_driver; diff --git a/sound/soc/codecs/pcm3008.c b/sound/soc/codecs/pcm3008.c index 651a15eb8c2c..a5862555b444 100644 --- a/sound/soc/codecs/pcm3008.c +++ b/sound/soc/codecs/pcm3008.c @@ -91,7 +91,7 @@ static int pcm3008_soc_probe(struct platform_device *pdev) } /* Register Card. */ - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) { printk(KERN_ERR "pcm3008: failed to register card\n"); goto card_err; diff --git a/sound/soc/codecs/ssm2602.c b/sound/soc/codecs/ssm2602.c index 0c5884ea1b00..973844973fe1 100644 --- a/sound/soc/codecs/ssm2602.c +++ b/sound/soc/codecs/ssm2602.c @@ -624,7 +624,7 @@ static int ssm2602_init(struct snd_soc_device *socdev) ssm2602_add_controls(codec); ssm2602_add_widgets(codec); - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) { pr_err("ssm2602: failed to register card\n"); goto card_err; diff --git a/sound/soc/codecs/tlv320aic23.c b/sound/soc/codecs/tlv320aic23.c index a4e13d0688c9..d209bec02a69 100644 --- a/sound/soc/codecs/tlv320aic23.c +++ b/sound/soc/codecs/tlv320aic23.c @@ -720,7 +720,7 @@ static int tlv320aic23_init(struct snd_soc_device *socdev) tlv320aic23_add_controls(codec); tlv320aic23_add_widgets(codec); - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) { printk(KERN_ERR "tlv320aic23: failed to register card\n"); goto card_err; diff --git a/sound/soc/codecs/tlv320aic26.c b/sound/soc/codecs/tlv320aic26.c index 6b7ddfc92573..e33fb7e00d1e 100644 --- a/sound/soc/codecs/tlv320aic26.c +++ b/sound/soc/codecs/tlv320aic26.c @@ -359,7 +359,7 @@ static int aic26_probe(struct platform_device *pdev) /* CODEC is setup, we can register the card now */ dev_dbg(&pdev->dev, "Registering card\n"); - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) { dev_err(&pdev->dev, "aic26: failed to register card\n"); goto card_err; diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c index 255e784c805b..0f4067bdd4a3 100644 --- a/sound/soc/codecs/tlv320aic3x.c +++ b/sound/soc/codecs/tlv320aic3x.c @@ -1187,7 +1187,7 @@ static int aic3x_init(struct snd_soc_device *socdev) aic3x_add_controls(codec); aic3x_add_widgets(codec); - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) { printk(KERN_ERR "aic3x: failed to register card\n"); goto card_err; diff --git a/sound/soc/codecs/twl4030.c b/sound/soc/codecs/twl4030.c index 413623147891..f3e9e591b52f 100644 --- a/sound/soc/codecs/twl4030.c +++ b/sound/soc/codecs/twl4030.c @@ -764,7 +764,7 @@ static int twl4030_init(struct snd_soc_device *socdev) twl4030_add_controls(codec); twl4030_add_widgets(codec); - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) { printk(KERN_ERR "twl4030: failed to register card\n"); goto card_err; diff --git a/sound/soc/codecs/uda134x.c b/sound/soc/codecs/uda134x.c index 91f333cdc7cf..58de749185e6 100644 --- a/sound/soc/codecs/uda134x.c +++ b/sound/soc/codecs/uda134x.c @@ -578,7 +578,7 @@ static int uda134x_soc_probe(struct platform_device *pdev) goto pcm_err; } - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) { printk(KERN_ERR "UDA134X: failed to register card\n"); goto card_err; diff --git a/sound/soc/codecs/uda1380.c b/sound/soc/codecs/uda1380.c index 330877c70699..42491650593f 100644 --- a/sound/soc/codecs/uda1380.c +++ b/sound/soc/codecs/uda1380.c @@ -677,7 +677,7 @@ static int uda1380_init(struct snd_soc_device *socdev, int dac_clk) /* uda1380 init */ uda1380_add_controls(codec); uda1380_add_widgets(codec); - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) { pr_err("uda1380: failed to register card\n"); goto card_err; diff --git a/sound/soc/codecs/wm8510.c b/sound/soc/codecs/wm8510.c index 173b66c0c766..126c70f749d1 100644 --- a/sound/soc/codecs/wm8510.c +++ b/sound/soc/codecs/wm8510.c @@ -658,7 +658,7 @@ static int wm8510_init(struct snd_soc_device *socdev) wm8510_set_bias_level(codec, SND_SOC_BIAS_STANDBY); wm8510_add_controls(codec); wm8510_add_widgets(codec); - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) { printk(KERN_ERR "wm8510: failed to register card\n"); goto card_err; diff --git a/sound/soc/codecs/wm8580.c b/sound/soc/codecs/wm8580.c index 220d4b68904a..572a31de3219 100644 --- a/sound/soc/codecs/wm8580.c +++ b/sound/soc/codecs/wm8580.c @@ -869,7 +869,7 @@ static int wm8580_init(struct snd_soc_device *socdev) wm8580_add_controls(codec); wm8580_add_widgets(codec); - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) { printk(KERN_ERR "wm8580: failed to register card\n"); goto card_err; diff --git a/sound/soc/codecs/wm8728.c b/sound/soc/codecs/wm8728.c index 71949bd320d3..28f12c6a6ac8 100644 --- a/sound/soc/codecs/wm8728.c +++ b/sound/soc/codecs/wm8728.c @@ -332,7 +332,7 @@ static int wm8728_init(struct snd_soc_device *socdev) wm8728_add_controls(codec); wm8728_add_widgets(codec); - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) { printk(KERN_ERR "wm8728: failed to register card\n"); goto card_err; diff --git a/sound/soc/codecs/wm8731.c b/sound/soc/codecs/wm8731.c index c0f277053bb2..403dea13b5d9 100644 --- a/sound/soc/codecs/wm8731.c +++ b/sound/soc/codecs/wm8731.c @@ -545,7 +545,7 @@ static int wm8731_init(struct snd_soc_device *socdev) wm8731_add_controls(codec); wm8731_add_widgets(codec); - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) { printk(KERN_ERR "wm8731: failed to register card\n"); goto card_err; diff --git a/sound/soc/codecs/wm8750.c b/sound/soc/codecs/wm8750.c index 860a1d56830a..979446f5c983 100644 --- a/sound/soc/codecs/wm8750.c +++ b/sound/soc/codecs/wm8750.c @@ -818,7 +818,7 @@ static int wm8750_init(struct snd_soc_device *socdev) wm8750_add_controls(codec); wm8750_add_widgets(codec); - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) { printk(KERN_ERR "wm8750: failed to register card\n"); goto card_err; diff --git a/sound/soc/codecs/wm8753.c b/sound/soc/codecs/wm8753.c index 5e4cd3bb824a..96c0453fffb3 100644 --- a/sound/soc/codecs/wm8753.c +++ b/sound/soc/codecs/wm8753.c @@ -1605,7 +1605,7 @@ static int wm8753_init(struct snd_soc_device *socdev) wm8753_add_controls(codec); wm8753_add_widgets(codec); - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) { printk(KERN_ERR "wm8753: failed to register card\n"); goto card_err; diff --git a/sound/soc/codecs/wm8900.c b/sound/soc/codecs/wm8900.c index d1326be91c8b..29cd83991c5b 100644 --- a/sound/soc/codecs/wm8900.c +++ b/sound/soc/codecs/wm8900.c @@ -1365,7 +1365,7 @@ static int wm8900_init(struct snd_soc_device *socdev) wm8900_add_controls(codec); wm8900_add_widgets(codec); - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) { dev_err(&i2c_client->dev, "Failed to register card\n"); goto card_err; diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index efbe8927b7d2..393a4c198823 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -1648,7 +1648,7 @@ static int wm8903_init(struct snd_soc_device *socdev) wm8903_add_controls(codec); wm8903_add_widgets(codec); - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) { dev_err(&i2c->dev, "wm8903: failed to register card\n"); goto card_err; diff --git a/sound/soc/codecs/wm8971.c b/sound/soc/codecs/wm8971.c index 26edcc9d6e87..53e6937e9ba1 100644 --- a/sound/soc/codecs/wm8971.c +++ b/sound/soc/codecs/wm8971.c @@ -747,7 +747,7 @@ static int wm8971_init(struct snd_soc_device *socdev) wm8971_add_controls(codec); wm8971_add_widgets(codec); - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) { printk(KERN_ERR "wm8971: failed to register card\n"); goto card_err; diff --git a/sound/soc/codecs/wm8990.c b/sound/soc/codecs/wm8990.c index 13926516d16e..5c5128b6b453 100644 --- a/sound/soc/codecs/wm8990.c +++ b/sound/soc/codecs/wm8990.c @@ -1462,7 +1462,7 @@ static int wm8990_init(struct snd_soc_device *socdev) wm8990_add_controls(codec); wm8990_add_widgets(codec); - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) { printk(KERN_ERR "wm8990: failed to register card\n"); goto card_err; diff --git a/sound/soc/codecs/wm9712.c b/sound/soc/codecs/wm9712.c index 40f14061fb72..af83d629078a 100644 --- a/sound/soc/codecs/wm9712.c +++ b/sound/soc/codecs/wm9712.c @@ -700,7 +700,7 @@ static int wm9712_soc_probe(struct platform_device *pdev) wm9712_set_bias_level(codec, SND_SOC_BIAS_STANDBY); wm9712_add_controls(codec); wm9712_add_widgets(codec); - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) { printk(KERN_ERR "wm9712: failed to register card\n"); goto reset_err; diff --git a/sound/soc/codecs/wm9713.c b/sound/soc/codecs/wm9713.c index 9dad0bffcb05..49962a88770d 100644 --- a/sound/soc/codecs/wm9713.c +++ b/sound/soc/codecs/wm9713.c @@ -1247,7 +1247,7 @@ static int wm9713_soc_probe(struct platform_device *pdev) wm9713_add_controls(codec); wm9713_add_widgets(codec); - ret = snd_soc_register_card(socdev); + ret = snd_soc_init_card(socdev); if (ret < 0) goto reset_err; return 0; diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 13b4aaff0e9c..0448708245e7 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1233,7 +1233,7 @@ int snd_soc_new_pcms(struct snd_soc_device *socdev, int idx, const char *xid) EXPORT_SYMBOL_GPL(snd_soc_new_pcms); /** - * snd_soc_register_card - register sound card + * snd_soc_init_card - register sound card * @socdev: the SoC audio device * * Register a SoC sound card. Also registers an AC97 device if the @@ -1241,7 +1241,7 @@ EXPORT_SYMBOL_GPL(snd_soc_new_pcms); * * Returns 0 for success, else error. */ -int snd_soc_register_card(struct snd_soc_device *socdev) +int snd_soc_init_card(struct snd_soc_device *socdev) { struct snd_soc_codec *codec = socdev->codec; struct snd_soc_card *card = socdev->card; @@ -1298,7 +1298,7 @@ int snd_soc_register_card(struct snd_soc_device *socdev) out: return ret; } -EXPORT_SYMBOL_GPL(snd_soc_register_card); +EXPORT_SYMBOL_GPL(snd_soc_init_card); /** * snd_soc_free_pcms - free sound card and pcms -- cgit v1.2.3 From 6308419a199eed66086cd756ab8dc81b88d54a6b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 2 Dec 2008 15:08:03 +0000 Subject: ASoC: Push workqueue data into snd_soc_card ASoC v2 does not use the struct snd_soc_device at runtime, using struct snd_soc_card as the root of the card. Begin removing data from snd_soc_device by pushing the workqueue data into snd_soc_card, using a backpointer to the snd_soc_device to keep things going for the time being. Signed-off-by: Mark Brown --- include/sound/soc.h | 7 +++++-- sound/soc/soc-core.c | 33 ++++++++++++++++++++------------- 2 files changed, 25 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 9356c1ce98c1..359ec49f8d0d 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -349,6 +349,11 @@ struct snd_soc_card { /* CPU <--> Codec DAI links */ struct snd_soc_dai_link *dai_link; int num_links; + + struct snd_soc_device *socdev; + + struct delayed_work delayed_work; + struct work_struct deferred_resume_work; }; /* SoC Device - the audio subsystem */ @@ -358,8 +363,6 @@ struct snd_soc_device { struct snd_soc_platform *platform; struct snd_soc_codec *codec; struct snd_soc_codec_device *codec_dev; - struct delayed_work delayed_work; - struct work_struct deferred_resume_work; void *codec_data; #ifdef CONFIG_DEBUG_FS struct dentry *debugfs_root; diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 7eb2ea1fd42e..c4b22e6984e6 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -247,8 +247,9 @@ out: */ static void close_delayed_work(struct work_struct *work) { - struct snd_soc_device *socdev = - container_of(work, struct snd_soc_device, delayed_work.work); + struct snd_soc_card *card = container_of(work, struct snd_soc_card, + delayed_work.work); + struct snd_soc_device *socdev = card->socdev; struct snd_soc_codec *codec = socdev->codec; struct snd_soc_dai *codec_dai; int i; @@ -299,6 +300,7 @@ static int soc_codec_close(struct snd_pcm_substream *substream) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; + struct snd_soc_card *card = socdev->card; struct snd_soc_dai_link *machine = rtd->dai; struct snd_soc_platform *platform = socdev->platform; struct snd_soc_dai *cpu_dai = machine->cpu_dai; @@ -340,7 +342,7 @@ static int soc_codec_close(struct snd_pcm_substream *substream) if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { /* start delayed pop wq here for playback streams */ codec_dai->pop_wait = 1; - schedule_delayed_work(&socdev->delayed_work, + schedule_delayed_work(&card->delayed_work, msecs_to_jiffies(pmdown_time)); } else { /* capture streams can be powered down now */ @@ -366,6 +368,7 @@ static int soc_pcm_prepare(struct snd_pcm_substream *substream) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; + struct snd_soc_card *card = socdev->card; struct snd_soc_dai_link *machine = rtd->dai; struct snd_soc_platform *platform = socdev->platform; struct snd_soc_dai *cpu_dai = machine->cpu_dai; @@ -411,7 +414,7 @@ static int soc_pcm_prepare(struct snd_pcm_substream *substream) if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && codec_dai->pop_wait) { codec_dai->pop_wait = 0; - cancel_delayed_work(&socdev->delayed_work); + cancel_delayed_work(&card->delayed_work); } /* do we need to power up codec */ @@ -645,7 +648,7 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state) } /* close any waiting streams and save state */ - run_delayed_work(&socdev->delayed_work); + run_delayed_work(&card->delayed_work); codec->suspend_bias_level = codec->bias_level; for (i = 0; i < codec->num_dai; i++) { @@ -679,10 +682,10 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state) */ static void soc_resume_deferred(struct work_struct *work) { - struct snd_soc_device *socdev = container_of(work, - struct snd_soc_device, - deferred_resume_work); - struct snd_soc_card *card = socdev->card; + struct snd_soc_card *card = container_of(work, + struct snd_soc_card, + deferred_resume_work); + struct snd_soc_device *socdev = card->socdev; struct snd_soc_platform *platform = socdev->platform; struct snd_soc_codec_device *codec_dev = socdev->codec_dev; struct snd_soc_codec *codec = socdev->codec; @@ -746,10 +749,11 @@ static void soc_resume_deferred(struct work_struct *work) static int soc_resume(struct platform_device *pdev) { struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_card *card = socdev->card; dev_dbg(socdev->dev, "scheduling resume work\n"); - if (!schedule_work(&socdev->deferred_resume_work)) + if (!schedule_work(&card->deferred_resume_work)) dev_err(socdev->dev, "resume work item may be lost\n"); return 0; @@ -769,6 +773,9 @@ static int soc_probe(struct platform_device *pdev) struct snd_soc_platform *platform = socdev->platform; struct snd_soc_codec_device *codec_dev = socdev->codec_dev; + /* Bodge while we push things out of socdev */ + card->socdev = socdev; + if (card->probe) { ret = card->probe(pdev); if (ret < 0) @@ -797,10 +804,10 @@ static int soc_probe(struct platform_device *pdev) } /* DAPM stream work */ - INIT_DELAYED_WORK(&socdev->delayed_work, close_delayed_work); + INIT_DELAYED_WORK(&card->delayed_work, close_delayed_work); #ifdef CONFIG_PM /* deferred resume work */ - INIT_WORK(&socdev->deferred_resume_work, soc_resume_deferred); + INIT_WORK(&card->deferred_resume_work, soc_resume_deferred); #endif return 0; @@ -831,7 +838,7 @@ static int soc_remove(struct platform_device *pdev) struct snd_soc_platform *platform = socdev->platform; struct snd_soc_codec_device *codec_dev = socdev->codec_dev; - run_delayed_work(&socdev->delayed_work); + run_delayed_work(&card->delayed_work); if (platform->remove) platform->remove(pdev); -- cgit v1.2.3 From 87689d567a45f80416feea0a2aa6d3a2a6b8963a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 2 Dec 2008 16:01:14 +0000 Subject: ASoC: Push platform registration down into the card As part of the deprecation of snd_soc_device push the registration of the platform down into the card structure. Signed-off-by: Mark Brown --- include/sound/soc.h | 2 +- sound/soc/atmel/playpaq_wm8510.c | 2 +- sound/soc/atmel/sam9g20_wm8731.c | 2 +- sound/soc/blackfin/bf5xx-ad1980.c | 2 +- sound/soc/blackfin/bf5xx-ad73311.c | 2 +- sound/soc/blackfin/bf5xx-ssm2602.c | 2 +- sound/soc/davinci/davinci-evm.c | 2 +- sound/soc/davinci/davinci-i2s.c | 2 +- sound/soc/davinci/davinci-sffsdr.c | 2 +- sound/soc/fsl/mpc8610_hpcd.c | 2 +- sound/soc/fsl/soc-of-simple.c | 2 +- sound/soc/omap/n810.c | 2 +- sound/soc/omap/omap2evm.c | 2 +- sound/soc/omap/omap3beagle.c | 2 +- sound/soc/omap/osk5912.c | 2 +- sound/soc/omap/overo.c | 2 +- sound/soc/omap/sdp3430.c | 2 +- sound/soc/pxa/corgi.c | 2 +- sound/soc/pxa/e800_wm9712.c | 2 +- sound/soc/pxa/em-x270.c | 2 +- sound/soc/pxa/palm27x.c | 2 +- sound/soc/pxa/poodle.c | 2 +- sound/soc/pxa/spitz.c | 2 +- sound/soc/pxa/tosa.c | 2 +- sound/soc/pxa/zylonite.c | 2 +- sound/soc/s3c24xx/ln2440sbc_alc650.c | 2 +- sound/soc/s3c24xx/neo1973_wm8753.c | 2 +- sound/soc/s3c24xx/s3c24xx_uda134x.c | 2 +- sound/soc/s3c24xx/smdk2443_wm9710.c | 2 +- sound/soc/sh/sh7760-ac97.c | 2 +- sound/soc/soc-core.c | 44 ++++++++++++++++++++---------------- 31 files changed, 55 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 359ec49f8d0d..ad8141acd6b0 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -352,6 +352,7 @@ struct snd_soc_card { struct snd_soc_device *socdev; + struct snd_soc_platform *platform; struct delayed_work delayed_work; struct work_struct deferred_resume_work; }; @@ -360,7 +361,6 @@ struct snd_soc_card { struct snd_soc_device { struct device *dev; struct snd_soc_card *card; - struct snd_soc_platform *platform; struct snd_soc_codec *codec; struct snd_soc_codec_device *codec_dev; void *codec_data; diff --git a/sound/soc/atmel/playpaq_wm8510.c b/sound/soc/atmel/playpaq_wm8510.c index d40b5a52a8d2..43dd8cee83c6 100644 --- a/sound/soc/atmel/playpaq_wm8510.c +++ b/sound/soc/atmel/playpaq_wm8510.c @@ -363,6 +363,7 @@ static struct snd_soc_dai_link playpaq_wm8510_dai = { static struct snd_soc_card snd_soc_playpaq = { .name = "LRS_PlayPaq_WM8510", + .platform = &at32_soc_platform, .dai_link = &playpaq_wm8510_dai, .num_links = 1, }; @@ -378,7 +379,6 @@ static struct wm8510_setup_data playpaq_wm8510_setup = { static struct snd_soc_device playpaq_wm8510_snd_devdata = { .card = &snd_soc_playpaq, - .platform = &at32_soc_platform, .codec_dev = &soc_codec_dev_wm8510, .codec_data = &playpaq_wm8510_setup, }; diff --git a/sound/soc/atmel/sam9g20_wm8731.c b/sound/soc/atmel/sam9g20_wm8731.c index fdc1d0206e0b..1fb59a9d3719 100644 --- a/sound/soc/atmel/sam9g20_wm8731.c +++ b/sound/soc/atmel/sam9g20_wm8731.c @@ -244,6 +244,7 @@ static struct snd_soc_dai_link at91sam9g20ek_dai = { static struct snd_soc_card snd_soc_at91sam9g20ek = { .name = "WM8731", + .platform = &atmel_soc_platform, .dai_link = &at91sam9g20ek_dai, .num_links = 1, }; @@ -255,7 +256,6 @@ static struct wm8731_setup_data at91sam9g20ek_wm8731_setup = { static struct snd_soc_device at91sam9g20ek_snd_devdata = { .card = &snd_soc_at91sam9g20ek, - .platform = &atmel_soc_platform, .codec_dev = &soc_codec_dev_wm8731, .codec_data = &at91sam9g20ek_wm8731_setup, }; diff --git a/sound/soc/blackfin/bf5xx-ad1980.c b/sound/soc/blackfin/bf5xx-ad1980.c index 36c569a43ce1..d8f591273778 100644 --- a/sound/soc/blackfin/bf5xx-ad1980.c +++ b/sound/soc/blackfin/bf5xx-ad1980.c @@ -69,13 +69,13 @@ static struct snd_soc_dai_link bf5xx_board_dai = { static struct snd_soc_card bf5xx_board = { .name = "bf5xx-board", + .platform = &bf5xx_ac97_soc_platform, .dai_link = &bf5xx_board_dai, .num_links = 1, }; static struct snd_soc_device bf5xx_board_snd_devdata = { .card = &bf5xx_board, - .platform = &bf5xx_ac97_soc_platform, .codec_dev = &soc_codec_dev_ad1980, }; diff --git a/sound/soc/blackfin/bf5xx-ad73311.c b/sound/soc/blackfin/bf5xx-ad73311.c index 57da14799375..7f2a5e199075 100644 --- a/sound/soc/blackfin/bf5xx-ad73311.c +++ b/sound/soc/blackfin/bf5xx-ad73311.c @@ -192,6 +192,7 @@ static struct snd_soc_dai_link bf5xx_ad73311_dai = { static struct snd_soc_card bf5xx_ad73311 = { .name = "bf5xx_ad73311", + .platform = &bf5xx_i2s_soc_platform, .probe = bf5xx_probe, .dai_link = &bf5xx_ad73311_dai, .num_links = 1, @@ -199,7 +200,6 @@ static struct snd_soc_card bf5xx_ad73311 = { static struct snd_soc_device bf5xx_ad73311_snd_devdata = { .card = &bf5xx_ad73311, - .platform = &bf5xx_i2s_soc_platform, .codec_dev = &soc_codec_dev_ad73311, }; diff --git a/sound/soc/blackfin/bf5xx-ssm2602.c b/sound/soc/blackfin/bf5xx-ssm2602.c index 0078dfcd95b9..bc0cdded7116 100644 --- a/sound/soc/blackfin/bf5xx-ssm2602.c +++ b/sound/soc/blackfin/bf5xx-ssm2602.c @@ -137,13 +137,13 @@ static struct ssm2602_setup_data bf5xx_ssm2602_setup = { static struct snd_soc_card bf5xx_ssm2602 = { .name = "bf5xx_ssm2602", + .platform = &bf5xx_i2s_soc_platform, .dai_link = &bf5xx_ssm2602_dai, .num_links = 1, }; static struct snd_soc_device bf5xx_ssm2602_snd_devdata = { .card = &bf5xx_ssm2602, - .platform = &bf5xx_i2s_soc_platform, .codec_dev = &soc_codec_dev_ssm2602, .codec_data = &bf5xx_ssm2602_setup, }; diff --git a/sound/soc/davinci/davinci-evm.c b/sound/soc/davinci/davinci-evm.c index 2ce34d44b15c..d87b91179cc8 100644 --- a/sound/soc/davinci/davinci-evm.c +++ b/sound/soc/davinci/davinci-evm.c @@ -130,6 +130,7 @@ static struct snd_soc_dai_link evm_dai = { /* davinci-evm audio machine driver */ static struct snd_soc_card snd_soc_card_evm = { .name = "DaVinci EVM", + .platform = &davinci_soc_platform, .dai_link = &evm_dai, .num_links = 1, }; @@ -143,7 +144,6 @@ static struct aic3x_setup_data evm_aic3x_setup = { /* evm audio subsystem */ static struct snd_soc_device evm_snd_devdata = { .card = &snd_soc_card_evm, - .platform = &davinci_soc_platform, .codec_dev = &soc_codec_dev_aic3x, .codec_data = &evm_aic3x_setup, }; diff --git a/sound/soc/davinci/davinci-i2s.c b/sound/soc/davinci/davinci-i2s.c index cf31b3bb96cf..8b99efbc64c6 100644 --- a/sound/soc/davinci/davinci-i2s.c +++ b/sound/soc/davinci/davinci-i2s.c @@ -112,7 +112,7 @@ static void davinci_mcbsp_start(struct snd_pcm_substream *substream) struct snd_soc_pcm_runtime *rtd = substream->private_data; struct davinci_mcbsp_dev *dev = rtd->dai->cpu_dai->private_data; struct snd_soc_device *socdev = rtd->socdev; - struct snd_soc_platform *platform = socdev->platform; + struct snd_soc_platform *platform = socdev->card->platform; u32 w; int ret; diff --git a/sound/soc/davinci/davinci-sffsdr.c b/sound/soc/davinci/davinci-sffsdr.c index e95fde1766b5..f67579d52765 100644 --- a/sound/soc/davinci/davinci-sffsdr.c +++ b/sound/soc/davinci/davinci-sffsdr.c @@ -76,6 +76,7 @@ static struct snd_soc_dai_link sffsdr_dai = { /* davinci-sffsdr audio machine driver */ static struct snd_soc_card snd_soc_sffsdr = { .name = "DaVinci SFFSDR", + .platform = &davinci_soc_platform, .dai_link = &sffsdr_dai, .num_links = 1, }; @@ -91,7 +92,6 @@ static struct pcm3008_setup_data sffsdr_pcm3008_setup = { /* sffsdr audio subsystem */ static struct snd_soc_device sffsdr_snd_devdata = { .card = &snd_soc_sffsdr, - .platform = &davinci_soc_platform, .codec_dev = &soc_codec_dev_pcm3008, .codec_data = &sffsdr_pcm3008_setup, }; diff --git a/sound/soc/fsl/mpc8610_hpcd.c b/sound/soc/fsl/mpc8610_hpcd.c index 1cf4d6eeb538..bcec3f60bad9 100644 --- a/sound/soc/fsl/mpc8610_hpcd.c +++ b/sound/soc/fsl/mpc8610_hpcd.c @@ -467,7 +467,7 @@ static int mpc8610_hpcd_probe(struct of_device *ofdev, machine_data->sound_devdata.card = &mpc8610_hpcd_machine; machine_data->sound_devdata.codec_dev = &soc_codec_device_cs4270; - machine_data->sound_devdata.platform = &fsl_soc_platform; + machine_data->machine.platform = &fsl_soc_platform; sound_device->dev.platform_data = machine_data; diff --git a/sound/soc/fsl/soc-of-simple.c b/sound/soc/fsl/soc-of-simple.c index 53be6491320a..8bc5cd9e972f 100644 --- a/sound/soc/fsl/soc-of-simple.c +++ b/sound/soc/fsl/soc-of-simple.c @@ -158,7 +158,7 @@ int of_snd_soc_register_platform(struct snd_soc_platform *platform, of_soc->platform_node = node; of_soc->dai_link.cpu_dai = cpu_dai; - of_soc->device.platform = platform; + of_soc->card.platform = platform; of_soc->card.name = of_soc->dai_link.cpu_dai->name; /* Now try to register the SoC device */ diff --git a/sound/soc/omap/n810.c b/sound/soc/omap/n810.c index 18e2062e3a11..25593fee9121 100644 --- a/sound/soc/omap/n810.c +++ b/sound/soc/omap/n810.c @@ -288,6 +288,7 @@ static struct snd_soc_dai_link n810_dai = { /* Audio machine driver */ static struct snd_soc_card snd_soc_n810 = { .name = "N810", + .platform = &omap_soc_platform, .dai_link = &n810_dai, .num_links = 1, }; @@ -303,7 +304,6 @@ static struct aic3x_setup_data n810_aic33_setup = { /* Audio subsystem */ static struct snd_soc_device n810_snd_devdata = { .card = &snd_soc_n810, - .platform = &omap_soc_platform, .codec_dev = &soc_codec_dev_aic3x, .codec_data = &n810_aic33_setup, }; diff --git a/sound/soc/omap/omap2evm.c b/sound/soc/omap/omap2evm.c index 7b160f9d83f9..0c2322dcf02a 100644 --- a/sound/soc/omap/omap2evm.c +++ b/sound/soc/omap/omap2evm.c @@ -93,6 +93,7 @@ static struct snd_soc_dai_link omap2evm_dai = { /* Audio machine driver */ static struct snd_soc_card snd_soc_omap2evm = { .name = "omap2evm", + .platform = &omap_soc_platform, .dai_link = &omap2evm_dai, .num_links = 1, }; @@ -100,7 +101,6 @@ static struct snd_soc_card snd_soc_omap2evm = { /* Audio subsystem */ static struct snd_soc_device omap2evm_snd_devdata = { .card = &snd_soc_omap2evm, - .platform = &omap_soc_platform, .codec_dev = &soc_codec_dev_twl4030, }; diff --git a/sound/soc/omap/omap3beagle.c b/sound/soc/omap/omap3beagle.c index 3ed25464627f..fd24a4acd2f5 100644 --- a/sound/soc/omap/omap3beagle.c +++ b/sound/soc/omap/omap3beagle.c @@ -90,6 +90,7 @@ static struct snd_soc_dai_link omap3beagle_dai = { /* Audio machine driver */ static struct snd_soc_card snd_soc_omap3beagle = { .name = "omap3beagle", + .platform = &omap_soc_platform, .dai_link = &omap3beagle_dai, .num_links = 1, }; @@ -97,7 +98,6 @@ static struct snd_soc_card snd_soc_omap3beagle = { /* Audio subsystem */ static struct snd_soc_device omap3beagle_snd_devdata = { .card = &snd_soc_omap3beagle, - .platform = &omap_soc_platform, .codec_dev = &soc_codec_dev_twl4030, }; diff --git a/sound/soc/omap/osk5912.c b/sound/soc/omap/osk5912.c index 7a8f14d0c772..845bf41335b9 100644 --- a/sound/soc/omap/osk5912.c +++ b/sound/soc/omap/osk5912.c @@ -145,6 +145,7 @@ static struct snd_soc_dai_link osk_dai = { /* Audio machine driver */ static struct snd_soc_card snd_soc_card_osk = { .name = "OSK5912", + .platform = &omap_soc_platform, .dai_link = &osk_dai, .num_links = 1, }; @@ -152,7 +153,6 @@ static struct snd_soc_card snd_soc_card_osk = { /* Audio subsystem */ static struct snd_soc_device osk_snd_devdata = { .card = &snd_soc_card_osk, - .platform = &omap_soc_platform, .codec_dev = &soc_codec_dev_tlv320aic23, }; diff --git a/sound/soc/omap/overo.c b/sound/soc/omap/overo.c index eea0c372bb3f..a72dc4e159e5 100644 --- a/sound/soc/omap/overo.c +++ b/sound/soc/omap/overo.c @@ -90,6 +90,7 @@ static struct snd_soc_dai_link overo_dai = { /* Audio machine driver */ static struct snd_soc_card snd_soc_card_overo = { .name = "overo", + .platform = &omap_soc_platform, .dai_link = &overo_dai, .num_links = 1, }; @@ -97,7 +98,6 @@ static struct snd_soc_card snd_soc_card_overo = { /* Audio subsystem */ static struct snd_soc_device overo_snd_devdata = { .card = &snd_soc_card_overo, - .platform = &omap_soc_platform, .codec_dev = &soc_codec_dev_twl4030, }; diff --git a/sound/soc/omap/sdp3430.c b/sound/soc/omap/sdp3430.c index 85fd160bca17..ad97836818b1 100644 --- a/sound/soc/omap/sdp3430.c +++ b/sound/soc/omap/sdp3430.c @@ -93,6 +93,7 @@ static struct snd_soc_dai_link sdp3430_dai = { /* Audio machine driver */ static struct snd_soc_machine snd_soc_machine_sdp3430 = { .name = "SDP3430", + .platform = &omap_soc_platform, .dai_link = &sdp3430_dai, .num_links = 1, }; @@ -100,7 +101,6 @@ static struct snd_soc_machine snd_soc_machine_sdp3430 = { /* Audio subsystem */ static struct snd_soc_device sdp3430_snd_devdata = { .machine = &snd_soc_machine_sdp3430, - .platform = &omap_soc_platform, .codec_dev = &soc_codec_dev_twl4030, }; diff --git a/sound/soc/pxa/corgi.c b/sound/soc/pxa/corgi.c index e56bf4b6c2af..1ba25a559524 100644 --- a/sound/soc/pxa/corgi.c +++ b/sound/soc/pxa/corgi.c @@ -312,6 +312,7 @@ static struct snd_soc_dai_link corgi_dai = { /* corgi audio machine driver */ static struct snd_soc_card snd_soc_corgi = { .name = "Corgi", + .platform = &pxa2xx_soc_platform, .dai_link = &corgi_dai, .num_links = 1, }; @@ -325,7 +326,6 @@ static struct wm8731_setup_data corgi_wm8731_setup = { /* corgi audio subsystem */ static struct snd_soc_device corgi_snd_devdata = { .card = &snd_soc_corgi, - .platform = &pxa2xx_soc_platform, .codec_dev = &soc_codec_dev_wm8731, .codec_data = &corgi_wm8731_setup, }; diff --git a/sound/soc/pxa/e800_wm9712.c b/sound/soc/pxa/e800_wm9712.c index 60c64861512a..2e3386dfa0f0 100644 --- a/sound/soc/pxa/e800_wm9712.c +++ b/sound/soc/pxa/e800_wm9712.c @@ -42,13 +42,13 @@ static struct snd_soc_dai_link e800_dai[] = { static struct snd_soc_card e800 = { .name = "Toshiba e800", + .platform = &pxa2xx_soc_platform, .dai_link = e800_dai, .num_links = ARRAY_SIZE(e800_dai), }; static struct snd_soc_device e800_snd_devdata = { .card = &e800, - .platform = &pxa2xx_soc_platform, .codec_dev = &soc_codec_dev_wm9712, }; diff --git a/sound/soc/pxa/em-x270.c b/sound/soc/pxa/em-x270.c index d6884b755d55..fe4a729ea648 100644 --- a/sound/soc/pxa/em-x270.c +++ b/sound/soc/pxa/em-x270.c @@ -54,13 +54,13 @@ static struct snd_soc_dai_link em_x270_dai[] = { static struct snd_soc_card em_x270 = { .name = "EM-X270", + .platform = &pxa2xx_soc_platform, .dai_link = em_x270_dai, .num_links = ARRAY_SIZE(em_x270_dai), }; static struct snd_soc_device em_x270_snd_devdata = { .card = &em_x270, - .platform = &pxa2xx_soc_platform, .codec_dev = &soc_codec_dev_wm9712, }; diff --git a/sound/soc/pxa/palm27x.c b/sound/soc/pxa/palm27x.c index 3bb8879ac8a2..4a9cf3083af0 100644 --- a/sound/soc/pxa/palm27x.c +++ b/sound/soc/pxa/palm27x.c @@ -191,13 +191,13 @@ static struct snd_soc_dai_link palm27x_dai[] = { static struct snd_soc_card palm27x_asoc = { .name = "Palm/PXA27x", + .platform = &pxa2xx_soc_platform, .dai_link = palm27x_dai, .num_links = ARRAY_SIZE(palm27x_dai), }; static struct snd_soc_device palm27x_snd_devdata = { .card = &palm27x_asoc, - .platform = &pxa2xx_soc_platform, .codec_dev = &soc_codec_dev_wm9712, }; diff --git a/sound/soc/pxa/poodle.c b/sound/soc/pxa/poodle.c index 03b510ab2824..6e9827189fff 100644 --- a/sound/soc/pxa/poodle.c +++ b/sound/soc/pxa/poodle.c @@ -278,6 +278,7 @@ static struct snd_soc_dai_link poodle_dai = { /* poodle audio machine driver */ static struct snd_soc_card snd_soc_poodle = { .name = "Poodle", + .platform = &pxa2xx_soc_platform, .dai_link = &poodle_dai, .num_links = 1, }; @@ -291,7 +292,6 @@ static struct wm8731_setup_data poodle_wm8731_setup = { /* poodle audio subsystem */ static struct snd_soc_device poodle_snd_devdata = { .card = &snd_soc_poodle, - .platform = &pxa2xx_soc_platform, .codec_dev = &soc_codec_dev_wm8731, .codec_data = &poodle_wm8731_setup, }; diff --git a/sound/soc/pxa/spitz.c b/sound/soc/pxa/spitz.c index 579d93368f14..a3b9e6bdf979 100644 --- a/sound/soc/pxa/spitz.c +++ b/sound/soc/pxa/spitz.c @@ -321,6 +321,7 @@ static struct snd_soc_dai_link spitz_dai = { /* spitz audio machine driver */ static struct snd_soc_card snd_soc_spitz = { .name = "Spitz", + .platform = &pxa2xx_soc_platform, .dai_link = &spitz_dai, .num_links = 1, }; @@ -334,7 +335,6 @@ static struct wm8750_setup_data spitz_wm8750_setup = { /* spitz audio subsystem */ static struct snd_soc_device spitz_snd_devdata = { .card = &snd_soc_spitz, - .platform = &pxa2xx_soc_platform, .codec_dev = &soc_codec_dev_wm8750, .codec_data = &spitz_wm8750_setup, }; diff --git a/sound/soc/pxa/tosa.c b/sound/soc/pxa/tosa.c index 48242b32a28b..c77194f74c9b 100644 --- a/sound/soc/pxa/tosa.c +++ b/sound/soc/pxa/tosa.c @@ -252,6 +252,7 @@ static int tosa_remove(struct platform_device *dev) static struct snd_soc_card tosa = { .name = "Tosa", + .platform = &pxa2xx_soc_platform, .dai_link = tosa_dai, .num_links = ARRAY_SIZE(tosa_dai), .probe = tosa_probe, @@ -260,7 +261,6 @@ static struct snd_soc_card tosa = { static struct snd_soc_device tosa_snd_devdata = { .card = &tosa, - .platform = &pxa2xx_soc_platform, .codec_dev = &soc_codec_dev_wm9712, }; diff --git a/sound/soc/pxa/zylonite.c b/sound/soc/pxa/zylonite.c index 842d6500d61f..f8e9ecd589d3 100644 --- a/sound/soc/pxa/zylonite.c +++ b/sound/soc/pxa/zylonite.c @@ -175,13 +175,13 @@ static struct snd_soc_dai_link zylonite_dai[] = { static struct snd_soc_card zylonite = { .name = "Zylonite", + .platform = &pxa2xx_soc_platform, .dai_link = zylonite_dai, .num_links = ARRAY_SIZE(zylonite_dai), }; static struct snd_soc_device zylonite_snd_ac97_devdata = { .card = &zylonite, - .platform = &pxa2xx_soc_platform, .codec_dev = &soc_codec_dev_wm9713, }; diff --git a/sound/soc/s3c24xx/ln2440sbc_alc650.c b/sound/soc/s3c24xx/ln2440sbc_alc650.c index a70cbc0fa070..12c71482d258 100644 --- a/sound/soc/s3c24xx/ln2440sbc_alc650.c +++ b/sound/soc/s3c24xx/ln2440sbc_alc650.c @@ -40,13 +40,13 @@ static struct snd_soc_dai_link ln2440sbc_dai[] = { static struct snd_soc_card ln2440sbc = { .name = "LN2440SBC", + .platform = &s3c24xx_soc_platform, .dai_link = ln2440sbc_dai, .num_links = ARRAY_SIZE(ln2440sbc_dai), }; static struct snd_soc_device ln2440sbc_snd_ac97_devdata = { .card = &ln2440sbc, - .platform = &s3c24xx_soc_platform, .codec_dev = &soc_codec_dev_ac97, }; diff --git a/sound/soc/s3c24xx/neo1973_wm8753.c b/sound/soc/s3c24xx/neo1973_wm8753.c index 3df2224a6723..45bb12e8ea44 100644 --- a/sound/soc/s3c24xx/neo1973_wm8753.c +++ b/sound/soc/s3c24xx/neo1973_wm8753.c @@ -580,6 +580,7 @@ static struct snd_soc_dai_link neo1973_dai[] = { static struct snd_soc_card neo1973 = { .name = "neo1973", + .platform = &s3c24xx_soc_platform, .dai_link = neo1973_dai, .num_links = ARRAY_SIZE(neo1973_dai), }; @@ -591,7 +592,6 @@ static struct wm8753_setup_data neo1973_wm8753_setup = { static struct snd_soc_device neo1973_snd_devdata = { .card = &neo1973, - .platform = &s3c24xx_soc_platform, .codec_dev = &soc_codec_dev_wm8753, .codec_data = &neo1973_wm8753_setup, }; diff --git a/sound/soc/s3c24xx/s3c24xx_uda134x.c b/sound/soc/s3c24xx/s3c24xx_uda134x.c index 23325fca1f64..a0a4d1832a14 100644 --- a/sound/soc/s3c24xx/s3c24xx_uda134x.c +++ b/sound/soc/s3c24xx/s3c24xx_uda134x.c @@ -234,6 +234,7 @@ static struct snd_soc_dai_link s3c24xx_uda134x_dai_link = { static struct snd_soc_card snd_soc_s3c24xx_uda134x = { .name = "S3C24XX_UDA134X", + .platform = &s3c24xx_soc_platform, .dai_link = &s3c24xx_uda134x_dai_link, .num_links = 1, }; @@ -271,7 +272,6 @@ static struct uda134x_platform_data s3c24xx_uda134x = { static struct snd_soc_device s3c24xx_uda134x_snd_devdata = { .card = &snd_soc_s3c24xx_uda134x, - .platform = &s3c24xx_soc_platform, .codec_dev = &soc_codec_dev_uda134x, .codec_data = &s3c24xx_uda134x, }; diff --git a/sound/soc/s3c24xx/smdk2443_wm9710.c b/sound/soc/s3c24xx/smdk2443_wm9710.c index 3d2e6a0417ec..a2a4f5323c17 100644 --- a/sound/soc/s3c24xx/smdk2443_wm9710.c +++ b/sound/soc/s3c24xx/smdk2443_wm9710.c @@ -36,13 +36,13 @@ static struct snd_soc_dai_link smdk2443_dai[] = { static struct snd_soc_card smdk2443 = { .name = "SMDK2443", + .platform = &s3c24xx_soc_platform, .dai_link = smdk2443_dai, .num_links = ARRAY_SIZE(smdk2443_dai), }; static struct snd_soc_device smdk2443_snd_ac97_devdata = { .card = &smdk2443, - .platform = &s3c24xx_soc_platform, .codec_dev = &soc_codec_dev_ac97, }; diff --git a/sound/soc/sh/sh7760-ac97.c b/sound/soc/sh/sh7760-ac97.c index 8b44f9c8a9ff..ce7f95b59de3 100644 --- a/sound/soc/sh/sh7760-ac97.c +++ b/sound/soc/sh/sh7760-ac97.c @@ -40,13 +40,13 @@ static struct snd_soc_dai_link sh7760_ac97_dai = { static struct snd_soc_card sh7760_ac97_soc_machine = { .name = "SH7760 AC97", + .platform = &sh7760_soc_platform, .dai_link = &sh7760_ac97_dai, .num_links = 1, }; static struct snd_soc_device sh7760_ac97_snd_devdata = { .card = &sh7760_ac97_soc_machine, - .platform = &sh7760_soc_platform, .codec_dev = &soc_codec_dev_ac97, }; diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index c4b22e6984e6..fe89260c9028 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -109,9 +109,10 @@ static int soc_pcm_open(struct snd_pcm_substream *substream) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; + struct snd_soc_card *card = socdev->card; struct snd_pcm_runtime *runtime = substream->runtime; struct snd_soc_dai_link *machine = rtd->dai; - struct snd_soc_platform *platform = socdev->platform; + struct snd_soc_platform *platform = card->platform; struct snd_soc_dai *cpu_dai = machine->cpu_dai; struct snd_soc_dai *codec_dai = machine->codec_dai; int ret = 0; @@ -302,7 +303,7 @@ static int soc_codec_close(struct snd_pcm_substream *substream) struct snd_soc_device *socdev = rtd->socdev; struct snd_soc_card *card = socdev->card; struct snd_soc_dai_link *machine = rtd->dai; - struct snd_soc_platform *platform = socdev->platform; + struct snd_soc_platform *platform = card->platform; struct snd_soc_dai *cpu_dai = machine->cpu_dai; struct snd_soc_dai *codec_dai = machine->codec_dai; struct snd_soc_codec *codec = socdev->codec; @@ -370,7 +371,7 @@ static int soc_pcm_prepare(struct snd_pcm_substream *substream) struct snd_soc_device *socdev = rtd->socdev; struct snd_soc_card *card = socdev->card; struct snd_soc_dai_link *machine = rtd->dai; - struct snd_soc_platform *platform = socdev->platform; + struct snd_soc_platform *platform = card->platform; struct snd_soc_dai *cpu_dai = machine->cpu_dai; struct snd_soc_dai *codec_dai = machine->codec_dai; struct snd_soc_codec *codec = socdev->codec; @@ -464,7 +465,8 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; struct snd_soc_dai_link *machine = rtd->dai; - struct snd_soc_platform *platform = socdev->platform; + struct snd_soc_card *card = socdev->card; + struct snd_soc_platform *platform = card->platform; struct snd_soc_dai *cpu_dai = machine->cpu_dai; struct snd_soc_dai *codec_dai = machine->codec_dai; int ret = 0; @@ -534,7 +536,8 @@ static int soc_pcm_hw_free(struct snd_pcm_substream *substream) struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; struct snd_soc_dai_link *machine = rtd->dai; - struct snd_soc_platform *platform = socdev->platform; + struct snd_soc_card *card = socdev->card; + struct snd_soc_platform *platform = card->platform; struct snd_soc_dai *cpu_dai = machine->cpu_dai; struct snd_soc_dai *codec_dai = machine->codec_dai; struct snd_soc_codec *codec = socdev->codec; @@ -568,8 +571,9 @@ static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd) { struct snd_soc_pcm_runtime *rtd = substream->private_data; struct snd_soc_device *socdev = rtd->socdev; + struct snd_soc_card *card= socdev->card; struct snd_soc_dai_link *machine = rtd->dai; - struct snd_soc_platform *platform = socdev->platform; + struct snd_soc_platform *platform = card->platform; struct snd_soc_dai *cpu_dai = machine->cpu_dai; struct snd_soc_dai *codec_dai = machine->codec_dai; int ret; @@ -610,7 +614,7 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state) { struct snd_soc_device *socdev = platform_get_drvdata(pdev); struct snd_soc_card *card = socdev->card; - struct snd_soc_platform *platform = socdev->platform; + struct snd_soc_platform *platform = card->platform; struct snd_soc_codec_device *codec_dev = socdev->codec_dev; struct snd_soc_codec *codec = socdev->codec; int i; @@ -686,7 +690,7 @@ static void soc_resume_deferred(struct work_struct *work) struct snd_soc_card, deferred_resume_work); struct snd_soc_device *socdev = card->socdev; - struct snd_soc_platform *platform = socdev->platform; + struct snd_soc_platform *platform = card->platform; struct snd_soc_codec_device *codec_dev = socdev->codec_dev; struct snd_soc_codec *codec = socdev->codec; struct platform_device *pdev = to_platform_device(socdev->dev); @@ -770,7 +774,7 @@ static int soc_probe(struct platform_device *pdev) int ret = 0, i; struct snd_soc_device *socdev = platform_get_drvdata(pdev); struct snd_soc_card *card = socdev->card; - struct snd_soc_platform *platform = socdev->platform; + struct snd_soc_platform *platform = card->platform; struct snd_soc_codec_device *codec_dev = socdev->codec_dev; /* Bodge while we push things out of socdev */ @@ -835,7 +839,7 @@ static int soc_remove(struct platform_device *pdev) int i; struct snd_soc_device *socdev = platform_get_drvdata(pdev); struct snd_soc_card *card = socdev->card; - struct snd_soc_platform *platform = socdev->platform; + struct snd_soc_platform *platform = card->platform; struct snd_soc_codec_device *codec_dev = socdev->codec_dev; run_delayed_work(&card->delayed_work); @@ -875,6 +879,8 @@ static int soc_new_pcm(struct snd_soc_device *socdev, struct snd_soc_dai_link *dai_link, int num) { struct snd_soc_codec *codec = socdev->codec; + struct snd_soc_card *card = socdev->card; + struct snd_soc_platform *platform = card->platform; struct snd_soc_dai *codec_dai = dai_link->codec_dai; struct snd_soc_dai *cpu_dai = dai_link->cpu_dai; struct snd_soc_pcm_runtime *rtd; @@ -910,13 +916,13 @@ static int soc_new_pcm(struct snd_soc_device *socdev, dai_link->pcm = pcm; pcm->private_data = rtd; - soc_pcm_ops.mmap = socdev->platform->pcm_ops->mmap; - soc_pcm_ops.pointer = socdev->platform->pcm_ops->pointer; - soc_pcm_ops.ioctl = socdev->platform->pcm_ops->ioctl; - soc_pcm_ops.copy = socdev->platform->pcm_ops->copy; - soc_pcm_ops.silence = socdev->platform->pcm_ops->silence; - soc_pcm_ops.ack = socdev->platform->pcm_ops->ack; - soc_pcm_ops.page = socdev->platform->pcm_ops->page; + soc_pcm_ops.mmap = platform->pcm_ops->mmap; + soc_pcm_ops.pointer = platform->pcm_ops->pointer; + soc_pcm_ops.ioctl = platform->pcm_ops->ioctl; + soc_pcm_ops.copy = platform->pcm_ops->copy; + soc_pcm_ops.silence = platform->pcm_ops->silence; + soc_pcm_ops.ack = platform->pcm_ops->ack; + soc_pcm_ops.page = platform->pcm_ops->page; if (playback) snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK, &soc_pcm_ops); @@ -924,14 +930,14 @@ static int soc_new_pcm(struct snd_soc_device *socdev, if (capture) snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &soc_pcm_ops); - ret = socdev->platform->pcm_new(codec->card, codec_dai, pcm); + ret = platform->pcm_new(codec->card, codec_dai, pcm); if (ret < 0) { printk(KERN_ERR "asoc: platform pcm constructor failed\n"); kfree(rtd); return ret; } - pcm->private_free = socdev->platform->pcm_free; + pcm->private_free = platform->pcm_free; printk(KERN_INFO "asoc: %s <-> %s mapping ok\n", codec_dai->name, cpu_dai->name); return ret; -- cgit v1.2.3 From 8789a9e7df6bf9b93739c4c7d4e380725bc9e936 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 2 Dec 2008 15:34:07 -0500 Subject: ring-buffer: read page interface Impact: new API to ring buffer This patch adds a new interface into the ring buffer that allows a page to be read from the ring buffer on a given CPU. For every page read, one must also be given to allow for a "swap" of the pages. rpage = ring_buffer_alloc_read_page(buffer); if (!rpage) goto err; ret = ring_buffer_read_page(buffer, &rpage, cpu, full); if (!ret) goto empty; process_page(rpage); ring_buffer_free_read_page(rpage); The caller of these functions must handle any waits that are needed to wait for new data. The ring_buffer_read_page will simply return 0 if there is no data, or if "full" is set and the writer is still on the current page. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ring_buffer.h | 5 ++ kernel/trace/ring_buffer.c | 166 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 171 insertions(+) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 3bb87a753fa3..1a350a847edd 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -124,6 +124,11 @@ void tracing_on(void); void tracing_off(void); void tracing_off_permanent(void); +void *ring_buffer_alloc_read_page(struct ring_buffer *buffer); +void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); +int ring_buffer_read_page(struct ring_buffer *buffer, + void **data_page, int cpu, int full); + enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, }; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 8619c5345889..50b74d3a5c32 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -687,6 +687,12 @@ static inline int rb_null_event(struct ring_buffer_event *event) return event->type == RINGBUF_TYPE_PADDING; } +static inline void * +__rb_data_page_index(struct buffer_data_page *page, unsigned index) +{ + return page->data + index; +} + static inline void *__rb_page_index(struct buffer_page *page, unsigned index) { return page->page->data + index; @@ -2232,6 +2238,166 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, return 0; } +static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, + struct buffer_data_page *page) +{ + struct ring_buffer_event *event; + unsigned long head; + + __raw_spin_lock(&cpu_buffer->lock); + for (head = 0; head < local_read(&page->commit); + head += rb_event_length(event)) { + + event = __rb_data_page_index(page, head); + if (RB_WARN_ON(cpu_buffer, rb_null_event(event))) + return; + /* Only count data entries */ + if (event->type != RINGBUF_TYPE_DATA) + continue; + cpu_buffer->entries--; + } + __raw_spin_unlock(&cpu_buffer->lock); +} + +/** + * ring_buffer_alloc_read_page - allocate a page to read from buffer + * @buffer: the buffer to allocate for. + * + * This function is used in conjunction with ring_buffer_read_page. + * When reading a full page from the ring buffer, these functions + * can be used to speed up the process. The calling function should + * allocate a few pages first with this function. Then when it + * needs to get pages from the ring buffer, it passes the result + * of this function into ring_buffer_read_page, which will swap + * the page that was allocated, with the read page of the buffer. + * + * Returns: + * The page allocated, or NULL on error. + */ +void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) +{ + unsigned long addr; + struct buffer_data_page *page; + + addr = __get_free_page(GFP_KERNEL); + if (!addr) + return NULL; + + page = (void *)addr; + + return page; +} + +/** + * ring_buffer_free_read_page - free an allocated read page + * @buffer: the buffer the page was allocate for + * @data: the page to free + * + * Free a page allocated from ring_buffer_alloc_read_page. + */ +void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data) +{ + free_page((unsigned long)data); +} + +/** + * ring_buffer_read_page - extract a page from the ring buffer + * @buffer: buffer to extract from + * @data_page: the page to use allocated from ring_buffer_alloc_read_page + * @cpu: the cpu of the buffer to extract + * @full: should the extraction only happen when the page is full. + * + * This function will pull out a page from the ring buffer and consume it. + * @data_page must be the address of the variable that was returned + * from ring_buffer_alloc_read_page. This is because the page might be used + * to swap with a page in the ring buffer. + * + * for example: + * rpage = ring_buffer_alloc_page(buffer); + * if (!rpage) + * return error; + * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0); + * if (ret) + * process_page(rpage); + * + * When @full is set, the function will not return true unless + * the writer is off the reader page. + * + * Note: it is up to the calling functions to handle sleeps and wakeups. + * The ring buffer can be used anywhere in the kernel and can not + * blindly call wake_up. The layer that uses the ring buffer must be + * responsible for that. + * + * Returns: + * 1 if data has been transferred + * 0 if no data has been transferred. + */ +int ring_buffer_read_page(struct ring_buffer *buffer, + void **data_page, int cpu, int full) +{ + struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; + struct ring_buffer_event *event; + struct buffer_data_page *page; + unsigned long flags; + int ret = 0; + + if (!data_page) + return 0; + + page = *data_page; + if (!page) + return 0; + + spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + + /* + * rb_buffer_peek will get the next ring buffer if + * the current reader page is empty. + */ + event = rb_buffer_peek(buffer, cpu, NULL); + if (!event) + goto out; + + /* check for data */ + if (!local_read(&cpu_buffer->reader_page->page->commit)) + goto out; + /* + * If the writer is already off of the read page, then simply + * switch the read page with the given page. Otherwise + * we need to copy the data from the reader to the writer. + */ + if (cpu_buffer->reader_page == cpu_buffer->commit_page) { + unsigned int read = cpu_buffer->reader_page->read; + + if (full) + goto out; + /* The writer is still on the reader page, we must copy */ + page = cpu_buffer->reader_page->page; + memcpy(page->data, + cpu_buffer->reader_page->page->data + read, + local_read(&page->commit) - read); + + /* consume what was read */ + cpu_buffer->reader_page += read; + + } else { + /* swap the pages */ + rb_init_page(page); + page = cpu_buffer->reader_page->page; + cpu_buffer->reader_page->page = *data_page; + cpu_buffer->reader_page->read = 0; + *data_page = page; + } + ret = 1; + + /* update the entry counter */ + rb_remove_entries(cpu_buffer, page); + out: + spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + + return ret; +} + static ssize_t rb_simple_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) -- cgit v1.2.3 From 14a866c567e040ccf6240d68b083dd1dbbde63e6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 2 Dec 2008 23:50:02 -0500 Subject: ftrace: add ftrace_graph_stop() Impact: new ftrace_graph_stop function While developing more features of function graph, I hit a bug that caused the WARN_ON to trigger in the prepare_ftrace_return function. Well, it was hard for me to find out that was happening because the bug would not print, it would just cause a hard lockup or reboot. The reason is that it is not safe to call printk from this function. Looking further, I also found that it calls unregister_ftrace_graph, which grabs a mutex and calls kstop machine. This would definitely lock the box up if it were to trigger. This patch adds a fast and safe ftrace_graph_stop() which will stop the function tracer. Then it is safe to call the WARN ON. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 10 ++++++---- include/linux/ftrace.h | 2 ++ kernel/trace/ftrace.c | 5 +++++ 3 files changed, 13 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 1a5b8f8cb3cc..adba8e9a427c 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -484,14 +484,16 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) : "memory" ); - if (WARN_ON(faulted)) { - unregister_ftrace_graph(); + if (unlikely(faulted)) { + ftrace_graph_stop(); + WARN_ON(1); return; } - if (WARN_ON(!__kernel_text_address(old))) { - unregister_ftrace_graph(); + if (unlikely(!__kernel_text_address(old))) { + ftrace_graph_stop(); *parent = old; + WARN_ON(1); return; } diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index afba918c623c..58ca1c3a3f4d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -376,6 +376,8 @@ typedef void (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ extern int register_ftrace_graph(trace_func_graph_ret_t retfunc, trace_func_graph_ent_t entryfunc); +extern void ftrace_graph_stop(void); + /* The current handlers in use */ extern trace_func_graph_ret_t ftrace_graph_return; extern trace_func_graph_ent_t ftrace_graph_entry; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2e78628443e8..a44af05ae2d0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1769,5 +1769,10 @@ void ftrace_graph_exit_task(struct task_struct *t) kfree(ret_stack); } + +void ftrace_graph_stop(void) +{ + ftrace_stop(); +} #endif -- cgit v1.2.3 From e49dc19c6a19ea112fcb94b7c62ec62cdd5c08aa Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 2 Dec 2008 23:50:05 -0500 Subject: ftrace: function graph return for function entry Impact: feature, let entry function decide to trace or not This patch lets the graph tracer entry function decide if the tracing should be done at the end as well. This requires all function graph entry functions return 1 if it should trace, or 0 if the return should not be traced. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 3 +++ arch/x86/kernel/entry_64.S | 3 +++ arch/x86/kernel/ftrace.c | 7 ++++++- include/linux/ftrace.h | 2 +- kernel/trace/ftrace.c | 10 +++++++--- kernel/trace/trace.c | 4 +++- kernel/trace/trace.h | 2 +- 7 files changed, 24 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 826682abed1d..43ceb3f454bf 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1196,6 +1196,9 @@ ENTRY(mcount) #ifdef CONFIG_FUNCTION_GRAPH_TRACER cmpl $ftrace_stub, ftrace_graph_return jnz ftrace_graph_caller + + cmpl $ftrace_graph_entry_stub, ftrace_graph_entry + jnz ftrace_graph_caller #endif .globl ftrace_stub ftrace_stub: diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 9060ba6497e2..54e0bbdccb99 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -120,6 +120,9 @@ ENTRY(mcount) #ifdef CONFIG_FUNCTION_GRAPH_TRACER cmpq $ftrace_stub, ftrace_graph_return jnz ftrace_graph_caller + + cmpq $ftrace_graph_entry_stub, ftrace_graph_entry + jnz ftrace_graph_caller #endif .globl ftrace_stub diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index adba8e9a427c..d278ad2ebda2 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -425,6 +425,7 @@ static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) trace->calltime = current->ret_stack[index].calltime; trace->overrun = atomic_read(¤t->trace_overrun); trace->depth = index; + barrier(); current->curr_ret_stack--; } @@ -506,7 +507,11 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) } trace.func = self_addr; - ftrace_graph_entry(&trace); + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) { + current->curr_ret_stack--; + *parent = old; + } } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 58ca1c3a3f4d..469ceb3e85ba 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -371,7 +371,7 @@ struct ftrace_graph_ret { #define FTRACE_RETSTACK_ALLOC_SIZE 32 /* Type of the callback handlers for tracing function graph*/ typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */ -typedef void (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ +typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ extern int register_ftrace_graph(trace_func_graph_ret_t retfunc, trace_func_graph_ent_t entryfunc); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index a44af05ae2d0..65b9e863056b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1636,11 +1636,15 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, static atomic_t ftrace_graph_active; +int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) +{ + return 0; +} + /* The callbacks that hook a function */ trace_func_graph_ret_t ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; -trace_func_graph_ent_t ftrace_graph_entry = - (trace_func_graph_ent_t)ftrace_stub; +trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub; /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) @@ -1738,7 +1742,7 @@ void unregister_ftrace_graph(void) atomic_dec(&ftrace_graph_active); ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; - ftrace_graph_entry = (trace_func_graph_ent_t)ftrace_stub; + ftrace_graph_entry = ftrace_graph_entry_stub; ftrace_shutdown(FTRACE_STOP_FUNC_RET); mutex_unlock(&ftrace_sysctl_lock); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 380de630ebce..8b6409a62b54 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1200,7 +1200,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) } #ifdef CONFIG_FUNCTION_GRAPH_TRACER -void trace_graph_entry(struct ftrace_graph_ent *trace) +int trace_graph_entry(struct ftrace_graph_ent *trace) { struct trace_array *tr = &global_trace; struct trace_array_cpu *data; @@ -1219,6 +1219,8 @@ void trace_graph_entry(struct ftrace_graph_ent *trace) } atomic_dec(&data->disabled); local_irq_restore(flags); + + return 1; } void trace_graph_return(struct ftrace_graph_ret *trace) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f96f4e787ff3..0565ae9a2210 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -412,7 +412,7 @@ void trace_function(struct trace_array *tr, unsigned long flags, int pc); void trace_graph_return(struct ftrace_graph_ret *trace); -void trace_graph_entry(struct ftrace_graph_ent *trace); +int trace_graph_entry(struct ftrace_graph_ent *trace); void trace_bts(struct trace_array *tr, unsigned long from, unsigned long to); -- cgit v1.2.3 From b908b53d580c3e9aba81ebe3339c5b7b4fa8031d Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Mon, 1 Dec 2008 06:30:04 +0000 Subject: of/gpio: Implement of_get_gpio_flags() This adds a new function, of_get_gpio_flags, which is like of_get_gpio(), but accepts a new "flags" argument. This new function will be used by the drivers that need to retrieve additional GPIO information, such as active-low flag. Also, this changes the default ("simple") .xlate routine to warn about bogus (< 2) #gpio-cells usage: the second cell should always be present for GPIO flags. Signed-off-by: Anton Vorontsov Signed-off-by: Paul Mackerras --- drivers/of/gpio.c | 36 +++++++++++++++++++++++++++++------- include/linux/of_gpio.h | 38 ++++++++++++++++++++++++++++++++++---- 2 files changed, 63 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/of/gpio.c b/drivers/of/gpio.c index 7cd7301b5839..a4ba217116eb 100644 --- a/drivers/of/gpio.c +++ b/drivers/of/gpio.c @@ -19,14 +19,17 @@ #include /** - * of_get_gpio - Get a GPIO number from the device tree to use with GPIO API + * of_get_gpio_flags - Get a GPIO number and flags to use with GPIO API * @np: device node to get GPIO from * @index: index of the GPIO + * @flags: a flags pointer to fill in * * Returns GPIO number to use with Linux generic GPIO API, or one of the errno - * value on the error condition. + * value on the error condition. If @flags is not NULL the function also fills + * in flags for the GPIO. */ -int of_get_gpio(struct device_node *np, int index) +int of_get_gpio_flags(struct device_node *np, int index, + enum of_gpio_flags *flags) { int ret; struct device_node *gc; @@ -59,7 +62,11 @@ int of_get_gpio(struct device_node *np, int index) goto err1; } - ret = of_gc->xlate(of_gc, np, gpio_spec); + /* .xlate might decide to not fill in the flags, so clear it. */ + if (flags) + *flags = 0; + + ret = of_gc->xlate(of_gc, np, gpio_spec, flags); if (ret < 0) goto err1; @@ -70,26 +77,41 @@ err0: pr_debug("%s exited with status %d\n", __func__, ret); return ret; } -EXPORT_SYMBOL(of_get_gpio); +EXPORT_SYMBOL(of_get_gpio_flags); /** - * of_gpio_simple_xlate - translate gpio_spec to the GPIO number + * of_gpio_simple_xlate - translate gpio_spec to the GPIO number and flags * @of_gc: pointer to the of_gpio_chip structure * @np: device node of the GPIO chip * @gpio_spec: gpio specifier as found in the device tree + * @flags: a flags pointer to fill in * * This is simple translation function, suitable for the most 1:1 mapped * gpio chips. This function performs only one sanity check: whether gpio * is less than ngpios (that is specified in the gpio_chip). */ int of_gpio_simple_xlate(struct of_gpio_chip *of_gc, struct device_node *np, - const void *gpio_spec) + const void *gpio_spec, enum of_gpio_flags *flags) { const u32 *gpio = gpio_spec; + /* + * We're discouraging gpio_cells < 2, since that way you'll have to + * write your own xlate function (that will have to retrive the GPIO + * number and the flags from a single gpio cell -- this is possible, + * but not recommended). + */ + if (of_gc->gpio_cells < 2) { + WARN_ON(1); + return -EINVAL; + } + if (*gpio > of_gc->gc.ngpio) return -EINVAL; + if (flags) + *flags = gpio[1]; + return *gpio; } EXPORT_SYMBOL(of_gpio_simple_xlate); diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h index 67db101d0eb8..e25abf610cb6 100644 --- a/include/linux/of_gpio.h +++ b/include/linux/of_gpio.h @@ -14,9 +14,22 @@ #ifndef __LINUX_OF_GPIO_H #define __LINUX_OF_GPIO_H +#include +#include #include #include +struct device_node; + +/* + * This is Linux-specific flags. By default controllers' and Linux' mapping + * match, but GPIO controllers are free to translate their own flags to + * Linux-specific in their .xlate callback. Though, 1:1 mapping is recommended. + */ +enum of_gpio_flags { + OF_GPIO_ACTIVE_LOW = 0x1, +}; + #ifdef CONFIG_OF_GPIO /* @@ -26,7 +39,7 @@ struct of_gpio_chip { struct gpio_chip gc; int gpio_cells; int (*xlate)(struct of_gpio_chip *of_gc, struct device_node *np, - const void *gpio_spec); + const void *gpio_spec, enum of_gpio_flags *flags); }; static inline struct of_gpio_chip *to_of_gpio_chip(struct gpio_chip *gc) @@ -50,20 +63,37 @@ static inline struct of_mm_gpio_chip *to_of_mm_gpio_chip(struct gpio_chip *gc) return container_of(of_gc, struct of_mm_gpio_chip, of_gc); } -extern int of_get_gpio(struct device_node *np, int index); +extern int of_get_gpio_flags(struct device_node *np, int index, + enum of_gpio_flags *flags); + extern int of_mm_gpiochip_add(struct device_node *np, struct of_mm_gpio_chip *mm_gc); extern int of_gpio_simple_xlate(struct of_gpio_chip *of_gc, struct device_node *np, - const void *gpio_spec); + const void *gpio_spec, + enum of_gpio_flags *flags); #else /* Drivers may not strictly depend on the GPIO support, so let them link. */ -static inline int of_get_gpio(struct device_node *np, int index) +static inline int of_get_gpio_flags(struct device_node *np, int index, + enum of_gpio_flags *flags) { return -ENOSYS; } #endif /* CONFIG_OF_GPIO */ +/** + * of_get_gpio - Get a GPIO number to use with GPIO API + * @np: device node to get GPIO from + * @index: index of the GPIO + * + * Returns GPIO number to use with Linux generic GPIO API, or one of the errno + * value on the error condition. + */ +static inline int of_get_gpio(struct device_node *np, int index) +{ + return of_get_gpio_flags(np, index, NULL); +} + #endif /* __LINUX_OF_GPIO_H */ -- cgit v1.2.3 From 384c89e2e4cb5879b86a38414d1b3bb2b23ec8ee Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 3 Dec 2008 17:34:03 +0000 Subject: ASoC: Push debugfs files out of the snd_soc_device structure This is in preparation for the removal of struct snd_soc_device. The pop time configuration should really be a property of the card not the codec but since DAPM currently uses the codec rather than the card using the codec is fine for now. Signed-off-by: Mark Brown --- include/sound/soc.h | 8 +++--- sound/soc/soc-core.c | 73 +++++++++++++++++++++++++++++----------------------- 2 files changed, 46 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index ad8141acd6b0..3ee608dce2f8 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -279,6 +279,11 @@ struct snd_soc_codec { /* codec DAI's */ struct snd_soc_dai *dai; unsigned int num_dai; + +#ifdef CONFIG_DEBUG_FS + struct dentry *debugfs_reg; + struct dentry *debugfs_pop_time; +#endif }; /* codec device */ @@ -364,9 +369,6 @@ struct snd_soc_device { struct snd_soc_codec *codec; struct snd_soc_codec_device *codec_dev; void *codec_data; -#ifdef CONFIG_DEBUG_FS - struct dentry *debugfs_root; -#endif }; /* runtime channel data */ diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index fe89260c9028..34114398b914 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -39,6 +39,10 @@ static DEFINE_MUTEX(pcm_mutex); static DEFINE_MUTEX(io_mutex); static DECLARE_WAIT_QUEUE_HEAD(soc_pm_waitq); +#ifdef CONFIG_DEBUG_FS +static struct dentry *debugfs_root; +#endif + /* * This is a timeout to do a DAPM powerdown after a stream is closed(). * It can be used to eliminate pops between different playback streams, e.g. @@ -1002,7 +1006,9 @@ static ssize_t codec_reg_read_file(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { ssize_t ret; - struct snd_soc_device *devdata = file->private_data; + struct snd_soc_codec *codec = file->private_data; + struct device *card_dev = codec->card->dev; + struct snd_soc_device *devdata = card_dev->driver_data; char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -1021,8 +1027,7 @@ static ssize_t codec_reg_write_file(struct file *file, char *start = buf; unsigned long reg, value; int step = 1; - struct snd_soc_device *devdata = file->private_data; - struct snd_soc_codec *codec = devdata->codec; + struct snd_soc_codec *codec = file->private_data; buf_size = min(count, (sizeof(buf)-1)); if (copy_from_user(buf, user_buf, buf_size)) @@ -1051,44 +1056,36 @@ static const struct file_operations codec_reg_fops = { .write = codec_reg_write_file, }; -static void soc_init_debugfs(struct snd_soc_device *socdev) +static void soc_init_codec_debugfs(struct snd_soc_codec *codec) { - struct dentry *root, *file; - struct snd_soc_codec *codec = socdev->codec; - root = debugfs_create_dir(dev_name(socdev->dev), NULL); - if (IS_ERR(root) || !root) - goto exit1; - - file = debugfs_create_file("codec_reg", 0644, - root, socdev, &codec_reg_fops); - if (!file) - goto exit2; - - file = debugfs_create_u32("dapm_pop_time", 0744, - root, &codec->pop_time); - if (!file) - goto exit2; - socdev->debugfs_root = root; - return; -exit2: - debugfs_remove_recursive(root); -exit1: - dev_err(socdev->dev, "debugfs is not available\n"); + codec->debugfs_reg = debugfs_create_file("codec_reg", 0644, + debugfs_root, codec, + &codec_reg_fops); + if (!codec->debugfs_reg) + printk(KERN_WARNING + "ASoC: Failed to create codec register debugfs file\n"); + + codec->debugfs_pop_time = debugfs_create_u32("dapm_pop_time", 0744, + debugfs_root, + &codec->pop_time); + if (!codec->debugfs_pop_time) + printk(KERN_WARNING + "Failed to create pop time debugfs file\n"); } -static void soc_cleanup_debugfs(struct snd_soc_device *socdev) +static void soc_cleanup_codec_debugfs(struct snd_soc_codec *codec) { - debugfs_remove_recursive(socdev->debugfs_root); - socdev->debugfs_root = NULL; + debugfs_remove(codec->debugfs_pop_time); + debugfs_remove(codec->debugfs_reg); } #else -static inline void soc_init_debugfs(struct snd_soc_device *socdev) +static inline void soc_init_codec_debugfs(struct snd_soc_codec *codec) { } -static inline void soc_cleanup_debugfs(struct snd_soc_device *socdev) +static inline void soc_cleanup_codec_debugfs(struct snd_soc_codec *codec) { } #endif @@ -1305,7 +1302,7 @@ int snd_soc_init_card(struct snd_soc_device *socdev) if (err < 0) printk(KERN_WARNING "asoc: failed to add codec sysfs files\n"); - soc_init_debugfs(socdev); + soc_init_codec_debugfs(socdev->codec); mutex_unlock(&codec->mutex); out: @@ -1329,7 +1326,7 @@ void snd_soc_free_pcms(struct snd_soc_device *socdev) #endif mutex_lock(&codec->mutex); - soc_cleanup_debugfs(socdev); + soc_cleanup_codec_debugfs(socdev->codec); #ifdef CONFIG_SND_SOC_AC97_BUS for (i = 0; i < codec->num_dai; i++) { codec_dai = &codec->dai[i]; @@ -1962,11 +1959,23 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_digital_mute); static int __devinit snd_soc_init(void) { +#ifdef CONFIG_DEBUG_FS + debugfs_root = debugfs_create_dir("asoc", NULL); + if (IS_ERR(debugfs_root) || !debugfs_root) { + printk(KERN_WARNING + "ASoC: Failed to create debugfs directory\n"); + debugfs_root = NULL; + } +#endif + return platform_driver_register(&soc_driver); } static void __exit snd_soc_exit(void) { +#ifdef CONFIG_DEBUG_FS + debugfs_remove_recursive(debugfs_root); +#endif platform_driver_unregister(&soc_driver); } -- cgit v1.2.3 From 07c84d0409f3551b79d676630d8ee76bb551598d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 3 Dec 2008 18:17:28 +0000 Subject: ASoC: Remove device from platform suspend and resume operations None of the platforms are actually using the SoC device so remove it (only atmel actually has a suspend method). Signed-off-by: Mark Brown --- include/sound/soc.h | 6 ++---- sound/soc/atmel/atmel-pcm.c | 6 ++---- sound/soc/soc-core.c | 4 ++-- 3 files changed, 6 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 3ee608dce2f8..8ec63c02dc10 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -300,10 +300,8 @@ struct snd_soc_platform { int (*probe)(struct platform_device *pdev); int (*remove)(struct platform_device *pdev); - int (*suspend)(struct platform_device *pdev, - struct snd_soc_dai *dai); - int (*resume)(struct platform_device *pdev, - struct snd_soc_dai *dai); + int (*suspend)(struct snd_soc_dai *dai); + int (*resume)(struct snd_soc_dai *dai); /* pcm creation and destruction */ int (*pcm_new)(struct snd_card *, struct snd_soc_dai *, diff --git a/sound/soc/atmel/atmel-pcm.c b/sound/soc/atmel/atmel-pcm.c index 394412fb396f..8507aa1cd811 100644 --- a/sound/soc/atmel/atmel-pcm.c +++ b/sound/soc/atmel/atmel-pcm.c @@ -417,8 +417,7 @@ static void atmel_pcm_free_dma_buffers(struct snd_pcm *pcm) } #ifdef CONFIG_PM -static int atmel_pcm_suspend(struct platform_device *pdev, - struct snd_soc_dai *dai) +static int atmel_pcm_suspend(struct snd_soc_dai *dai) { struct snd_pcm_runtime *runtime = dai->runtime; struct atmel_runtime_data *prtd; @@ -442,8 +441,7 @@ static int atmel_pcm_suspend(struct platform_device *pdev, return 0; } -static int atmel_pcm_resume(struct platform_device *pdev, - struct snd_soc_dai *dai) +static int atmel_pcm_resume(struct snd_soc_dai *dai) { struct snd_pcm_runtime *runtime = dai->runtime; struct atmel_runtime_data *prtd; diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 34114398b914..f83852f11463 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -652,7 +652,7 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state) if (cpu_dai->suspend && !cpu_dai->ac97_control) cpu_dai->suspend(pdev, cpu_dai); if (platform->suspend) - platform->suspend(pdev, cpu_dai); + platform->suspend(cpu_dai); } /* close any waiting streams and save state */ @@ -741,7 +741,7 @@ static void soc_resume_deferred(struct work_struct *work) if (cpu_dai->resume && !cpu_dai->ac97_control) cpu_dai->resume(pdev, cpu_dai); if (platform->resume) - platform->resume(pdev, cpu_dai); + platform->resume(cpu_dai); } if (card->resume_post) -- cgit v1.2.3 From dc7d7b830ee1f4111696e73d1c25da683b461548 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 3 Dec 2008 18:21:52 +0000 Subject: ASoC: Remove platform device from DAI suspend and resume operations None of the DAIs use it except s3c2412-i2s which only uses it for dev_() printouts. Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 6 ++---- sound/soc/atmel/atmel_ssc_dai.c | 6 ++---- sound/soc/au1x/psc-ac97.c | 6 ++---- sound/soc/au1x/psc-i2s.c | 6 ++---- sound/soc/blackfin/bf5xx-ac97.c | 6 ++---- sound/soc/blackfin/bf5xx-i2s.c | 6 ++---- sound/soc/pxa/pxa-ssp.c | 6 ++---- sound/soc/pxa/pxa2xx-ac97.c | 6 ++---- sound/soc/pxa/pxa2xx-i2s.c | 6 ++---- sound/soc/s3c24xx/s3c2412-i2s.c | 16 +++++++--------- sound/soc/s3c24xx/s3c24xx-i2s.c | 6 ++---- sound/soc/soc-core.c | 8 ++++---- 12 files changed, 31 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index a01a24b10196..e2d5f76838c6 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -191,10 +191,8 @@ struct snd_soc_dai { struct snd_soc_dai *dai); void (*remove)(struct platform_device *pdev, struct snd_soc_dai *dai); - int (*suspend)(struct platform_device *pdev, - struct snd_soc_dai *dai); - int (*resume)(struct platform_device *pdev, - struct snd_soc_dai *dai); + int (*suspend)(struct snd_soc_dai *dai); + int (*resume)(struct snd_soc_dai *dai); /* ops */ struct snd_soc_dai_ops ops; diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c index 0bb18dfa9495..d9b874c5bf37 100644 --- a/sound/soc/atmel/atmel_ssc_dai.c +++ b/sound/soc/atmel/atmel_ssc_dai.c @@ -628,8 +628,7 @@ static int atmel_ssc_prepare(struct snd_pcm_substream *substream, #ifdef CONFIG_PM -static int atmel_ssc_suspend(struct platform_device *pdev, - struct snd_soc_dai *cpu_dai) +static int atmel_ssc_suspend(struct snd_soc_dai *cpu_dai) { struct atmel_ssc_info *ssc_p; @@ -657,8 +656,7 @@ static int atmel_ssc_suspend(struct platform_device *pdev, -static int atmel_ssc_resume(struct platform_device *pdev, - struct snd_soc_dai *cpu_dai) +static int atmel_ssc_resume(struct snd_soc_dai *cpu_dai) { struct atmel_ssc_info *ssc_p; u32 cr; diff --git a/sound/soc/au1x/psc-ac97.c b/sound/soc/au1x/psc-ac97.c index a0bcfeaf5f86..a1e824d29cf9 100644 --- a/sound/soc/au1x/psc-ac97.c +++ b/sound/soc/au1x/psc-ac97.c @@ -314,8 +314,7 @@ static void au1xpsc_ac97_remove(struct platform_device *pdev, au1xpsc_ac97_workdata = NULL; } -static int au1xpsc_ac97_suspend(struct platform_device *pdev, - struct snd_soc_dai *dai) +static int au1xpsc_ac97_suspend(struct snd_soc_dai *dai) { /* save interesting registers and disable PSC */ au1xpsc_ac97_workdata->pm[0] = @@ -329,8 +328,7 @@ static int au1xpsc_ac97_suspend(struct platform_device *pdev, return 0; } -static int au1xpsc_ac97_resume(struct platform_device *pdev, - struct snd_soc_dai *dai) +static int au1xpsc_ac97_resume(struct snd_soc_dai *dai) { /* restore PSC clock config */ au_writel(au1xpsc_ac97_workdata->pm[0] | PSC_SEL_PS_AC97MODE, diff --git a/sound/soc/au1x/psc-i2s.c b/sound/soc/au1x/psc-i2s.c index f4217e70a787..16f97462ea15 100644 --- a/sound/soc/au1x/psc-i2s.c +++ b/sound/soc/au1x/psc-i2s.c @@ -339,8 +339,7 @@ static void au1xpsc_i2s_remove(struct platform_device *pdev, au1xpsc_i2s_workdata = NULL; } -static int au1xpsc_i2s_suspend(struct platform_device *pdev, - struct snd_soc_dai *cpu_dai) +static int au1xpsc_i2s_suspend(struct snd_soc_dai *cpu_dai) { /* save interesting register and disable PSC */ au1xpsc_i2s_workdata->pm[0] = @@ -354,8 +353,7 @@ static int au1xpsc_i2s_suspend(struct platform_device *pdev, return 0; } -static int au1xpsc_i2s_resume(struct platform_device *pdev, - struct snd_soc_dai *cpu_dai) +static int au1xpsc_i2s_resume(struct snd_soc_dai *cpu_dai) { /* select I2S mode and PSC clock */ au_writel(PSC_CTRL_DISABLE, PSC_CTRL(au1xpsc_i2s_workdata)); diff --git a/sound/soc/blackfin/bf5xx-ac97.c b/sound/soc/blackfin/bf5xx-ac97.c index 709bdf08e398..c602ce109d52 100644 --- a/sound/soc/blackfin/bf5xx-ac97.c +++ b/sound/soc/blackfin/bf5xx-ac97.c @@ -269,8 +269,7 @@ struct snd_ac97_bus_ops soc_ac97_ops = { EXPORT_SYMBOL_GPL(soc_ac97_ops); #ifdef CONFIG_PM -static int bf5xx_ac97_suspend(struct platform_device *pdev, - struct snd_soc_dai *dai) +static int bf5xx_ac97_suspend(struct snd_soc_dai *dai) { struct sport_device *sport = (struct sport_device *)dai->private_data; @@ -285,8 +284,7 @@ static int bf5xx_ac97_suspend(struct platform_device *pdev, return 0; } -static int bf5xx_ac97_resume(struct platform_device *pdev, - struct snd_soc_dai *dai) +static int bf5xx_ac97_resume(struct snd_soc_dai *dai) { int ret; struct sport_device *sport = diff --git a/sound/soc/blackfin/bf5xx-i2s.c b/sound/soc/blackfin/bf5xx-i2s.c index 6e5036bf9245..9f8ce87cc6c6 100644 --- a/sound/soc/blackfin/bf5xx-i2s.c +++ b/sound/soc/blackfin/bf5xx-i2s.c @@ -222,16 +222,14 @@ static int bf5xx_i2s_probe(struct platform_device *pdev, return 0; } -static void bf5xx_i2s_remove(struct platform_device *pdev, - struct snd_soc_dai *dai) +static void bf5xx_i2s_remove(struct snd_soc_dai *dai) { pr_debug("%s enter\n", __func__); peripheral_free_list(&sport_req[sport_num][0]); } #ifdef CONFIG_PM -static int bf5xx_i2s_suspend(struct platform_device *dev, - struct snd_soc_dai *dai) +static int bf5xx_i2s_suspend(struct snd_soc_dai *dai) { struct sport_device *sport = (struct sport_device *)dai->private_data; diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c index 402fc5ba65e7..73fa10defcca 100644 --- a/sound/soc/pxa/pxa-ssp.c +++ b/sound/soc/pxa/pxa-ssp.c @@ -244,8 +244,7 @@ static void pxa_ssp_shutdown(struct snd_pcm_substream *substream, #ifdef CONFIG_PM -static int pxa_ssp_suspend(struct platform_device *pdev, - struct snd_soc_dai *cpu_dai) +static int pxa_ssp_suspend(struct snd_soc_dai *cpu_dai) { struct ssp_priv *priv = cpu_dai->private_data; @@ -257,8 +256,7 @@ static int pxa_ssp_suspend(struct platform_device *pdev, return 0; } -static int pxa_ssp_resume(struct platform_device *pdev, - struct snd_soc_dai *cpu_dai) +static int pxa_ssp_resume(struct snd_soc_dai *cpu_dai) { struct ssp_priv *priv = cpu_dai->private_data; diff --git a/sound/soc/pxa/pxa2xx-ac97.c b/sound/soc/pxa/pxa2xx-ac97.c index bffbe288634c..8eed80d5675d 100644 --- a/sound/soc/pxa/pxa2xx-ac97.c +++ b/sound/soc/pxa/pxa2xx-ac97.c @@ -87,14 +87,12 @@ static struct pxa2xx_pcm_dma_params pxa2xx_ac97_pcm_mic_mono_in = { }; #ifdef CONFIG_PM -static int pxa2xx_ac97_suspend(struct platform_device *pdev, - struct snd_soc_dai *dai) +static int pxa2xx_ac97_suspend(struct snd_soc_dai *dai) { return pxa2xx_ac97_hw_suspend(); } -static int pxa2xx_ac97_resume(struct platform_device *pdev, - struct snd_soc_dai *dai) +static int pxa2xx_ac97_resume(struct snd_soc_dai *dai) { return pxa2xx_ac97_hw_resume(); } diff --git a/sound/soc/pxa/pxa2xx-i2s.c b/sound/soc/pxa/pxa2xx-i2s.c index f9a9e2ebafa1..314973ace6dc 100644 --- a/sound/soc/pxa/pxa2xx-i2s.c +++ b/sound/soc/pxa/pxa2xx-i2s.c @@ -293,8 +293,7 @@ static void pxa2xx_i2s_shutdown(struct snd_pcm_substream *substream, } #ifdef CONFIG_PM -static int pxa2xx_i2s_suspend(struct platform_device *dev, - struct snd_soc_dai *dai) +static int pxa2xx_i2s_suspend(struct snd_soc_dai *dai) { if (!dai->active) return 0; @@ -311,8 +310,7 @@ static int pxa2xx_i2s_suspend(struct platform_device *dev, return 0; } -static int pxa2xx_i2s_resume(struct platform_device *pdev, - struct snd_soc_dai *dai) +static int pxa2xx_i2s_resume(struct snd_soc_dai *dai) { if (!dai->active) return 0; diff --git a/sound/soc/s3c24xx/s3c2412-i2s.c b/sound/soc/s3c24xx/s3c2412-i2s.c index 1c741047ae35..75f87c3c74d0 100644 --- a/sound/soc/s3c24xx/s3c2412-i2s.c +++ b/sound/soc/s3c24xx/s3c2412-i2s.c @@ -649,8 +649,7 @@ static int s3c2412_i2s_probe(struct platform_device *pdev, } #ifdef CONFIG_PM -static int s3c2412_i2s_suspend(struct platform_device *dev, - struct snd_soc_dai *dai) +static int s3c2412_i2s_suspend(struct snd_soc_dai *dai) { struct s3c2412_i2s_info *i2s = &s3c2412_i2s; u32 iismod; @@ -665,25 +664,24 @@ static int s3c2412_i2s_suspend(struct platform_device *dev, iismod = readl(i2s->regs + S3C2412_IISMOD); if (iismod & S3C2412_IISCON_RXDMA_ACTIVE) - dev_warn(&dev->dev, "%s: RXDMA active?\n", __func__); + pr_warning("%s: RXDMA active?\n", __func__); if (iismod & S3C2412_IISCON_TXDMA_ACTIVE) - dev_warn(&dev->dev, "%s: TXDMA active?\n", __func__); + pr_warning("%s: TXDMA active?\n", __func__); if (iismod & S3C2412_IISCON_IIS_ACTIVE) - dev_warn(&dev->dev, "%s: IIS active\n", __func__); + pr_warning("%s: IIS active\n", __func__); } return 0; } -static int s3c2412_i2s_resume(struct platform_device *pdev, - struct snd_soc_dai *dai) +static int s3c2412_i2s_resume(struct snd_soc_dai *dai) { struct s3c2412_i2s_info *i2s = &s3c2412_i2s; - dev_info(&pdev->dev, "dai_active %d, IISMOD %08x, IISCON %08x\n", - dai->active, i2s->suspend_iismod, i2s->suspend_iiscon); + pr_info("dai_active %d, IISMOD %08x, IISCON %08x\n", + dai->active, i2s->suspend_iismod, i2s->suspend_iiscon); if (dai->active) { writel(i2s->suspend_iiscon, i2s->regs + S3C2412_IISCON); diff --git a/sound/soc/s3c24xx/s3c24xx-i2s.c b/sound/soc/s3c24xx/s3c24xx-i2s.c index 8d9135f41bc9..45fe8f7c88ab 100644 --- a/sound/soc/s3c24xx/s3c24xx-i2s.c +++ b/sound/soc/s3c24xx/s3c24xx-i2s.c @@ -419,8 +419,7 @@ static int s3c24xx_i2s_probe(struct platform_device *pdev, } #ifdef CONFIG_PM -static int s3c24xx_i2s_suspend(struct platform_device *pdev, - struct snd_soc_dai *cpu_dai) +static int s3c24xx_i2s_suspend(struct snd_soc_dai *cpu_dai) { DBG("Entered %s\n", __func__); @@ -434,8 +433,7 @@ static int s3c24xx_i2s_suspend(struct platform_device *pdev, return 0; } -static int s3c24xx_i2s_resume(struct platform_device *pdev, - struct snd_soc_dai *cpu_dai) +static int s3c24xx_i2s_resume(struct snd_soc_dai *cpu_dai) { DBG("Entered %s\n", __func__); clk_enable(s3c24xx_i2s.iis_clk); diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index f83852f11463..2f2a8d93bbf0 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -650,7 +650,7 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state) for (i = 0; i < card->num_links; i++) { struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai; if (cpu_dai->suspend && !cpu_dai->ac97_control) - cpu_dai->suspend(pdev, cpu_dai); + cpu_dai->suspend(cpu_dai); if (platform->suspend) platform->suspend(cpu_dai); } @@ -676,7 +676,7 @@ static int soc_suspend(struct platform_device *pdev, pm_message_t state) for (i = 0; i < card->num_links; i++) { struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai; if (cpu_dai->suspend && cpu_dai->ac97_control) - cpu_dai->suspend(pdev, cpu_dai); + cpu_dai->suspend(cpu_dai); } if (card->suspend_post) @@ -712,7 +712,7 @@ static void soc_resume_deferred(struct work_struct *work) for (i = 0; i < card->num_links; i++) { struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai; if (cpu_dai->resume && cpu_dai->ac97_control) - cpu_dai->resume(pdev, cpu_dai); + cpu_dai->resume(cpu_dai); } if (codec_dev->resume) @@ -739,7 +739,7 @@ static void soc_resume_deferred(struct work_struct *work) for (i = 0; i < card->num_links; i++) { struct snd_soc_dai *cpu_dai = card->dai_link[i].cpu_dai; if (cpu_dai->resume && !cpu_dai->ac97_control) - cpu_dai->resume(pdev, cpu_dai); + cpu_dai->resume(cpu_dai); if (platform->resume) platform->resume(cpu_dai); } -- cgit v1.2.3 From 52404881984e2d447f920a23e3bb63262dfc77f3 Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 3 Dec 2008 15:42:56 -0800 Subject: Phonet: basic net namespace support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/phonet.h | 2 +- include/net/phonet/pn_dev.h | 2 +- net/phonet/af_phonet.c | 8 +------- net/phonet/pn_dev.c | 6 ++++-- net/phonet/socket.c | 11 +++++++---- 5 files changed, 14 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index c6a245184460..057b0a8a2885 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -46,7 +46,7 @@ static inline struct pn_sock *pn_sk(struct sock *sk) extern const struct proto_ops phonet_dgram_ops; -struct sock *pn_find_sock_by_sa(const struct sockaddr_pn *sa); +struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *sa); void phonet_get_local_port_range(int *min, int *max); void pn_sock_hash(struct sock *sk); void pn_sock_unhash(struct sock *sk); diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index bbd2a836e04c..aa1c59a1d33f 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -43,7 +43,7 @@ struct net_device *phonet_device_get(struct net *net); int phonet_address_add(struct net_device *dev, u8 addr); int phonet_address_del(struct net_device *dev, u8 addr); u8 phonet_address_get(struct net_device *dev, u8 addr); -int phonet_address_lookup(u8 addr); +int phonet_address_lookup(struct net *net, u8 addr); #define PN_NO_ADDR 0xff diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 9d211f12582b..13cb323f8c38 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -67,9 +67,6 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol) struct phonet_protocol *pnp; int err; - if (net != &init_net) - return -EAFNOSUPPORT; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -352,9 +349,6 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, struct sockaddr_pn sa; u16 len; - if (dev_net(dev) != &init_net) - goto out; - /* check we have at least a full Phonet header */ if (!pskb_pull(skb, sizeof(struct phonethdr))) goto out; @@ -373,7 +367,7 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, if (pn_sockaddr_get_addr(&sa) == 0) goto out; /* currently, we cannot be device 0 */ - sk = pn_find_sock_by_sa(&sa); + sk = pn_find_sock_by_sa(dev_net(dev), &sa); if (sk == NULL) { if (can_respond(skb)) { send_obj_unreachable(skb); diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index f93ff8ef47d0..5491bf5e354b 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -76,7 +76,7 @@ struct net_device *phonet_device_get(struct net *net) dev = pnd->netdev; BUG_ON(!dev); - if (dev_net(dev) == net && + if (net_eq(dev_net(dev), net) && (dev->reg_state == NETREG_REGISTERED) && ((pnd->netdev->flags & IFF_UP)) == IFF_UP) break; @@ -140,12 +140,14 @@ u8 phonet_address_get(struct net_device *dev, u8 addr) return addr; } -int phonet_address_lookup(u8 addr) +int phonet_address_lookup(struct net *net, u8 addr) { struct phonet_device *pnd; spin_lock_bh(&pndevs.lock); list_for_each_entry(pnd, &pndevs.list, list) { + if (!net_eq(dev_net(pnd->netdev), net)) + continue; /* Don't allow unregistering devices! */ if ((pnd->netdev->reg_state != NETREG_REGISTERED) || ((pnd->netdev->flags & IFF_UP)) != IFF_UP) diff --git a/net/phonet/socket.c b/net/phonet/socket.c index d81740187fb4..c75aa5cdead5 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -57,7 +57,7 @@ static struct { * Find address based on socket address, match only certain fields. * Also grab sock if it was found. Remember to sock_put it later. */ -struct sock *pn_find_sock_by_sa(const struct sockaddr_pn *spn) +struct sock *pn_find_sock_by_sa(struct net *net, const struct sockaddr_pn *spn) { struct hlist_node *node; struct sock *sknode; @@ -71,6 +71,8 @@ struct sock *pn_find_sock_by_sa(const struct sockaddr_pn *spn) struct pn_sock *pn = pn_sk(sknode); BUG_ON(!pn->sobject); /* unbound socket */ + if (!net_eq(sock_net(sknode), net)) + continue; if (pn_port(obj)) { /* Look up socket by port */ if (pn_port(pn->sobject) != pn_port(obj)) @@ -130,7 +132,7 @@ static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len) handle = pn_sockaddr_get_object((struct sockaddr_pn *)addr); saddr = pn_addr(handle); - if (saddr && phonet_address_lookup(saddr)) + if (saddr && phonet_address_lookup(sock_net(sk), saddr)) return -EADDRNOTAVAIL; lock_sock(sk); @@ -361,6 +363,7 @@ static DEFINE_MUTEX(port_mutex); int pn_sock_get_port(struct sock *sk, unsigned short sport) { static int port_cur; + struct net *net = sock_net(sk); struct pn_sock *pn = pn_sk(sk); struct sockaddr_pn try_sa; struct sock *tmpsk; @@ -381,7 +384,7 @@ int pn_sock_get_port(struct sock *sk, unsigned short sport) port_cur = pmin; pn_sockaddr_set_port(&try_sa, port_cur); - tmpsk = pn_find_sock_by_sa(&try_sa); + tmpsk = pn_find_sock_by_sa(net, &try_sa); if (tmpsk == NULL) { sport = port_cur; goto found; @@ -391,7 +394,7 @@ int pn_sock_get_port(struct sock *sk, unsigned short sport) } else { /* try to find specific port */ pn_sockaddr_set_port(&try_sa, sport); - tmpsk = pn_find_sock_by_sa(&try_sa); + tmpsk = pn_find_sock_by_sa(net, &try_sa); if (tmpsk == NULL) /* No sock there! We can use that port... */ goto found; -- cgit v1.2.3 From d253eee20195b25e298bf162a6e72f14bf4803e5 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 3 Dec 2008 15:52:35 -0800 Subject: can: Fix CAN_(EFF|RTR)_FLAG handling in can_filter Due to a wrong safety check in af_can.c it was not possible to filter for SFF frames with a specific CAN identifier without getting the same selected CAN identifier from a received EFF frame also. This fix has a minimum (but user visible) impact on the CAN filter API and therefore the CAN version is set to a new date. Indeed the 'old' API is still working as-is. But when now setting CAN_(EFF|RTR)_FLAG in can_filter.can_mask you might get less traffic than before - but still the stuff that you expected to get for your defined filter ... Thanks to Kurt Van Dijck for pointing at this issue and for the review. Signed-off-by: Oliver Hartkopp Acked-by: Kurt Van Dijck Signed-off-by: David S. Miller --- include/linux/can/core.h | 2 +- net/can/af_can.c | 63 ++++++++++++++++++++++++++++++++++++------------ net/can/bcm.c | 7 +++--- 3 files changed, 53 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/can/core.h b/include/linux/can/core.h index e9ca210ffa5b..f50785ad4781 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -19,7 +19,7 @@ #include #include -#define CAN_VERSION "20071116" +#define CAN_VERSION "20081130" /* increment this number each time you change some user-space interface */ #define CAN_ABI_VERSION "8" diff --git a/net/can/af_can.c b/net/can/af_can.c index 7d4d2b3c137e..d8173e50cb87 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -319,23 +319,52 @@ static struct dev_rcv_lists *find_dev_rcv_lists(struct net_device *dev) return n ? d : NULL; } +/** + * find_rcv_list - determine optimal filterlist inside device filter struct + * @can_id: pointer to CAN identifier of a given can_filter + * @mask: pointer to CAN mask of a given can_filter + * @d: pointer to the device filter struct + * + * Description: + * Returns the optimal filterlist to reduce the filter handling in the + * receive path. This function is called by service functions that need + * to register or unregister a can_filter in the filter lists. + * + * A filter matches in general, when + * + * & mask == can_id & mask + * + * so every bit set in the mask (even CAN_EFF_FLAG, CAN_RTR_FLAG) describe + * relevant bits for the filter. + * + * The filter can be inverted (CAN_INV_FILTER bit set in can_id) or it can + * filter for error frames (CAN_ERR_FLAG bit set in mask). For error frames + * there is a special filterlist and a special rx path filter handling. + * + * Return: + * Pointer to optimal filterlist for the given can_id/mask pair. + * Constistency checked mask. + * Reduced can_id to have a preprocessed filter compare value. + */ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, struct dev_rcv_lists *d) { canid_t inv = *can_id & CAN_INV_FILTER; /* save flag before masking */ - /* filter error frames */ + /* filter for error frames in extra filterlist */ if (*mask & CAN_ERR_FLAG) { - /* clear CAN_ERR_FLAG in list entry */ + /* clear CAN_ERR_FLAG in filter entry */ *mask &= CAN_ERR_MASK; return &d->rx[RX_ERR]; } - /* ensure valid values in can_mask */ - if (*mask & CAN_EFF_FLAG) - *mask &= (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG); - else - *mask &= (CAN_SFF_MASK | CAN_RTR_FLAG); + /* with cleared CAN_ERR_FLAG we have a simple mask/value filterpair */ + +#define CAN_EFF_RTR_FLAGS (CAN_EFF_FLAG | CAN_RTR_FLAG) + + /* ensure valid values in can_mask for 'SFF only' frame filtering */ + if ((*mask & CAN_EFF_FLAG) && !(*can_id & CAN_EFF_FLAG)) + *mask &= (CAN_SFF_MASK | CAN_EFF_RTR_FLAGS); /* reduce condition testing at receive time */ *can_id &= *mask; @@ -348,15 +377,19 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, if (!(*mask)) return &d->rx[RX_ALL]; - /* use extra filterset for the subscription of exactly *ONE* can_id */ - if (*can_id & CAN_EFF_FLAG) { - if (*mask == (CAN_EFF_MASK | CAN_EFF_FLAG)) { - /* RFC: a use-case for hash-tables in the future? */ - return &d->rx[RX_EFF]; + /* extra filterlists for the subscription of a single non-RTR can_id */ + if (((*mask & CAN_EFF_RTR_FLAGS) == CAN_EFF_RTR_FLAGS) + && !(*can_id & CAN_RTR_FLAG)) { + + if (*can_id & CAN_EFF_FLAG) { + if (*mask == (CAN_EFF_MASK | CAN_EFF_RTR_FLAGS)) { + /* RFC: a future use-case for hash-tables? */ + return &d->rx[RX_EFF]; + } + } else { + if (*mask == (CAN_SFF_MASK | CAN_EFF_RTR_FLAGS)) + return &d->rx_sff[*can_id]; } - } else { - if (*mask == CAN_SFF_MASK) - return &d->rx_sff[*can_id]; } /* default: filter via can_id/can_mask */ diff --git a/net/can/bcm.c b/net/can/bcm.c index d0dd382001e2..da0d426c0ce4 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -64,10 +64,11 @@ #define BCM_CAN_DLC_MASK 0x0F /* clean private flags in can_dlc by masking */ /* get best masking value for can_rx_register() for a given single can_id */ -#define REGMASK(id) ((id & CAN_RTR_FLAG) | ((id & CAN_EFF_FLAG) ? \ - (CAN_EFF_MASK | CAN_EFF_FLAG) : CAN_SFF_MASK)) +#define REGMASK(id) ((id & CAN_EFF_FLAG) ? \ + (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \ + (CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG)) -#define CAN_BCM_VERSION "20080415" +#define CAN_BCM_VERSION CAN_VERSION static __initdata const char banner[] = KERN_INFO "can: broadcast manager protocol (rev " CAN_BCM_VERSION ")\n"; -- cgit v1.2.3 From 8865c418caf4e9dd2c24bdfae3a5a4106e143e60 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 3 Dec 2008 22:12:38 -0800 Subject: atm: 32-bit ioctl compatibility We lack compat ioctl support through most of the ATM code. This patch deals with most of it, and I can now at least use BR2684 and PPPoATM with 32-bit userspace. I haven't added a .compat_ioctl method to struct atm_ioctl, because AFAICT none of the current users need any conversion -- so we can just call the ->ioctl() method in every case. I looked at br2684, clip, lec, mpc, pppoatm and atmtcp. In svc_compat_ioctl() the only mangling which is needed is to change COMPAT_ATM_ADDPARTY to ATM_ADDPARTY. Although it's defined as _IOW('a', ATMIOC_SPECIAL+4,struct atm_iobuf) it doesn't actually _take_ a struct atm_iobuf as an argument -- it takes a struct sockaddr_atmsvc, which _is_ the same between 32-bit and 64-bit code, so doesn't need conversion. Almost all of vcc_ioctl() would have been identical, so I converted that into a core do_vcc_ioctl() function with an 'int compat' argument. I've done the same with atm_dev_ioctl(), where there _are_ a few differences, but still it's relatively contained and there would otherwise have been a lot of duplication. I haven't done any of the actual device-specific ioctls, although I've added a compat_ioctl method to struct atmdev_ops. Signed-off-by: David Woodhouse Signed-off-by: David S. Miller --- include/linux/atm.h | 17 ++++++++-- include/linux/atmdev.h | 15 +++++++++ net/atm/common.h | 1 + net/atm/ioctl.c | 49 ++++++++++++++++++++++++---- net/atm/pvc.c | 3 ++ net/atm/resources.c | 88 ++++++++++++++++++++++++++++++++++++++------------ net/atm/resources.h | 2 +- net/atm/svc.c | 19 +++++++++++ 8 files changed, 164 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/linux/atm.h b/include/linux/atm.h index c791ddd96939..d3b292174aeb 100644 --- a/include/linux/atm.h +++ b/include/linux/atm.h @@ -231,10 +231,21 @@ static __inline__ int atmpvc_addr_in_use(struct sockaddr_atmpvc addr) */ struct atmif_sioc { - int number; - int length; - void __user *arg; + int number; + int length; + void __user *arg; }; +#ifdef __KERNEL__ +#ifdef CONFIG_COMPAT +#include +struct compat_atmif_sioc { + int number; + int length; + compat_uptr_t arg; +}; +#endif +#endif + typedef unsigned short atm_backend_t; #endif diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index a3d07c29d16c..086e5c362d3a 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -100,6 +100,10 @@ struct atm_dev_stats { /* use backend to make new if */ #define ATM_ADDPARTY _IOW('a', ATMIOC_SPECIAL+4,struct atm_iobuf) /* add party to p2mp call */ +#ifdef CONFIG_COMPAT +/* It actually takes struct sockaddr_atmsvc, not struct atm_iobuf */ +#define COMPAT_ATM_ADDPARTY _IOW('a', ATMIOC_SPECIAL+4,struct compat_atm_iobuf) +#endif #define ATM_DROPPARTY _IOW('a', ATMIOC_SPECIAL+5,int) /* drop party from p2mp call */ @@ -224,6 +228,13 @@ struct atm_cirange { extern struct proc_dir_entry *atm_proc_root; #endif +#ifdef CONFIG_COMPAT +#include +struct compat_atm_iobuf { + int length; + compat_uptr_t buffer; +}; +#endif struct k_atm_aal_stats { #define __HANDLE_ITEM(i) atomic_t i @@ -379,6 +390,10 @@ struct atmdev_ops { /* only send is required */ int (*open)(struct atm_vcc *vcc); void (*close)(struct atm_vcc *vcc); int (*ioctl)(struct atm_dev *dev,unsigned int cmd,void __user *arg); +#ifdef CONFIG_COMPAT + int (*compat_ioctl)(struct atm_dev *dev,unsigned int cmd, + void __user *arg); +#endif int (*getsockopt)(struct atm_vcc *vcc,int level,int optname, void __user *optval,int optlen); int (*setsockopt)(struct atm_vcc *vcc,int level,int optname, diff --git a/net/atm/common.h b/net/atm/common.h index 16f32c1fa1c9..92e2981f479f 100644 --- a/net/atm/common.h +++ b/net/atm/common.h @@ -19,6 +19,7 @@ int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len); unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait); int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); +int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int vcc_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen); int vcc_getsockopt(struct socket *sock, int level, int optname, diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index 7afd8e7754fd..76ed3c8d26e6 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "resources.h" #include "signaling.h" /* for WAITING and sigd_attach */ @@ -46,7 +47,7 @@ void deregister_atm_ioctl(struct atm_ioctl *ioctl) EXPORT_SYMBOL(register_atm_ioctl); EXPORT_SYMBOL(deregister_atm_ioctl); -int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg, int compat) { struct sock *sk = sock->sk; struct atm_vcc *vcc; @@ -80,13 +81,25 @@ int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) goto done; } case SIOCGSTAMP: /* borrowed from IP */ - error = sock_get_timestamp(sk, argp); +#ifdef CONFIG_COMPAT + if (compat) + error = compat_sock_get_timestamp(sk, argp); + else +#endif + error = sock_get_timestamp(sk, argp); goto done; case SIOCGSTAMPNS: /* borrowed from IP */ - error = sock_get_timestampns(sk, argp); +#ifdef CONFIG_COMPAT + if (compat) + error = compat_sock_get_timestampns(sk, argp); + else +#endif + error = sock_get_timestampns(sk, argp); goto done; case ATM_SETSC: - printk(KERN_WARNING "ATM_SETSC is obsolete\n"); + if (net_ratelimit()) + printk(KERN_WARNING "ATM_SETSC is obsolete; used by %s:%d\n", + current->comm, task_pid_nr(current)); error = 0; goto done; case ATMSIGD_CTRL: @@ -99,12 +112,23 @@ int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) * info uses kernel pointers as opaque references, * so the holder of the file descriptor can scribble * on the kernel... so we should make sure that we - * have the same privledges that /proc/kcore needs + * have the same privileges that /proc/kcore needs */ if (!capable(CAP_SYS_RAWIO)) { error = -EPERM; goto done; } +#ifdef CONFIG_COMPAT + /* WTF? I don't even want to _think_ about making this + work for 32-bit userspace. TBH I don't really want + to think about it at all. dwmw2. */ + if (compat) { + if (net_ratelimit()) + printk(KERN_WARNING "32-bit task cannot be atmsigd\n"); + error = -EINVAL; + goto done; + } +#endif error = sigd_attach(vcc); if (!error) sock->state = SS_CONNECTED; @@ -155,8 +179,21 @@ int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) if (error != -ENOIOCTLCMD) goto done; - error = atm_dev_ioctl(cmd, argp); + error = atm_dev_ioctl(cmd, argp, compat); done: return error; } + + +int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + return do_vcc_ioctl(sock, cmd, arg, 0); +} + +#ifdef CONFIG_COMPAT +int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + return do_vcc_ioctl(sock, cmd, arg, 1); +} +#endif diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 43e8bf5ed001..e1d22d9430dd 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -113,6 +113,9 @@ static const struct proto_ops pvc_proto_ops = { .getname = pvc_getname, .poll = vcc_poll, .ioctl = vcc_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = vcc_compat_ioctl, +#endif .listen = sock_no_listen, .shutdown = pvc_shutdown, .setsockopt = pvc_setsockopt, diff --git a/net/atm/resources.c b/net/atm/resources.c index a34ba948af96..56b7322ff461 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -195,20 +195,39 @@ static int fetch_stats(struct atm_dev *dev, struct atm_dev_stats __user *arg, in } -int atm_dev_ioctl(unsigned int cmd, void __user *arg) +int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat) { void __user *buf; int error, len, number, size = 0; struct atm_dev *dev; struct list_head *p; int *tmp_buf, *tmp_p; - struct atm_iobuf __user *iobuf = arg; - struct atmif_sioc __user *sioc = arg; + int __user *sioc_len; + int __user *iobuf_len; + +#ifndef CONFIG_COMPAT + compat = 0; /* Just so the compiler _knows_ */ +#endif + switch (cmd) { case ATM_GETNAMES: - if (get_user(buf, &iobuf->buffer)) - return -EFAULT; - if (get_user(len, &iobuf->length)) + + if (compat) { +#ifdef CONFIG_COMPAT + struct compat_atm_iobuf __user *ciobuf = arg; + compat_uptr_t cbuf; + iobuf_len = &ciobuf->length; + if (get_user(cbuf, &ciobuf->buffer)) + return -EFAULT; + buf = compat_ptr(cbuf); +#endif + } else { + struct atm_iobuf __user *iobuf = arg; + iobuf_len = &iobuf->length; + if (get_user(buf, &iobuf->buffer)) + return -EFAULT; + } + if (get_user(len, iobuf_len)) return -EFAULT; mutex_lock(&atm_dev_mutex); list_for_each(p, &atm_devs) @@ -229,7 +248,7 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg) } mutex_unlock(&atm_dev_mutex); error = ((copy_to_user(buf, tmp_buf, size)) || - put_user(size, &iobuf->length)) + put_user(size, iobuf_len)) ? -EFAULT : 0; kfree(tmp_buf); return error; @@ -237,13 +256,32 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg) break; } - if (get_user(buf, &sioc->arg)) - return -EFAULT; - if (get_user(len, &sioc->length)) - return -EFAULT; - if (get_user(number, &sioc->number)) - return -EFAULT; - + if (compat) { +#ifdef CONFIG_COMPAT + struct compat_atmif_sioc __user *csioc = arg; + compat_uptr_t carg; + + sioc_len = &csioc->length; + if (get_user(carg, &csioc->arg)) + return -EFAULT; + buf = compat_ptr(carg); + + if (get_user(len, &csioc->length)) + return -EFAULT; + if (get_user(number, &csioc->number)) + return -EFAULT; +#endif + } else { + struct atmif_sioc __user *sioc = arg; + + sioc_len = &sioc->length; + if (get_user(buf, &sioc->arg)) + return -EFAULT; + if (get_user(len, &sioc->length)) + return -EFAULT; + if (get_user(number, &sioc->number)) + return -EFAULT; + } if (!(dev = try_then_request_module(atm_dev_lookup(number), "atm-device-%d", number))) return -ENODEV; @@ -358,7 +396,7 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg) size = error; /* may return 0, but later on size == 0 means "don't write the length" */ - error = put_user(size, &sioc->length) + error = put_user(size, sioc_len) ? -EFAULT : 0; goto done; case ATM_SETLOOP: @@ -380,11 +418,21 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg) } /* fall through */ default: - if (!dev->ops->ioctl) { - error = -EINVAL; - goto done; + if (compat) { +#ifdef CONFIG_COMPAT + if (!dev->ops->compat_ioctl) { + error = -EINVAL; + goto done; + } + size = dev->ops->compat_ioctl(dev, cmd, buf); +#endif + } else { + if (!dev->ops->ioctl) { + error = -EINVAL; + goto done; + } + size = dev->ops->ioctl(dev, cmd, buf); } - size = dev->ops->ioctl(dev, cmd, buf); if (size < 0) { error = (size == -ENOIOCTLCMD ? -EINVAL : size); goto done; @@ -392,7 +440,7 @@ int atm_dev_ioctl(unsigned int cmd, void __user *arg) } if (size) - error = put_user(size, &sioc->length) + error = put_user(size, sioc_len) ? -EFAULT : 0; else error = 0; diff --git a/net/atm/resources.h b/net/atm/resources.h index 1d004aaaeec1..126fb1840dfb 100644 --- a/net/atm/resources.h +++ b/net/atm/resources.h @@ -13,7 +13,7 @@ extern struct list_head atm_devs; extern struct mutex atm_dev_mutex; -int atm_dev_ioctl(unsigned int cmd, void __user *arg); +int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat); #ifdef CONFIG_PROC_FS diff --git a/net/atm/svc.c b/net/atm/svc.c index de1e4f2f3a43..e9c65500f84e 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -604,6 +604,22 @@ static int svc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return error; } +#ifdef CONFIG_COMPAT +static int svc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + /* The definition of ATM_ADDPARTY uses the size of struct atm_iobuf. + But actually it takes a struct sockaddr_atmsvc, which doesn't need + compat handling. So all we have to do is fix up cmd... */ + if (cmd == COMPAT_ATM_ADDPARTY) + cmd = ATM_ADDPARTY; + + if (cmd == ATM_ADDPARTY || cmd == ATM_DROPPARTY) + return svc_ioctl(sock, cmd, arg); + else + return vcc_compat_ioctl(sock, cmd, arg); +} +#endif /* CONFIG_COMPAT */ + static const struct proto_ops svc_proto_ops = { .family = PF_ATMSVC, .owner = THIS_MODULE, @@ -616,6 +632,9 @@ static const struct proto_ops svc_proto_ops = { .getname = svc_getname, .poll = vcc_poll, .ioctl = svc_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = svc_compat_ioctl, +#endif .listen = svc_listen, .shutdown = svc_shutdown, .setsockopt = svc_setsockopt, -- cgit v1.2.3 From ea4e2bc4d9f7370e57a343ccb5e7c0ad3222ec3c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 3 Dec 2008 15:36:57 -0500 Subject: ftrace: graph of a single function This patch adds the file: /debugfs/tracing/set_graph_function which can be used along with the function graph tracer. When this file is empty, the function graph tracer will act as usual. When the file has a function in it, the function graph tracer will only trace that function. For example: # echo blk_unplug > /debugfs/tracing/set_graph_function # cat /debugfs/tracing/trace [...] ------------------------------------------ | 2) make-19003 => kjournald-2219 ------------------------------------------ 2) | blk_unplug() { 2) | dm_unplug_all() { 2) | dm_get_table() { 2) 1.381 us | _read_lock(); 2) 0.911 us | dm_table_get(); 2) 1. 76 us | _read_unlock(); 2) + 12.912 us | } 2) | dm_table_unplug_all() { 2) | blk_unplug() { 2) 0.778 us | generic_unplug_device(); 2) 2.409 us | } 2) 5.992 us | } 2) 0.813 us | dm_table_put(); 2) + 29. 90 us | } 2) + 34.532 us | } You can add up to 32 functions into this file. Currently we limit it to 32, but this may change with later improvements. To add another function, use the append '>>': # echo sys_read >> /debugfs/tracing/set_graph_function # cat /debugfs/tracing/set_graph_function blk_unplug sys_read Using the '>' will clear out the function and write anew: # echo sys_write > /debug/tracing/set_graph_function # cat /debug/tracing/set_graph_function sys_write Note, if you have function graph running while doing this, the small time between clearing it and updating it will cause the graph to record all functions. This should not be an issue because after it sets the filter, only those functions will be recorded from then on. If you need to only record a particular function then set this file first before starting the function graph tracer. In the future this side effect may be corrected. The set_graph_function file is similar to the set_ftrace_filter but it does not take wild cards nor does it allow for more than one function to be set with a single write. There is no technical reason why this is the case, I just do not have the time yet to implement that. Note, dynamic ftrace must be enabled for this to appear because it uses the dynamic ftrace records to match the name to the mcount call sites. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 46 ++++++++++ include/linux/sched.h | 4 + kernel/trace/ftrace.c | 227 +++++++++++++++++++++++++++++++++++++++++++++++++ kernel/trace/trace.c | 8 ++ kernel/trace/trace.h | 30 ++++++- 5 files changed, 314 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 469ceb3e85ba..b295d3106bfe 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -7,6 +7,7 @@ #include #include #include +#include #ifdef CONFIG_FUNCTION_TRACER @@ -391,4 +392,49 @@ static inline void ftrace_graph_init_task(struct task_struct *t) { } static inline void ftrace_graph_exit_task(struct task_struct *t) { } #endif +#ifdef CONFIG_TRACING +#include + +/* flags for current->trace */ +enum { + TSK_TRACE_FL_TRACE_BIT = 0, + TSK_TRACE_FL_GRAPH_BIT = 1, +}; +enum { + TSK_TRACE_FL_TRACE = 1 << TSK_TRACE_FL_TRACE_BIT, + TSK_TRACE_FL_GRAPH = 1 << TSK_TRACE_FL_GRAPH_BIT, +}; + +static inline void set_tsk_trace_trace(struct task_struct *tsk) +{ + set_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace); +} + +static inline void clear_tsk_trace_trace(struct task_struct *tsk) +{ + clear_bit(TSK_TRACE_FL_TRACE_BIT, &tsk->trace); +} + +static inline int test_tsk_trace_trace(struct task_struct *tsk) +{ + return tsk->trace & TSK_TRACE_FL_TRACE; +} + +static inline void set_tsk_trace_graph(struct task_struct *tsk) +{ + set_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace); +} + +static inline void clear_tsk_trace_graph(struct task_struct *tsk) +{ + clear_bit(TSK_TRACE_FL_GRAPH_BIT, &tsk->trace); +} + +static inline int test_tsk_trace_graph(struct task_struct *tsk) +{ + return tsk->trace & TSK_TRACE_FL_GRAPH; +} + +#endif /* CONFIG_TRACING */ + #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 2d0a93c31228..4c152e0acc9e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1380,6 +1380,10 @@ struct task_struct { */ atomic_t trace_overrun; #endif +#ifdef CONFIG_TRACING + /* state flags for use by tracers */ + unsigned long trace; +#endif }; /* diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 65b9e863056b..b17a30350f06 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1320,6 +1320,224 @@ static struct file_operations ftrace_notrace_fops = { .release = ftrace_notrace_release, }; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +static DEFINE_MUTEX(graph_lock); + +int ftrace_graph_count; +unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; + +static void * +g_next(struct seq_file *m, void *v, loff_t *pos) +{ + unsigned long *array = m->private; + int index = *pos; + + (*pos)++; + + if (index >= ftrace_graph_count) + return NULL; + + return &array[index]; +} + +static void *g_start(struct seq_file *m, loff_t *pos) +{ + void *p = NULL; + + mutex_lock(&graph_lock); + + p = g_next(m, p, pos); + + return p; +} + +static void g_stop(struct seq_file *m, void *p) +{ + mutex_unlock(&graph_lock); +} + +static int g_show(struct seq_file *m, void *v) +{ + unsigned long *ptr = v; + char str[KSYM_SYMBOL_LEN]; + + if (!ptr) + return 0; + + kallsyms_lookup(*ptr, NULL, NULL, NULL, str); + + seq_printf(m, "%s\n", str); + + return 0; +} + +static struct seq_operations ftrace_graph_seq_ops = { + .start = g_start, + .next = g_next, + .stop = g_stop, + .show = g_show, +}; + +static int +ftrace_graph_open(struct inode *inode, struct file *file) +{ + int ret = 0; + + if (unlikely(ftrace_disabled)) + return -ENODEV; + + mutex_lock(&graph_lock); + if ((file->f_mode & FMODE_WRITE) && + !(file->f_flags & O_APPEND)) { + ftrace_graph_count = 0; + memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); + } + + if (file->f_mode & FMODE_READ) { + ret = seq_open(file, &ftrace_graph_seq_ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = ftrace_graph_funcs; + } + } else + file->private_data = ftrace_graph_funcs; + mutex_unlock(&graph_lock); + + return ret; +} + +static ssize_t +ftrace_graph_read(struct file *file, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + if (file->f_mode & FMODE_READ) + return seq_read(file, ubuf, cnt, ppos); + else + return -EPERM; +} + +static int +ftrace_set_func(unsigned long *array, int idx, char *buffer) +{ + char str[KSYM_SYMBOL_LEN]; + struct dyn_ftrace *rec; + struct ftrace_page *pg; + int found = 0; + int i; + + if (ftrace_disabled) + return -ENODEV; + + /* should not be called from interrupt context */ + spin_lock(&ftrace_lock); + + for (pg = ftrace_pages_start; pg; pg = pg->next) { + for (i = 0; i < pg->index; i++) { + rec = &pg->records[i]; + + if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) + continue; + + kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); + if (strcmp(str, buffer) == 0) { + found = 1; + array[idx] = rec->ip; + break; + } + } + } + spin_unlock(&ftrace_lock); + + return found ? 0 : -EINVAL; +} + +static ssize_t +ftrace_graph_write(struct file *file, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + unsigned char buffer[FTRACE_BUFF_MAX+1]; + unsigned long *array; + size_t read = 0; + ssize_t ret; + int index = 0; + char ch; + + if (!cnt || cnt < 0) + return 0; + + mutex_lock(&graph_lock); + + if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) { + ret = -EBUSY; + goto out; + } + + if (file->f_mode & FMODE_READ) { + struct seq_file *m = file->private_data; + array = m->private; + } else + array = file->private_data; + + ret = get_user(ch, ubuf++); + if (ret) + goto out; + read++; + cnt--; + + /* skip white space */ + while (cnt && isspace(ch)) { + ret = get_user(ch, ubuf++); + if (ret) + goto out; + read++; + cnt--; + } + + if (isspace(ch)) { + *ppos += read; + ret = read; + goto out; + } + + while (cnt && !isspace(ch)) { + if (index < FTRACE_BUFF_MAX) + buffer[index++] = ch; + else { + ret = -EINVAL; + goto out; + } + ret = get_user(ch, ubuf++); + if (ret) + goto out; + read++; + cnt--; + } + buffer[index] = 0; + + /* we allow only one at a time */ + ret = ftrace_set_func(array, ftrace_graph_count, buffer); + if (ret) + goto out; + + ftrace_graph_count++; + + file->f_pos += read; + + ret = read; + out: + mutex_unlock(&graph_lock); + + return ret; +} + +static const struct file_operations ftrace_graph_fops = { + .open = ftrace_graph_open, + .read = ftrace_graph_read, + .write = ftrace_graph_write, +}; +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { struct dentry *entry; @@ -1347,6 +1565,15 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer) pr_warning("Could not create debugfs " "'set_ftrace_notrace' entry\n"); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + entry = debugfs_create_file("set_graph_function", 0444, d_tracer, + NULL, + &ftrace_graph_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'set_graph_function' entry\n"); +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + return 0; } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8b6409a62b54..710b39acd81b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1209,6 +1209,9 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) int cpu; int pc; + if (!ftrace_graph_addr(trace->func)) + return 0; + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; @@ -1217,6 +1220,9 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) pc = preempt_count(); __trace_graph_entry(tr, data, trace, flags, pc); } + /* Only do the atomic if it is not already set */ + if (!test_tsk_trace_graph(current)) + set_tsk_trace_graph(current); atomic_dec(&data->disabled); local_irq_restore(flags); @@ -1240,6 +1246,8 @@ void trace_graph_return(struct ftrace_graph_ret *trace) pc = preempt_count(); __trace_graph_return(tr, data, trace, flags, pc); } + if (!trace->depth) + clear_tsk_trace_graph(current); atomic_dec(&data->disabled); local_irq_restore(flags); } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 0565ae9a2210..41f026bfc9ed 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -505,13 +505,41 @@ extern unsigned long trace_flags; /* Standard output formatting function used for function return traces */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER extern enum print_line_t print_graph_function(struct trace_iterator *iter); + +#ifdef CONFIG_DYNAMIC_FTRACE +/* TODO: make this variable */ +#define FTRACE_GRAPH_MAX_FUNCS 32 +extern int ftrace_graph_count; +extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS]; + +static inline int ftrace_graph_addr(unsigned long addr) +{ + int i; + + if (!ftrace_graph_count || test_tsk_trace_graph(current)) + return 1; + + for (i = 0; i < ftrace_graph_count; i++) { + if (addr == ftrace_graph_funcs[i]) + return 1; + } + + return 0; +} #else +static inline int ftrace_trace_addr(unsigned long addr) +{ + return 1 +} +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#else /* CONFIG_FUNCTION_GRAPH_TRACER */ static inline enum print_line_t print_graph_function(struct trace_iterator *iter) { return TRACE_TYPE_UNHANDLED; } -#endif +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ /* * trace_iterator_flags is an enumeration that defines bit -- cgit v1.2.3 From 5ef6476190d24419a9a537baa0b5641845136989 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 4 Dec 2008 00:26:39 -0500 Subject: pid: fix the do_each_pid_task() macro Impact: macro side-effects fix This patch adds parenthesis around 'pid' in the do_each_pid_task macro to allow callers to pass in more complex parameters. e.g. do_each_pid_task(*pid, type, task) Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/pid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pid.h b/include/linux/pid.h index d7e98ff8021e..bb206c56d1f0 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -147,9 +147,9 @@ pid_t pid_vnr(struct pid *pid); #define do_each_pid_task(pid, type, task) \ do { \ struct hlist_node *pos___; \ - if (pid != NULL) \ + if ((pid) != NULL) \ hlist_for_each_entry_rcu((task), pos___, \ - &pid->tasks[type], pids[type].node) { + &(pid)->tasks[type], pids[type].node) { /* * Both old and new leaders may be attached to -- cgit v1.2.3 From 00ef9f7348dfd2fc223ec42aceb30836e86b367f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 4 Dec 2008 09:00:17 +0100 Subject: lockdep: change a held lock's class Impact: introduce new lockdep API Allow to change a held lock's class. Basically the same as the existing code to change a subclass therefore reuse all that. The XFS code will be able to use this to annotate their inode locking. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 12 ++++++++++-- kernel/lockdep.c | 24 +++++++++--------------- 2 files changed, 19 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 8956daf64abd..37a0361f4685 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -314,8 +314,15 @@ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, extern void lock_release(struct lockdep_map *lock, int nested, unsigned long ip); -extern void lock_set_subclass(struct lockdep_map *lock, unsigned int subclass, - unsigned long ip); +extern void lock_set_class(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, unsigned int subclass, + unsigned long ip); + +static inline void lock_set_subclass(struct lockdep_map *lock, + unsigned int subclass, unsigned long ip) +{ + lock_set_class(lock, lock->name, lock->key, subclass, ip); +} # define INIT_LOCKDEP .lockdep_recursion = 0, @@ -333,6 +340,7 @@ static inline void lockdep_on(void) # define lock_acquire(l, s, t, r, c, n, i) do { } while (0) # define lock_release(l, n, i) do { } while (0) +# define lock_set_class(l, n, k, s, i) do { } while (0) # define lock_set_subclass(l, s, i) do { } while (0) # define lockdep_init() do { } while (0) # define lockdep_info() do { } while (0) diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 90f3fb64dbce..4fa6eeb4e8a7 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -291,14 +291,12 @@ void lockdep_off(void) { current->lockdep_recursion++; } - EXPORT_SYMBOL(lockdep_off); void lockdep_on(void) { current->lockdep_recursion--; } - EXPORT_SYMBOL(lockdep_on); /* @@ -2513,7 +2511,6 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, if (subclass) register_lock_class(lock, subclass, 1); } - EXPORT_SYMBOL_GPL(lockdep_init_map); /* @@ -2694,8 +2691,9 @@ static int check_unlock(struct task_struct *curr, struct lockdep_map *lock, } static int -__lock_set_subclass(struct lockdep_map *lock, - unsigned int subclass, unsigned long ip) +__lock_set_class(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, unsigned int subclass, + unsigned long ip) { struct task_struct *curr = current; struct held_lock *hlock, *prev_hlock; @@ -2722,6 +2720,7 @@ __lock_set_subclass(struct lockdep_map *lock, return print_unlock_inbalance_bug(curr, lock, ip); found_it: + lockdep_init_map(lock, name, key, 0); class = register_lock_class(lock, subclass, 0); hlock->class_idx = class - lock_classes + 1; @@ -2906,9 +2905,9 @@ static void check_flags(unsigned long flags) #endif } -void -lock_set_subclass(struct lockdep_map *lock, - unsigned int subclass, unsigned long ip) +void lock_set_class(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, unsigned int subclass, + unsigned long ip) { unsigned long flags; @@ -2918,13 +2917,12 @@ lock_set_subclass(struct lockdep_map *lock, raw_local_irq_save(flags); current->lockdep_recursion = 1; check_flags(flags); - if (__lock_set_subclass(lock, subclass, ip)) + if (__lock_set_class(lock, name, key, subclass, ip)) check_chain_key(current); current->lockdep_recursion = 0; raw_local_irq_restore(flags); } - -EXPORT_SYMBOL_GPL(lock_set_subclass); +EXPORT_SYMBOL_GPL(lock_set_class); /* * We are not always called with irqs disabled - do that here, @@ -2948,7 +2946,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, current->lockdep_recursion = 0; raw_local_irq_restore(flags); } - EXPORT_SYMBOL_GPL(lock_acquire); void lock_release(struct lockdep_map *lock, int nested, @@ -2966,7 +2963,6 @@ void lock_release(struct lockdep_map *lock, int nested, current->lockdep_recursion = 0; raw_local_irq_restore(flags); } - EXPORT_SYMBOL_GPL(lock_release); #ifdef CONFIG_LOCK_STAT @@ -3451,7 +3447,6 @@ retry: if (unlock) read_unlock(&tasklist_lock); } - EXPORT_SYMBOL_GPL(debug_show_all_locks); /* @@ -3472,7 +3467,6 @@ void debug_show_held_locks(struct task_struct *task) { __debug_show_held_locks(task); } - EXPORT_SYMBOL_GPL(debug_show_held_locks); void lockdep_sys_exit(void) -- cgit v1.2.3 From 32c8dabc97d436582298ebd0e33af041c69f5a4b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 3 Dec 2008 19:41:13 +0000 Subject: ASoC: Remove obsolete declaration of struct snd_soc_clock_info The struct is never defined. Signed-off-by: Mark Brown --- include/sound/soc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 8ec63c02dc10..79d855d2bddd 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -152,7 +152,6 @@ struct snd_soc_dai; struct snd_soc_codec; struct soc_enum; struct snd_soc_ac97_ops; -struct snd_soc_clock_info; typedef int (*hw_write_t)(void *,const char* ,int); typedef int (*hw_read_t)(void *,char* ,int); -- cgit v1.2.3 From c9bb6003dd096ad190e1594a7d835ae1c39fae8f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 29 Oct 2008 23:46:09 -0700 Subject: of: Fix comment, sparc no longer uses of_device objects on special busses. It only uses of_platform_bus_type. Signed-off-by: David S. Miller --- include/linux/of_platform.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index a8efcfeea732..3d327b67d7e2 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -26,8 +26,7 @@ extern struct bus_type of_platform_bus_type; /* * An of_platform_driver driver is attached to a basic of_device on - * the "platform bus" (of_platform_bus_type) (or ISA, EBUS and SBUS - * busses on sparc). + * the "platform bus" (of_platform_bus_type). */ struct of_platform_driver { -- cgit v1.2.3 From 21a8c466f99063eeb8567318b4e305eda9015408 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 4 Dec 2008 23:51:23 +0100 Subject: tracing/ftrace: provide the macro task_curr_ret_stack() Impact: cleanup As suggested by Steven Rostedt, this patch provide a new macro task_curr_ret_stack() to move the cpp conditionnal CONFIG into the linux/ftrace.h headers. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 11 +++++++++++ kernel/trace/trace.c | 8 +------- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b295d3106bfe..b9b4d0a22d10 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -8,6 +8,7 @@ #include #include #include +#include #ifdef CONFIG_FUNCTION_TRACER @@ -387,9 +388,19 @@ extern void unregister_ftrace_graph(void); extern void ftrace_graph_init_task(struct task_struct *t); extern void ftrace_graph_exit_task(struct task_struct *t); + +static inline int task_curr_ret_stack(struct task_struct *t) +{ + return t->curr_ret_stack; +} #else static inline void ftrace_graph_init_task(struct task_struct *t) { } static inline void ftrace_graph_exit_task(struct task_struct *t) { } + +static inline int task_curr_ret_stack(struct task_struct *tsk) +{ + return -1; +} #endif #ifdef CONFIG_TRACING diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5dca6ef1fbeb..7a93c663e52a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3657,13 +3657,7 @@ int __ftrace_printk(unsigned long ip, const char *fmt, ...) return 0; va_start(ap, fmt); - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - ret = trace_vprintk(ip, current->curr_ret_stack, fmt, ap); -#else - ret = trace_vprintk(ip, -1, fmt, ap); -#endif - + ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); va_end(ap); return ret; } -- cgit v1.2.3 From 72bdcf34380917260da41e3c49e10edee04bc5cd Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 26 Nov 2008 16:15:24 +0200 Subject: nl80211: Add frequency configuration (including HT40) This patch adds new NL80211_CMD_SET_WIPHY attributes NL80211_ATTR_WIPHY_FREQ and NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET to allow userspace to set the operating channel (e.g., hostapd for AP mode). Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 23 +++++++++++++++++-- include/net/cfg80211.h | 9 ++++++++ include/net/mac80211.h | 3 +++ net/mac80211/cfg.c | 13 +++++++++++ net/mac80211/ieee80211_i.h | 1 + net/mac80211/main.c | 30 ++++++++++++++++++++---- net/mac80211/util.c | 1 + net/wireless/nl80211.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 131 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index e08c8bcfb78d..92f79d2bdd8c 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -26,8 +26,9 @@ * @NL80211_CMD_GET_WIPHY: request information about a wiphy or dump request * to get a list of all present wiphys. * @NL80211_CMD_SET_WIPHY: set wiphy parameters, needs %NL80211_ATTR_WIPHY or - * %NL80211_ATTR_IFINDEX; can be used to set %NL80211_ATTR_WIPHY_NAME - * and/or %NL80211_ATTR_WIPHY_TXQ_PARAMS. + * %NL80211_ATTR_IFINDEX; can be used to set %NL80211_ATTR_WIPHY_NAME, + * %NL80211_ATTR_WIPHY_TXQ_PARAMS, %NL80211_ATTR_WIPHY_FREQ, and/or + * %NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET. * @NL80211_CMD_NEW_WIPHY: Newly created wiphy, response to get request * or rename notification. Has attributes %NL80211_ATTR_WIPHY and * %NL80211_ATTR_WIPHY_NAME. @@ -180,6 +181,14 @@ enum nl80211_commands { * /sys/class/ieee80211//index * @NL80211_ATTR_WIPHY_NAME: wiphy name (used for renaming) * @NL80211_ATTR_WIPHY_TXQ_PARAMS: a nested array of TX queue parameters + * @NL80211_ATTR_WIPHY_FREQ: frequency of the selected channel in MHz + * @NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET: included with NL80211_ATTR_WIPHY_FREQ + * if HT20 or HT40 are allowed (i.e., 802.11n disabled if not included): + * NL80211_SEC_CHAN_NO_HT = HT not allowed (i.e., same as not including + * this attribute) + * NL80211_SEC_CHAN_DISABLED = HT20 only + * NL80211_SEC_CHAN_BELOW = secondary channel is below the primary channel + * NL80211_SEC_CHAN_ABOVE = secondary channel is above the primary channel * * @NL80211_ATTR_IFINDEX: network interface index of the device to operate on * @NL80211_ATTR_IFNAME: network interface name @@ -315,6 +324,8 @@ enum nl80211_attrs { NL80211_ATTR_BSS_BASIC_RATES, NL80211_ATTR_WIPHY_TXQ_PARAMS, + NL80211_ATTR_WIPHY_FREQ, + NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET, /* add attributes here, update the policy in nl80211.c */ @@ -329,6 +340,8 @@ enum nl80211_attrs { #define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY #define NL80211_ATTR_BSS_BASIC_RATES NL80211_ATTR_BSS_BASIC_RATES #define NL80211_ATTR_WIPHY_TXQ_PARAMS NL80211_ATTR_WIPHY_TXQ_PARAMS +#define NL80211_ATTR_WIPHY_FREQ NL80211_ATTR_WIPHY_FREQ +#define NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET #define NL80211_MAX_SUPP_RATES 32 #define NL80211_MAX_SUPP_REG_RULES 32 @@ -742,4 +755,10 @@ enum nl80211_txq_q { NL80211_TXQ_Q_BK }; +enum nl80211_sec_chan_offset { + NL80211_SEC_CHAN_NO_HT /* No HT */, + NL80211_SEC_CHAN_DISABLED /* HT20 only */, + NL80211_SEC_CHAN_BELOW /* HT40- */, + NL80211_SEC_CHAN_ABOVE /* HT40+ */ +}; #endif /* __LINUX_NL80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1d57835d73f2..53b06f6c62ca 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -392,6 +392,9 @@ struct ieee80211_txq_params { /* from net/wireless.h */ struct wiphy; +/* from net/ieee80211.h */ +struct ieee80211_channel; + /** * struct cfg80211_ops - backend description for wireless configuration * @@ -450,6 +453,8 @@ struct wiphy; * @change_bss: Modify parameters for a given BSS. * * @set_txq_params: Set TX queue parameters + * + * @set_channel: Set channel */ struct cfg80211_ops { int (*add_virtual_intf)(struct wiphy *wiphy, char *name, @@ -513,6 +518,10 @@ struct cfg80211_ops { int (*set_txq_params)(struct wiphy *wiphy, struct ieee80211_txq_params *params); + + int (*set_channel)(struct wiphy *wiphy, + struct ieee80211_channel *chan, + enum nl80211_sec_chan_offset); }; #endif /* __NET_CFG80211_H */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 6a1d4ea18186..6e823cc6a4b1 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -507,6 +507,9 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void) struct ieee80211_ht_conf { bool enabled; + int sec_chan_offset; /* 0 = HT40 disabled; -1 = HT40 enabled, secondary + * channel below primary; 1 = HT40 enabled, + * secondary channel above primary */ }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 16423f94801b..7a7a6c176dc5 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1095,6 +1095,18 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy, return 0; } +static int ieee80211_set_channel(struct wiphy *wiphy, + struct ieee80211_channel *chan, + enum nl80211_sec_chan_offset sec_chan_offset) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + local->oper_channel = chan; + local->oper_sec_chan_offset = sec_chan_offset; + + return ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1122,4 +1134,5 @@ struct cfg80211_ops mac80211_config_ops = { #endif .change_bss = ieee80211_change_bss, .set_txq_params = ieee80211_set_txq_params, + .set_channel = ieee80211_set_channel, }; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 155a20410017..527205f8c1a1 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -626,6 +626,7 @@ struct ieee80211_local { struct delayed_work scan_work; struct ieee80211_sub_if_data *scan_sdata; struct ieee80211_channel *oper_channel, *scan_channel; + enum nl80211_sec_chan_offset oper_sec_chan_offset; u8 scan_ssid[IEEE80211_MAX_SSID_LEN]; size_t scan_ssid_len; struct list_head bss_list; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index cec9b6d3e1ce..29c3ecf7e914 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -195,20 +195,42 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) struct ieee80211_channel *chan; int ret = 0; int power; + enum nl80211_sec_chan_offset sec_chan_offset; might_sleep(); - if (local->sw_scanning) + if (local->sw_scanning) { chan = local->scan_channel; - else + sec_chan_offset = NL80211_SEC_CHAN_NO_HT; + } else { chan = local->oper_channel; + sec_chan_offset = local->oper_sec_chan_offset; + } - if (chan != local->hw.conf.channel) { + if (chan != local->hw.conf.channel || + sec_chan_offset != local->hw.conf.ht.sec_chan_offset) { local->hw.conf.channel = chan; + switch (sec_chan_offset) { + case NL80211_SEC_CHAN_NO_HT: + local->hw.conf.ht.enabled = false; + local->hw.conf.ht.sec_chan_offset = 0; + break; + case NL80211_SEC_CHAN_DISABLED: + local->hw.conf.ht.enabled = true; + local->hw.conf.ht.sec_chan_offset = 0; + break; + case NL80211_SEC_CHAN_BELOW: + local->hw.conf.ht.enabled = true; + local->hw.conf.ht.sec_chan_offset = -1; + break; + case NL80211_SEC_CHAN_ABOVE: + local->hw.conf.ht.enabled = true; + local->hw.conf.ht.sec_chan_offset = 1; + break; + } changed |= IEEE80211_CONF_CHANGE_CHANNEL; } - if (!local->hw.conf.power_level) power = chan->max_power; else diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 0f841317c7e9..505d68f344ce 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -641,6 +641,7 @@ int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz) chan->flags & IEEE80211_CHAN_NO_IBSS) return ret; local->oper_channel = chan; + local->oper_sec_chan_offset = NL80211_SEC_CHAN_NO_HT; if (local->sw_scanning || local->hw_scanning) ret = 0; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c9141e3df9ba..9caee6022e3f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -59,6 +59,8 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING, .len = BUS_ID_SIZE-1 }, [NL80211_ATTR_WIPHY_TXQ_PARAMS] = { .type = NLA_NESTED }, + [NL80211_ATTR_WIPHY_FREQ] = { .type = NLA_U32 }, + [NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET] = { .type = NLA_U32 }, [NL80211_ATTR_IFTYPE] = { .type = NLA_U32 }, [NL80211_ATTR_IFINDEX] = { .type = NLA_U32 }, @@ -359,6 +361,61 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } } + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { + enum nl80211_sec_chan_offset sec_chan_offset = + NL80211_SEC_CHAN_NO_HT; + struct ieee80211_channel *chan; + u32 freq, sec_freq; + + if (!rdev->ops->set_channel) { + result = -EOPNOTSUPP; + goto bad_res; + } + + if (info->attrs[NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET]) { + sec_chan_offset = nla_get_u32( + info->attrs[ + NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET]); + if (sec_chan_offset != NL80211_SEC_CHAN_NO_HT && + sec_chan_offset != NL80211_SEC_CHAN_DISABLED && + sec_chan_offset != NL80211_SEC_CHAN_BELOW && + sec_chan_offset != NL80211_SEC_CHAN_ABOVE) { + result = -EINVAL; + goto bad_res; + } + } + + freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); + chan = ieee80211_get_channel(&rdev->wiphy, freq); + if (!chan || chan->flags & IEEE80211_CHAN_DISABLED) { + /* Primary channel not allowed */ + result = -EINVAL; + goto bad_res; + } + if (sec_chan_offset == NL80211_SEC_CHAN_BELOW) + sec_freq = freq - 20; + else if (sec_chan_offset == NL80211_SEC_CHAN_ABOVE) + sec_freq = freq + 20; + else + sec_freq = 0; + + if (sec_freq) { + struct ieee80211_channel *schan; + schan = ieee80211_get_channel(&rdev->wiphy, sec_freq); + if (!schan || schan->flags & IEEE80211_CHAN_DISABLED) { + /* Secondary channel not allowed */ + result = -EINVAL; + goto bad_res; + } + } + + result = rdev->ops->set_channel(&rdev->wiphy, chan, + sec_chan_offset); + if (result) + goto bad_res; + } + + bad_res: cfg80211_put_dev(rdev); return result; -- cgit v1.2.3 From 10ec4f1d0851eb97cd53db66150835dd7f64829d Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 26 Nov 2008 13:03:08 -0800 Subject: nl80211: relicense nl80211.h under the ISC We have a few BSD/ISC licensed userspace applications which include nl80211.h from the kernel. To avoid legal ambiguity for usage of the header file in these projects we rather simply relicense the header file under the ISC. We've received consent from all contributors to it. Signed-off-by: Luis R. Rodriguez Acked-by: Johannes Berg Acked-by: Michael Wu Acked-by: Luis Carlos Cobo Acked-by: Michael Buesch Acked-by: Jouni Malinen Acked-by: Colin McCabe Acked-by: Javier Cardona Cc: johannes@sipsolutions.net Cc: altape@eden.rutgers.edu Cc: luisca@cozybit.com Cc: mb@bu3sch.de Cc: jouni.malinen@atheros.com Cc: colin@cozybit.com Cc: javier@cozybit.com Signed-off-by: John W. Linville --- include/linux/nl80211.h | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 92f79d2bdd8c..04d4516f9c71 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -3,7 +3,26 @@ /* * 802.11 netlink interface public header * - * Copyright 2006, 2007 Johannes Berg + * Copyright 2006, 2007, 2008 Johannes Berg + * Copyright 2008 Michael Wu + * Copyright 2008 Luis Carlos Cobo + * Copyright 2008 Michael Buesch + * Copyright 2008 Luis R. Rodriguez + * Copyright 2008 Jouni Malinen + * Copyright 2008 Colin McCabe + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * */ /** -- cgit v1.2.3 From fee52678dbda2099a25243e79da98dc390e1939a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 26 Nov 2008 22:36:31 +0100 Subject: cfg80211: handle SIOCGIWNAME This patch moves the SIOCGIWNAME handling from mac80211 to cfg80211. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 7 ++++++ net/mac80211/wext.c | 44 +--------------------------------- net/wireless/Makefile | 1 + net/wireless/wext-compat.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 69 insertions(+), 43 deletions(-) create mode 100644 net/wireless/wext-compat.c (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 53b06f6c62ca..c97eac34ec01 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5,6 +5,8 @@ #include #include #include +/* remove once we remove the wext stuff */ +#include /* * 802.11 configuration in-kernel interface @@ -524,4 +526,9 @@ struct cfg80211_ops { enum nl80211_sec_chan_offset); }; +/* temporary wext handlers */ +int cfg80211_wext_giwname(struct net_device *dev, + struct iw_request_info *info, + char *name, char *extra); + #endif /* __NET_CFG80211_H */ diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index b3ce28d35611..b9eee3c903de 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -135,48 +135,6 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev, return -EOPNOTSUPP; } -static int ieee80211_ioctl_giwname(struct net_device *dev, - struct iw_request_info *info, - char *name, char *extra) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_supported_band *sband; - u8 is_ht = 0, is_a = 0, is_b = 0, is_g = 0; - - - sband = local->hw.wiphy->bands[IEEE80211_BAND_5GHZ]; - if (sband) { - is_a = 1; - is_ht |= sband->ht_cap.ht_supported; - } - - sband = local->hw.wiphy->bands[IEEE80211_BAND_2GHZ]; - if (sband) { - int i; - /* Check for mandatory rates */ - for (i = 0; i < sband->n_bitrates; i++) { - if (sband->bitrates[i].bitrate == 10) - is_b = 1; - if (sband->bitrates[i].bitrate == 60) - is_g = 1; - } - is_ht |= sband->ht_cap.ht_supported; - } - - strcpy(name, "IEEE 802.11"); - if (is_a) - strcat(name, "a"); - if (is_b) - strcat(name, "b"); - if (is_g) - strcat(name, "g"); - if (is_ht) - strcat(name, "n"); - - return 0; -} - - static int ieee80211_ioctl_giwrange(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *extra) @@ -1146,7 +1104,7 @@ static int ieee80211_ioctl_siwencodeext(struct net_device *dev, static const iw_handler ieee80211_handler[] = { (iw_handler) NULL, /* SIOCSIWCOMMIT */ - (iw_handler) ieee80211_ioctl_giwname, /* SIOCGIWNAME */ + (iw_handler) cfg80211_wext_giwname, /* SIOCGIWNAME */ (iw_handler) NULL, /* SIOCSIWNWID */ (iw_handler) NULL, /* SIOCGIWNWID */ (iw_handler) ieee80211_ioctl_siwfreq, /* SIOCSIWFREQ */ diff --git a/net/wireless/Makefile b/net/wireless/Makefile index cc547edb111f..9bc412c83430 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -6,4 +6,5 @@ obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o +cfg80211-$(CONFIG_WIRELESS_EXT) += wext-compat.o cfg80211-$(CONFIG_NL80211) += nl80211.o diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c new file mode 100644 index 000000000000..a5db4551a31a --- /dev/null +++ b/net/wireless/wext-compat.c @@ -0,0 +1,60 @@ +/* + * cfg80211 - wext compat code + * + * This is temporary code until all wireless functionality is migrated + * into cfg80211, when that happens all the exports here go away and + * we directly assign the wireless handlers of wireless interfaces. + * + * Copyright 2008 Johannes Berg + */ + +#include +#include +#include +#include +#include +#include "core.h" + +int cfg80211_wext_giwname(struct net_device *dev, + struct iw_request_info *info, + char *name, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct ieee80211_supported_band *sband; + bool is_ht = false, is_a = false, is_b = false, is_g = false; + + if (!wdev) + return -EOPNOTSUPP; + + sband = wdev->wiphy->bands[IEEE80211_BAND_5GHZ]; + if (sband) { + is_a = true; + is_ht |= sband->ht_cap.ht_supported; + } + + sband = wdev->wiphy->bands[IEEE80211_BAND_2GHZ]; + if (sband) { + int i; + /* Check for mandatory rates */ + for (i = 0; i < sband->n_bitrates; i++) { + if (sband->bitrates[i].bitrate == 10) + is_b = true; + if (sband->bitrates[i].bitrate == 60) + is_g = true; + } + is_ht |= sband->ht_cap.ht_supported; + } + + strcpy(name, "IEEE 802.11"); + if (is_a) + strcat(name, "a"); + if (is_b) + strcat(name, "b"); + if (is_g) + strcat(name, "g"); + if (is_ht) + strcat(name, "n"); + + return 0; +} +EXPORT_SYMBOL(cfg80211_wext_giwname); -- cgit v1.2.3 From e60c7744f8aa77bcbcb0b294596d6c87445d1200 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 26 Nov 2008 23:31:40 +0100 Subject: cfg80211: handle SIOCGIWMODE/SIOCSIWMODE further reducing wext code in mac80211. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 4 +++ net/mac80211/iface.c | 4 +++ net/mac80211/wext.c | 76 ++------------------------------------------ net/wireless/wext-compat.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 74 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c97eac34ec01..a0c0bf19496c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -530,5 +530,9 @@ struct cfg80211_ops { int cfg80211_wext_giwname(struct net_device *dev, struct iw_request_info *info, char *name, char *extra); +int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, + u32 *mode, char *extra); +int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info, + u32 *mode, char *extra); #endif /* __NET_CFG80211_H */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 9ab772ac74ae..5abbc3f07dd6 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -698,6 +698,10 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, if (type == sdata->vif.type) return 0; + /* Setting ad-hoc mode on non-IBSS channel is not supported. */ + if (sdata->local->oper_channel->flags & IEEE80211_CHAN_NO_IBSS) + return -EOPNOTSUPP; + /* * We could, here, on changes between IBSS/STA/MESH modes, * invoke an MLME function instead that disassociates etc. diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index b9eee3c903de..4e1fdcfacb0c 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -224,78 +224,6 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev, } -static int ieee80211_ioctl_siwmode(struct net_device *dev, - struct iw_request_info *info, - __u32 *mode, char *extra) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = sdata->local; - int type; - - if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - return -EOPNOTSUPP; - - switch (*mode) { - case IW_MODE_INFRA: - type = NL80211_IFTYPE_STATION; - break; - case IW_MODE_ADHOC: - /* Setting ad-hoc mode on non ibss channel is not - * supported. - */ - if (local->oper_channel && - (local->oper_channel->flags & IEEE80211_CHAN_NO_IBSS)) - return -EOPNOTSUPP; - - type = NL80211_IFTYPE_ADHOC; - break; - case IW_MODE_REPEAT: - type = NL80211_IFTYPE_WDS; - break; - case IW_MODE_MONITOR: - type = NL80211_IFTYPE_MONITOR; - break; - default: - return -EINVAL; - } - - return ieee80211_if_change_type(sdata, type); -} - - -static int ieee80211_ioctl_giwmode(struct net_device *dev, - struct iw_request_info *info, - __u32 *mode, char *extra) -{ - struct ieee80211_sub_if_data *sdata; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - switch (sdata->vif.type) { - case NL80211_IFTYPE_AP: - *mode = IW_MODE_MASTER; - break; - case NL80211_IFTYPE_STATION: - *mode = IW_MODE_INFRA; - break; - case NL80211_IFTYPE_ADHOC: - *mode = IW_MODE_ADHOC; - break; - case NL80211_IFTYPE_MONITOR: - *mode = IW_MODE_MONITOR; - break; - case NL80211_IFTYPE_WDS: - *mode = IW_MODE_REPEAT; - break; - case NL80211_IFTYPE_AP_VLAN: - *mode = IW_MODE_SECOND; /* FIXME */ - break; - default: - *mode = IW_MODE_AUTO; - break; - } - return 0; -} - static int ieee80211_ioctl_siwfreq(struct net_device *dev, struct iw_request_info *info, struct iw_freq *freq, char *extra) @@ -1109,8 +1037,8 @@ static const iw_handler ieee80211_handler[] = (iw_handler) NULL, /* SIOCGIWNWID */ (iw_handler) ieee80211_ioctl_siwfreq, /* SIOCSIWFREQ */ (iw_handler) ieee80211_ioctl_giwfreq, /* SIOCGIWFREQ */ - (iw_handler) ieee80211_ioctl_siwmode, /* SIOCSIWMODE */ - (iw_handler) ieee80211_ioctl_giwmode, /* SIOCGIWMODE */ + (iw_handler) cfg80211_wext_siwmode, /* SIOCSIWMODE */ + (iw_handler) cfg80211_wext_giwmode, /* SIOCGIWMODE */ (iw_handler) NULL, /* SIOCSIWSENS */ (iw_handler) NULL, /* SIOCGIWSENS */ (iw_handler) NULL /* not used */, /* SIOCSIWRANGE */ diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index a5db4551a31a..58e489fd4aed 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -58,3 +58,82 @@ int cfg80211_wext_giwname(struct net_device *dev, return 0; } EXPORT_SYMBOL(cfg80211_wext_giwname); + +int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, + u32 *mode, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev; + struct vif_params vifparams; + enum nl80211_iftype type; + + if (!wdev) + return -EOPNOTSUPP; + + rdev = wiphy_to_dev(wdev->wiphy); + + if (!rdev->ops->change_virtual_intf) + return -EOPNOTSUPP; + + /* don't support changing VLANs, you just re-create them */ + if (wdev->iftype == NL80211_IFTYPE_AP_VLAN) + return -EOPNOTSUPP; + + switch (*mode) { + case IW_MODE_INFRA: + type = NL80211_IFTYPE_STATION; + break; + case IW_MODE_ADHOC: + type = NL80211_IFTYPE_ADHOC; + break; + case IW_MODE_REPEAT: + type = NL80211_IFTYPE_WDS; + break; + case IW_MODE_MONITOR: + type = NL80211_IFTYPE_MONITOR; + break; + default: + return -EINVAL; + } + + memset(&vifparams, 0, sizeof(vifparams)); + + return rdev->ops->change_virtual_intf(wdev->wiphy, dev->ifindex, type, + NULL, &vifparams); +} +EXPORT_SYMBOL(cfg80211_wext_siwmode); + +int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info, + u32 *mode, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + if (!wdev) + return -EOPNOTSUPP; + + switch (wdev->iftype) { + case NL80211_IFTYPE_AP: + *mode = IW_MODE_MASTER; + break; + case NL80211_IFTYPE_STATION: + *mode = IW_MODE_INFRA; + break; + case NL80211_IFTYPE_ADHOC: + *mode = IW_MODE_ADHOC; + break; + case NL80211_IFTYPE_MONITOR: + *mode = IW_MODE_MONITOR; + break; + case NL80211_IFTYPE_WDS: + *mode = IW_MODE_REPEAT; + break; + case NL80211_IFTYPE_AP_VLAN: + *mode = IW_MODE_SECOND; /* FIXME */ + break; + default: + *mode = IW_MODE_AUTO; + break; + } + return 0; +} +EXPORT_SYMBOL(cfg80211_wext_giwmode); -- cgit v1.2.3 From 007e5ddddfed4ba039899754936e89b27d5cb551 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 27 Nov 2008 23:13:38 +0100 Subject: wireless: clean up radiotap a bit No need to pad the header so no constant needed for that, no need to carry any version number from netbsd nor CVS IDs from them. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/ipw2x00/ipw2200.c | 24 +++--------------------- drivers/net/wireless/libertas/radiotap.h | 3 --- include/net/ieee80211_radiotap.h | 15 +++------------ 3 files changed, 6 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c index d2a2b7586d08..48fa6df3a774 100644 --- a/drivers/net/wireless/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/ipw2x00/ipw2200.c @@ -7797,15 +7797,6 @@ static void ipw_handle_data_packet_monitor(struct ipw_priv *priv, memmove(rxb->skb->data + sizeof(struct ipw_rt_hdr), rxb->skb->data + IPW_RX_FRAME_SIZE, len); - /* Zero the radiotap static buffer ... We only need to zero the bytes NOT - * part of our real header, saves a little time. - * - * No longer necessary since we fill in all our data. Purge before merging - * patch officially. - * memset(rxb->skb->data + sizeof(struct ipw_rt_hdr), 0, - * IEEE80211_RADIOTAP_HDRLEN - sizeof(struct ipw_rt_hdr)); - */ - ipw_rt = (struct ipw_rt_hdr *)rxb->skb->data; ipw_rt->rt_hdr.it_version = PKTHDR_RADIOTAP_VERSION; @@ -8013,15 +8004,6 @@ static void ipw_handle_promiscuous_rx(struct ipw_priv *priv, memcpy(ipw_rt->payload, hdr, len); - /* Zero the radiotap static buffer ... We only need to zero the bytes - * NOT part of our real header, saves a little time. - * - * No longer necessary since we fill in all our data. Purge before - * merging patch officially. - * memset(rxb->skb->data + sizeof(struct ipw_rt_hdr), 0, - * IEEE80211_RADIOTAP_HDRLEN - sizeof(struct ipw_rt_hdr)); - */ - ipw_rt->rt_hdr.it_version = PKTHDR_RADIOTAP_VERSION; ipw_rt->rt_hdr.it_pad = 0; /* always good to zero */ ipw_rt->rt_hdr.it_len = cpu_to_le16(sizeof(*ipw_rt)); /* total header+data */ @@ -10409,9 +10391,9 @@ static void ipw_handle_promiscuous_tx(struct ipw_priv *priv, } else len = src->len; - dst = alloc_skb( - len + IEEE80211_RADIOTAP_HDRLEN, GFP_ATOMIC); - if (!dst) continue; + dst = alloc_skb(len + sizeof(*rt_hdr), GFP_ATOMIC); + if (!dst) + continue; rt_hdr = (void *)skb_put(dst, sizeof(*rt_hdr)); diff --git a/drivers/net/wireless/libertas/radiotap.h b/drivers/net/wireless/libertas/radiotap.h index 5d118f40cfbc..f8eb9097ff0a 100644 --- a/drivers/net/wireless/libertas/radiotap.h +++ b/drivers/net/wireless/libertas/radiotap.h @@ -6,9 +6,6 @@ struct tx_radiotap_hdr { u8 txpower; u8 rts_retries; u8 data_retries; -#if 0 - u8 pad[IEEE80211_RADIOTAP_HDRLEN - 12]; -#endif } __attribute__ ((packed)); #define TX_RADIOTAP_PRESENT ( \ diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index d364fd594ea4..384698cb773a 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -1,7 +1,4 @@ -/* $FreeBSD: src/sys/net80211/ieee80211_radiotap.h,v 1.5 2005/01/22 20:12:05 sam Exp $ */ -/* $NetBSD: ieee80211_radiotap.h,v 1.11 2005/06/22 06:16:02 dyoung Exp $ */ - -/*- +/* * Copyright (c) 2003, 2004 David Young. All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -42,8 +39,6 @@ #include #include -/* Radiotap header version (from official NetBSD feed) */ -#define IEEE80211RADIOTAP_VERSION "1.5" /* Base version of the radiotap packet header data */ #define PKTHDR_RADIOTAP_VERSION 0 @@ -62,12 +57,8 @@ * readers. */ -/* XXX tcpdump/libpcap do not tolerate variable-length headers, - * yet, so we pad every radiotap header to 64 bytes. Ugh. - */ -#define IEEE80211_RADIOTAP_HDRLEN 64 - -/* The radio capture header precedes the 802.11 header. +/* + * The radio capture header precedes the 802.11 header. * All data in the header is little endian on all platforms. */ struct ieee80211_radiotap_header { -- cgit v1.2.3 From 4571d3bf87b76eae875283ff9f7243984b5ddcae Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Sun, 30 Nov 2008 00:48:41 +0100 Subject: mac80211: add sta_notify_ps callback This patch is necessary in order to provide a proper Access point support for p54. Unfortunately for us, there is no documented way to disable the interfering power save buffering mechanism in firmware completely. Therefore we give in and notify the driver through our new sta_notify_ps callback, so that we can update the filter state. Signed-off-by: Christian Lamparter Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 18 ++++++++++++++++++ net/mac80211/rx.c | 7 +++++++ 2 files changed, 25 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 6e823cc6a4b1..0a0c59365db6 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -781,6 +781,19 @@ enum sta_notify_cmd { STA_NOTIFY_ADD, STA_NOTIFY_REMOVE }; +/** + * enum sta_notify_ps_cmd - sta power save notify command + * + * Used with the sta_notify_ps() callback in &struct ieee80211_ops to + * notify the driver if a station made a power state transition. + * + * @STA_NOTIFY_SLEEP: a station is now sleeping + * @STA_NOTIFY_AWAKE: a sleeping station woke up + */ +enum sta_notify_ps_cmd { + STA_NOTIFY_SLEEP, STA_NOTIFY_AWAKE, +}; + /** * enum ieee80211_tkip_key_type - get tkip key * @@ -1251,6 +1264,9 @@ enum ieee80211_ampdu_mlme_action { * @sta_notify: Notifies low level driver about addition or removal * of associated station or AP. * + * @sta_ps_notify: Notifies low level driver about the power state transition + * of a associated station. Must be atomic. + * * @conf_tx: Configure TX queue parameters (EDCF (aifs, cw_min, cw_max), * bursting) for a hardware TX queue. * @@ -1317,6 +1333,8 @@ struct ieee80211_ops { int (*set_frag_threshold)(struct ieee80211_hw *hw, u32 value); void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum sta_notify_cmd, struct ieee80211_sta *sta); + void (*sta_notify_ps)(struct ieee80211_hw *hw, + enum sta_notify_ps_cmd, struct ieee80211_sta *sta); int (*conf_tx)(struct ieee80211_hw *hw, u16 queue, const struct ieee80211_tx_queue_params *params); int (*get_tx_stats)(struct ieee80211_hw *hw, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 68a6e973c72c..14be095b8528 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -654,9 +654,13 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) static void ap_sta_ps_start(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; atomic_inc(&sdata->bss->num_sta_ps); set_and_clear_sta_flags(sta, WLAN_STA_PS, WLAN_STA_PSPOLL); + if (local->ops->sta_notify_ps) + local->ops->sta_notify_ps(local_to_hw(local), STA_NOTIFY_SLEEP, + &sta->sta); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %pM aid %d enters power save mode\n", sdata->dev->name, sta->sta.addr, sta->sta.aid); @@ -673,6 +677,9 @@ static int ap_sta_ps_end(struct sta_info *sta) atomic_dec(&sdata->bss->num_sta_ps); clear_sta_flags(sta, WLAN_STA_PS | WLAN_STA_PSPOLL); + if (local->ops->sta_notify_ps) + local->ops->sta_notify_ps(local_to_hw(local), STA_NOTIFY_AWAKE, + &sta->sta); if (!skb_queue_empty(&sta->ps_tx_buf)) sta_info_clear_tim_bit(sta); -- cgit v1.2.3 From 8bef7a10014c4579c66579ab47fc1bb9563ac42a Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Sun, 30 Nov 2008 20:56:28 +0200 Subject: mac80211: document ieee80211_tx_info.pad Fixes htmldocs warning: Warning(mac80211.h:379): No description found for parameter 'pad[2]' Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- include/net/mac80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0a0c59365db6..e84c922a1b16 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -323,6 +323,7 @@ struct ieee80211_tx_rate { * @flags: transmit info flags, defined above * @band: the band to transmit on (use for checking for races) * @antenna_sel_tx: antenna to use, 0 for automatic diversity + * @pad: padding, ignore * @control: union for control data * @status: union for status data * @driver_data: array of driver_data pointers -- cgit v1.2.3 From b74ca3a896b9ab5f952bc440154758e708c48884 Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Mon, 8 Dec 2008 01:14:16 -0800 Subject: netdevice: Kill netdev->priv This is the last shoot of this series. After I removing all directly reference of netdev->priv, I am killing "priv" of "struct net_device" and fixing relative comments/docs. Anyone will not be allowed to reference netdev->priv directly. If you want to reference the memory of private data, use netdev_priv() instead. If the private data is not allocted when alloc_netdev(), use netdev->ml_priv to point that memory after you creating that private data. Signed-off-by: Wang Chen Signed-off-by: David S. Miller --- Documentation/networking/driver.txt | 2 +- Documentation/networking/netdevices.txt | 2 +- drivers/net/3c501.h | 2 +- drivers/net/atp.c | 2 +- drivers/net/eexpress.c | 2 +- drivers/net/forcedeth.c | 4 ++-- drivers/net/lance.c | 2 +- drivers/net/myri_sbus.c | 2 +- drivers/net/pci-skeleton.c | 2 +- drivers/net/sun3_82586.c | 2 +- drivers/net/sunbmac.c | 2 +- drivers/net/tokenring/3c359.c | 5 +++-- drivers/net/via-rhine.c | 9 +++++---- drivers/net/wireless/strip.c | 2 +- include/linux/hdlc.h | 2 +- include/linux/netdevice.h | 1 - net/atm/mpc.c | 4 ++-- net/core/dev.c | 6 ------ 18 files changed, 24 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/Documentation/networking/driver.txt b/Documentation/networking/driver.txt index ea72d2e66ca8..03283daa64fe 100644 --- a/Documentation/networking/driver.txt +++ b/Documentation/networking/driver.txt @@ -13,7 +13,7 @@ Transmit path guidelines: static int drv_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { - struct drv *dp = dev->priv; + struct drv *dp = netdev_priv(dev); lock_tx(dp); ... diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt index d0f71fc7f782..a2ab6a0b116d 100644 --- a/Documentation/networking/netdevices.txt +++ b/Documentation/networking/netdevices.txt @@ -18,7 +18,7 @@ There are routines in net_init.c to handle the common cases of alloc_etherdev, alloc_netdev. These reserve extra space for driver private data which gets freed when the network device is freed. If separately allocated data is attached to the network device -(dev->priv) then it is up to the module exit handler to free that. +(netdev_priv(dev)) then it is up to the module exit handler to free that. MTU === diff --git a/drivers/net/3c501.h b/drivers/net/3c501.h index cfec64efff78..f40b0493337a 100644 --- a/drivers/net/3c501.h +++ b/drivers/net/3c501.h @@ -23,7 +23,7 @@ static const struct ethtool_ops netdev_ethtool_ops; static int el_debug = EL_DEBUG; /* - * Board-specific info in dev->priv. + * Board-specific info in netdev_priv(dev). */ struct net_local diff --git a/drivers/net/atp.c b/drivers/net/atp.c index 7028b276dfd3..1d6b74c5d6c9 100644 --- a/drivers/net/atp.c +++ b/drivers/net/atp.c @@ -420,7 +420,7 @@ static unsigned short __init eeprom_op(long ioaddr, u32 cmd) registers that "should" only need to be set once at boot, so that there is non-reboot way to recover if something goes wrong. - This is an attachable device: if there is no dev->priv entry then it wasn't + This is an attachable device: if there is no private entry then it wasn't probed for at boot-time, and we need to probe for it again. */ static int net_open(struct net_device *dev) diff --git a/drivers/net/eexpress.c b/drivers/net/eexpress.c index a125e41240f5..9ff3f2f5e382 100644 --- a/drivers/net/eexpress.c +++ b/drivers/net/eexpress.c @@ -1046,7 +1046,7 @@ static void eexp_hw_tx_pio(struct net_device *dev, unsigned short *buf, /* * Sanity check the suspected EtherExpress card * Read hardware address, reset card, size memory and initialize buffer - * memory pointers. These are held in dev->priv, in case someone has more + * memory pointers. These are held in netdev_priv(), in case someone has more * than one card in a machine. */ diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 12384df8cb2b..1f2b24743ee9 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -712,12 +712,12 @@ struct nv_skb_map { /* * SMP locking: - * All hardware access under dev->priv->lock, except the performance + * All hardware access under netdev_priv(dev)->lock, except the performance * critical parts: * - rx is (pseudo-) lockless: it relies on the single-threading provided * by the arch code for interrupts. * - tx setup is lockless: it relies on netif_tx_lock. Actual submission - * needs dev->priv->lock :-( + * needs netdev_priv(dev)->lock :-( * - set_multicast_list: preparation lockless, relies on netif_tx_lock. */ diff --git a/drivers/net/lance.c b/drivers/net/lance.c index e81b6113ed94..d7afb938ea62 100644 --- a/drivers/net/lance.c +++ b/drivers/net/lance.c @@ -519,7 +519,7 @@ static int __init lance_probe1(struct net_device *dev, int ioaddr, int irq, int } } - /* We can't allocate dev->priv from alloc_etherdev() because it must + /* We can't allocate private data from alloc_etherdev() because it must a ISA DMA-able region. */ chipname = chip_table[lance_version].name; printk("%s: %s at %#3x, ", dev->name, chipname, ioaddr); diff --git a/drivers/net/myri_sbus.c b/drivers/net/myri_sbus.c index 6833f65f8aec..899ed065a147 100644 --- a/drivers/net/myri_sbus.c +++ b/drivers/net/myri_sbus.c @@ -1091,7 +1091,7 @@ static int __devinit myri_sbus_probe(struct of_device *op, const struct of_devic err_free_irq: free_irq(dev->irq, dev); err: - /* This will also free the co-allocated 'dev->priv' */ + /* This will also free the co-allocated private data*/ free_netdev(dev); return -ENODEV; } diff --git a/drivers/net/pci-skeleton.c b/drivers/net/pci-skeleton.c index b23b5c397b1d..c95fd72c3bb9 100644 --- a/drivers/net/pci-skeleton.c +++ b/drivers/net/pci-skeleton.c @@ -781,7 +781,7 @@ static int __devinit netdrv_init_one (struct pci_dev *pdev, dev->irq = pdev->irq; dev->base_addr = (unsigned long) ioaddr; - /* dev->priv/tp zeroed and aligned in alloc_etherdev */ + /* netdev_priv()/tp zeroed and aligned in alloc_etherdev */ tp = netdev_priv(dev); /* note: tp->chipset set in netdrv_init_board */ diff --git a/drivers/net/sun3_82586.c b/drivers/net/sun3_82586.c index e8f97d5c9c23..e0d84772771c 100644 --- a/drivers/net/sun3_82586.c +++ b/drivers/net/sun3_82586.c @@ -209,7 +209,7 @@ static int sun3_82586_open(struct net_device *dev) static int check586(struct net_device *dev,char *where,unsigned size) { struct priv pb; - struct priv *p = /* (struct priv *) dev->priv*/ &pb; + struct priv *p = &pb; char *iscp_addr; int i; diff --git a/drivers/net/sunbmac.c b/drivers/net/sunbmac.c index 977b3e08bbfc..7f69c7f176c4 100644 --- a/drivers/net/sunbmac.c +++ b/drivers/net/sunbmac.c @@ -1233,7 +1233,7 @@ fail_and_cleanup: bp->bmac_block, bp->bblock_dvma); - /* This also frees the co-located 'dev->priv' */ + /* This also frees the co-located private data */ free_netdev(dev); return -ENODEV; } diff --git a/drivers/net/tokenring/3c359.c b/drivers/net/tokenring/3c359.c index e7a944657cf8..43853e3b210e 100644 --- a/drivers/net/tokenring/3c359.c +++ b/drivers/net/tokenring/3c359.c @@ -296,8 +296,9 @@ static int __devinit xl_probe(struct pci_dev *pdev, } ; /* - * Allowing init_trdev to allocate the dev->priv structure will align xl_private - * on a 32 bytes boundary which we need for the rx/tx descriptors + * Allowing init_trdev to allocate the private data will align + * xl_private on a 32 bytes boundary which we need for the rx/tx + * descriptors */ dev = alloc_trdev(sizeof(struct xl_private)) ; diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c index 93b74b7b7077..8d405c83df8b 100644 --- a/drivers/net/via-rhine.c +++ b/drivers/net/via-rhine.c @@ -191,12 +191,13 @@ IIId. Synchronization The driver runs as two independent, single-threaded flows of control. One is the send-packet routine, which enforces single-threaded use by the -dev->priv->lock spinlock. The other thread is the interrupt handler, which -is single threaded by the hardware and interrupt handling software. +netdev_priv(dev)->lock spinlock. The other thread is the interrupt handler, +which is single threaded by the hardware and interrupt handling software. The send packet thread has partial control over the Tx ring. It locks the -dev->priv->lock whenever it's queuing a Tx packet. If the next slot in the ring -is not available it stops the transmit queue by calling netif_stop_queue. +netdev_priv(dev)->lock whenever it's queuing a Tx packet. If the next slot in +the ring is not available it stops the transmit queue by +calling netif_stop_queue. The interrupt handler has exclusive control over the Rx ring and records stats from the Tx ring. After reaping the stats, it marks the Tx queue entry as diff --git a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c index 692e6c5e009a..dd0de3a9ed4e 100644 --- a/drivers/net/wireless/strip.c +++ b/drivers/net/wireless/strip.c @@ -2494,7 +2494,7 @@ static void strip_dev_setup(struct net_device *dev) dev->type = ARPHRD_METRICOM; /* dtang */ dev->hard_header_len = sizeof(STRIP_Header); /* - * dev->priv Already holds a pointer to our struct strip + * netdev_priv(dev) Already holds a pointer to our struct strip */ *(MetricomAddress *) & dev->broadcast = broadcast_address; diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h index e960faac609d..fd47a151665e 100644 --- a/include/linux/hdlc.h +++ b/include/linux/hdlc.h @@ -43,7 +43,7 @@ struct hdlc_proto { }; -/* Pointed to by dev->priv */ +/* Pointed to by netdev_priv(dev) */ typedef struct hdlc_device { /* used by HDLC layer to take control over HDLC device from hw driver*/ int (*attach)(struct net_device *dev, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0df0db068ac3..47e731528315 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -785,7 +785,6 @@ struct net_device /* * One part is mostly used on xmit path (device) */ - void *priv; /* pointer to private data */ /* These may be needed for future network-power-down code. */ unsigned long trans_start; /* Time (in jiffies) of last Tx */ diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 12e9ea371db1..039d5cc72c3d 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -341,8 +341,8 @@ static const char *mpoa_device_type_string(char type) } /* - * lec device calls this via its dev->priv->lane2_ops->associate_indicator() - * when it sees a TLV in LE_ARP packet. + * lec device calls this via its netdev_priv(dev)->lane2_ops + * ->associate_indicator() when it sees a TLV in LE_ARP packet. * We fill in the pointer above when we see a LANE2 lec initializing * See LANE2 spec 3.1.5 * diff --git a/net/core/dev.c b/net/core/dev.c index 4615e9a443aa..f54cac76438a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4378,12 +4378,6 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, dev->num_tx_queues = queue_count; dev->real_num_tx_queues = queue_count; - if (sizeof_priv) { - dev->priv = ((char *)dev + - ((sizeof(struct net_device) + NETDEV_ALIGN_CONST) - & ~NETDEV_ALIGN_CONST)); - } - dev->gso_max_size = GSO_MAX_SIZE; netdev_init_queues(dev); -- cgit v1.2.3 From 0049bab5e765aa74cf767a834fa336e19453fc5e Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 8 Dec 2008 01:18:05 -0800 Subject: dccp: Remove obsolete parts of the old CCID interface The TX/RX CCIDs of the minisock are now redundant: similar to the Ack Vector case, their value equals initially that of the sysctl, but at the end of feature negotiation may be something different. The old interface removed by this patch thus has been replaced by the newer interface to dynamically query the currently loaded CCIDs. Also removed are the constructors for the TX CCID and the RX CCID, since the switch "rx <-> non-rx" is done by the handler in minisocks.c (and the handler is the only place in the code where CCIDs are loaded). Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- Documentation/networking/dccp.txt | 5 +++-- include/linux/dccp.h | 3 --- net/dccp/ccid.c | 14 -------------- net/dccp/ccid.h | 5 ----- net/dccp/feat.c | 13 ------------- net/dccp/minisocks.c | 2 -- 6 files changed, 3 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt index 610083ff73f6..a203d132dbef 100644 --- a/Documentation/networking/dccp.txt +++ b/Documentation/networking/dccp.txt @@ -140,10 +140,11 @@ send_ackvec = 1 Whether or not to send Ack Vector options (sec. 11.5). tx_ccid = 2 - Default CCID for the sender-receiver half-connection. + Default CCID for the sender-receiver half-connection. Depending on the + choice of CCID, the Send Ack Vector feature is enabled automatically. rx_ccid = 2 - Default CCID for the receiver-sender half-connection. + Default CCID for the receiver-sender half-connection; see tx_ccid. seq_window = 100 The initial sequence window (sec. 7.5.2). diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 6a72ff52a8a4..46daea312d92 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -370,7 +370,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * Will be used to pass the state from dccp_request_sock to dccp_sock. * * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) - * @dccpms_ccid - Congestion Control Id (CCID) (section 10) * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2) * @dccpms_pending - List of features being negotiated @@ -378,8 +377,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) */ struct dccp_minisock { __u64 dccpms_sequence_window; - __u8 dccpms_rx_ccid; - __u8 dccpms_tx_ccid; __u8 dccpms_send_ack_vector; __u8 dccpms_send_ndp_count; struct list_head dccpms_pending; diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index 647cb0614f84..bcc643f992ae 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -253,20 +253,6 @@ out_module_put: EXPORT_SYMBOL_GPL(ccid_new); -struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk, gfp_t gfp) -{ - return ccid_new(id, sk, 1, gfp); -} - -EXPORT_SYMBOL_GPL(ccid_hc_rx_new); - -struct ccid *ccid_hc_tx_new(unsigned char id,struct sock *sk, gfp_t gfp) -{ - return ccid_new(id, sk, 0, gfp); -} - -EXPORT_SYMBOL_GPL(ccid_hc_tx_new); - static void ccid_delete(struct ccid *ccid, struct sock *sk, int rx) { struct ccid_operations *ccid_ops; diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h index 803343aed004..18f69423a708 100644 --- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -111,11 +111,6 @@ extern int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, extern struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp); -extern struct ccid *ccid_hc_rx_new(unsigned char id, struct sock *sk, - gfp_t gfp); -extern struct ccid *ccid_hc_tx_new(unsigned char id, struct sock *sk, - gfp_t gfp); - static inline int ccid_get_current_rx_ccid(struct dccp_sock *dp) { struct ccid *ccid = dp->dccps_hc_rx_ccid; diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 077f78d579c4..a0d5891a37bf 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -1124,22 +1124,9 @@ int dccp_feat_init(struct sock *sk) INIT_LIST_HEAD(&dmsk->dccpms_pending); /* XXX no longer used */ INIT_LIST_HEAD(&dmsk->dccpms_conf); /* XXX no longer used */ - /* CCID L */ - rc = __feat_register_sp(&dp->dccps_featneg, DCCPF_CCID, 1, 0, - &dmsk->dccpms_tx_ccid, 1); - if (rc) - goto out; - - /* CCID R */ - rc = __feat_register_sp(&dp->dccps_featneg, DCCPF_CCID, 0, 0, - &dmsk->dccpms_rx_ccid, 1); - if (rc) - goto out; - /* Ack ratio */ rc = __feat_register_nn(&dp->dccps_featneg, DCCPF_ACK_RATIO, 0, dp->dccps_l_ack_ratio); -out: return rc; } diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 308b6b928c3d..210c346899ba 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -45,8 +45,6 @@ EXPORT_SYMBOL_GPL(dccp_death_row); void dccp_minisock_init(struct dccp_minisock *dmsk) { dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window; - dmsk->dccpms_rx_ccid = sysctl_dccp_feat_rx_ccid; - dmsk->dccpms_tx_ccid = sysctl_dccp_feat_tx_ccid; dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector; dmsk->dccpms_send_ndp_count = sysctl_dccp_feat_send_ndp_count; } -- cgit v1.2.3 From 4098dce5be537a157eed4a326efd464109825b8b Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 8 Dec 2008 01:18:37 -0800 Subject: dccp: Remove manual influence on NDP Count feature Updating the NDP count feature is handled automatically now: * for CCID-2 it is disabled, since the code does not use NDP counts; * for CCID-3 it is enabled, as NDP counts are used to determine loss lengths. Allowing the user to change NDP values leads to unpredictable and failing behaviour, since it is then possible to disable NDP counts even when they are needed (e.g. in CCID-3). This means that only those user settings are sensible that agree with the values for Send NDP Count implied by the choice of CCID. But those settings are already activated by the feature negotiation (CCID dependency tracking), hence this form of support is redundant. At startup the initialisation of the NDP count feature uses the default value of 0, which is done implicitly by the zeroing-out of the socket when it is allocated. If the choice of CCID or feature negotiation enables NDP count, this will then be updated via the NDP activation handler. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- Documentation/networking/dccp.txt | 3 --- include/linux/dccp.h | 4 ++-- net/dccp/dccp.h | 1 - net/dccp/feat.c | 2 +- net/dccp/minisocks.c | 1 - net/dccp/options.c | 4 +--- net/dccp/sysctl.c | 7 ------- 7 files changed, 4 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt index a203d132dbef..1403745ab406 100644 --- a/Documentation/networking/dccp.txt +++ b/Documentation/networking/dccp.txt @@ -133,9 +133,6 @@ retries2 importance for retransmitted acknowledgments and feature negotiation, data packets are never retransmitted. Analogue of tcp_retries2. -send_ndp = 1 - Whether or not to send NDP count options (sec. 7.7.2). - send_ackvec = 1 Whether or not to send Ack Vector options (sec. 11.5). diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 46daea312d92..60e94438eadd 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -371,14 +371,12 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) - * @dccpms_send_ndp_count - Send NDP Count Feature (7.7.2) * @dccpms_pending - List of features being negotiated * @dccpms_conf - */ struct dccp_minisock { __u64 dccpms_sequence_window; __u8 dccpms_send_ack_vector; - __u8 dccpms_send_ndp_count; struct list_head dccpms_pending; struct list_head dccpms_conf; }; @@ -490,6 +488,7 @@ struct dccp_ackvec; * @dccps_r_ack_ratio - feature-remote Ack Ratio * @dccps_pcslen - sender partial checksum coverage (via sockopt) * @dccps_pcrlen - receiver partial checksum coverage (via sockopt) + * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2) * @dccps_ndp_count - number of Non Data Packets since last data packet * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) @@ -529,6 +528,7 @@ struct dccp_sock { __u16 dccps_r_ack_ratio; __u8 dccps_pcslen:4; __u8 dccps_pcrlen:4; + __u8 dccps_send_ndp_count:1; __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; struct dccp_minisock dccps_minisock; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 94f6785f81ec..e0759d098b9a 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -99,7 +99,6 @@ extern int sysctl_dccp_feat_sequence_window; extern int sysctl_dccp_feat_rx_ccid; extern int sysctl_dccp_feat_tx_ccid; extern int sysctl_dccp_feat_send_ack_vector; -extern int sysctl_dccp_feat_send_ndp_count; extern int sysctl_dccp_tx_qlen; extern int sysctl_dccp_sync_ratelimit; diff --git a/net/dccp/feat.c b/net/dccp/feat.c index a0d5891a37bf..30f9fb76b921 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -85,7 +85,7 @@ static int dccp_hdlr_ackvec(struct sock *sk, u64 enable, bool rx) static int dccp_hdlr_ndp(struct sock *sk, u64 enable, bool rx) { if (!rx) - dccp_msk(sk)->dccpms_send_ndp_count = (enable > 0); + dccp_sk(sk)->dccps_send_ndp_count = (enable > 0); return 0; } diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 210c346899ba..02b14812464a 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -46,7 +46,6 @@ void dccp_minisock_init(struct dccp_minisock *dmsk) { dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window; dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector; - dmsk->dccpms_send_ndp_count = sysctl_dccp_feat_send_ndp_count; } void dccp_time_wait(struct sock *sk, int state, int timeo) diff --git a/net/dccp/options.c b/net/dccp/options.c index debb1008c7ad..e9d674874b4e 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -27,7 +27,6 @@ int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW; int sysctl_dccp_feat_rx_ccid = DCCPF_INITIAL_CCID; int sysctl_dccp_feat_tx_ccid = DCCPF_INITIAL_CCID; int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR; -int sysctl_dccp_feat_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT; u64 dccp_decode_value_var(const u8 *bf, const u8 len) { @@ -531,8 +530,7 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb) DCCP_SKB_CB(skb)->dccpd_opt_len = 0; - if (dmsk->dccpms_send_ndp_count && - dccp_insert_option_ndp(sk, skb)) + if (dp->dccps_send_ndp_count && dccp_insert_option_ndp(sk, skb)) return -1; if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) { diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index f6e54f433e29..587c12f915c1 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -47,13 +47,6 @@ static struct ctl_table dccp_default_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "send_ndp", - .data = &sysctl_dccp_feat_send_ndp_count, - .maxlen = sizeof(sysctl_dccp_feat_send_ndp_count), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "request_retries", .data = &sysctl_dccp_request_retries, -- cgit v1.2.3 From 6fdd34d43bff8be9bb925b49d87a0ee144d2ab07 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 8 Dec 2008 01:19:06 -0800 Subject: dccp ccid-2: Phase out the use of boolean Ack Vector sysctl This removes the use of the sysctl and the minisock variable for the Send Ack Vector feature, as it now is handled fully dynamically via feature negotiation (i.e. when CCID-2 is enabled, Ack Vectors are automatically enabled as per RFC 4341, 4.). Using a sysctl in parallel to this implementation would open the door to crashes, since much of the code relies on tests of the boolean minisock / sysctl variable. Thus, this patch replaces all tests of type if (dccp_msk(sk)->dccpms_send_ack_vector) /* ... */ with if (dp->dccps_hc_rx_ackvec != NULL) /* ... */ The dccps_hc_rx_ackvec is allocated by the dccp_hdlr_ackvec() when feature negotiation concluded that Ack Vectors are to be used on the half-connection. Otherwise, it is NULL (due to dccp_init_sock/dccp_create_openreq_child), so that the test is a valid one. The activation handler for Ack Vectors is called as soon as the feature negotiation has concluded at the * server when the Ack marking the transition RESPOND => OPEN arrives; * client after it has sent its ACK, marking the transition REQUEST => PARTOPEN. Adding the sequence number of the Response packet to the Ack Vector has been removed, since (a) connection establishment implies that the Response has been received; (b) the CCIDs only look at packets received in the (PART)OPEN state, i.e. this entry will always be ignored; (c) it can not be used for anything useful - to detect loss for instance, only packets received after the loss can serve as pseudo-dupacks. There was a FIXME to change the error code when dccp_ackvec_add() fails. I removed this after finding out that: * the check whether ackno < ISN is already made earlier, * this Response is likely the 1st packet with an Ackno that the client gets, * so when dccp_ackvec_add() fails, the reason is likely not a packet error. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- Documentation/networking/dccp.txt | 3 --- include/linux/dccp.h | 3 --- net/dccp/dccp.h | 3 +-- net/dccp/diag.c | 2 +- net/dccp/input.c | 12 +++--------- net/dccp/minisocks.c | 1 - net/dccp/options.c | 7 ++----- net/dccp/proto.c | 3 +-- net/dccp/sysctl.c | 7 ------- 9 files changed, 8 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt index 1403745ab406..7a3bb1abb830 100644 --- a/Documentation/networking/dccp.txt +++ b/Documentation/networking/dccp.txt @@ -133,9 +133,6 @@ retries2 importance for retransmitted acknowledgments and feature negotiation, data packets are never retransmitted. Analogue of tcp_retries2. -send_ackvec = 1 - Whether or not to send Ack Vector options (sec. 11.5). - tx_ccid = 2 Default CCID for the sender-receiver half-connection. Depending on the choice of CCID, the Send Ack Vector feature is enabled automatically. diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 60e94438eadd..61734e27abb7 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -360,7 +360,6 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) #define DCCPF_INITIAL_SEQUENCE_WINDOW 100 #define DCCPF_INITIAL_ACK_RATIO 2 #define DCCPF_INITIAL_CCID DCCPC_CCID2 -#define DCCPF_INITIAL_SEND_ACK_VECTOR 1 /* FIXME: for now we're default to 1 but it should really be 0 */ #define DCCPF_INITIAL_SEND_NDP_COUNT 1 @@ -370,13 +369,11 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) * Will be used to pass the state from dccp_request_sock to dccp_sock. * * @dccpms_sequence_window - Sequence Window Feature (section 7.5.2) - * @dccpms_send_ack_vector - Send Ack Vector Feature (section 11.5) * @dccpms_pending - List of features being negotiated * @dccpms_conf - */ struct dccp_minisock { __u64 dccpms_sequence_window; - __u8 dccpms_send_ack_vector; struct list_head dccpms_pending; struct list_head dccpms_conf; }; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index e0759d098b9a..0bc4c9a02e19 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -98,7 +98,6 @@ extern int sysctl_dccp_retries2; extern int sysctl_dccp_feat_sequence_window; extern int sysctl_dccp_feat_rx_ccid; extern int sysctl_dccp_feat_tx_ccid; -extern int sysctl_dccp_feat_send_ack_vector; extern int sysctl_dccp_tx_qlen; extern int sysctl_dccp_sync_ratelimit; @@ -434,7 +433,7 @@ static inline int dccp_ack_pending(const struct sock *sk) const struct dccp_sock *dp = dccp_sk(sk); return dp->dccps_timestamp_echo != 0 || #ifdef CONFIG_IP_DCCP_ACKVEC - (dccp_msk(sk)->dccpms_send_ack_vector && + (dp->dccps_hc_rx_ackvec != NULL && dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) || #endif inet_csk_ack_scheduled(sk); diff --git a/net/dccp/diag.c b/net/dccp/diag.c index d1e100395efb..652a1b67f727 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -29,7 +29,7 @@ static void dccp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_backoff = icsk->icsk_backoff; info->tcpi_pmtu = icsk->icsk_pmtu_cookie; - if (dccp_msk(sk)->dccpms_send_ack_vector) + if (dp->dccps_hc_rx_ackvec != NULL) info->tcpi_options |= TCPI_OPT_SACK; ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info); diff --git a/net/dccp/input.c b/net/dccp/input.c index 0672b7e1763e..5eb443f656c1 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -163,7 +163,7 @@ static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); - if (dccp_msk(sk)->dccpms_send_ack_vector) + if (dp->dccps_hc_rx_ackvec != NULL) dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk, DCCP_SKB_CB(skb)->dccpd_ack_seq); } @@ -375,7 +375,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); - if (dccp_msk(sk)->dccpms_send_ack_vector && + if (dp->dccps_hc_rx_ackvec != NULL && dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_ACKVEC_STATE_RECEIVED)) @@ -434,12 +434,6 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp - dp->dccps_options_received.dccpor_timestamp_echo)); - if (dccp_msk(sk)->dccpms_send_ack_vector && - dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, - DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_ACKVEC_STATE_RECEIVED)) - goto out_invalid_packet; /* FIXME: change error code */ - /* Stop the REQUEST timer */ inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); WARN_ON(sk->sk_send_head == NULL); @@ -637,7 +631,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); - if (dccp_msk(sk)->dccpms_send_ack_vector && + if (dp->dccps_hc_rx_ackvec != NULL && dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_ACKVEC_STATE_RECEIVED)) diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 02b14812464a..6821ae33dd37 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -45,7 +45,6 @@ EXPORT_SYMBOL_GPL(dccp_death_row); void dccp_minisock_init(struct dccp_minisock *dmsk) { dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window; - dmsk->dccpms_send_ack_vector = sysctl_dccp_feat_send_ack_vector; } void dccp_time_wait(struct sock *sk, int state, int timeo) diff --git a/net/dccp/options.c b/net/dccp/options.c index e9d674874b4e..7b1165c21f51 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -26,7 +26,6 @@ int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW; int sysctl_dccp_feat_rx_ccid = DCCPF_INITIAL_CCID; int sysctl_dccp_feat_tx_ccid = DCCPF_INITIAL_CCID; -int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR; u64 dccp_decode_value_var(const u8 *bf, const u8 len) { @@ -145,8 +144,7 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, case DCCPO_ACK_VECTOR_1: if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */ break; - - if (dccp_msk(sk)->dccpms_send_ack_vector && + if (dp->dccps_hc_rx_ackvec != NULL && dccp_ackvec_parse(sk, skb, &ackno, opt, value, len)) goto out_invalid_option; break; @@ -526,7 +524,6 @@ static void dccp_insert_option_padding(struct sk_buff *skb) int dccp_insert_options(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); - struct dccp_minisock *dmsk = dccp_msk(sk); DCCP_SKB_CB(skb)->dccpd_opt_len = 0; @@ -547,7 +544,7 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb) if (dccp_insert_option_timestamp(sk, skb)) return -1; - } else if (dmsk->dccpms_send_ack_vector && + } else if (dp->dccps_hc_rx_ackvec != NULL && dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) && dccp_insert_option_ackvec(sk, skb)) { return -1; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 0941f8fe1675..d5c2bacb713c 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -201,7 +201,6 @@ EXPORT_SYMBOL_GPL(dccp_init_sock); void dccp_destroy_sock(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); - struct dccp_minisock *dmsk = dccp_msk(sk); /* * DCCP doesn't use sk_write_queue, just sk_send_head @@ -219,7 +218,7 @@ void dccp_destroy_sock(struct sock *sk) kfree(dp->dccps_service_list); dp->dccps_service_list = NULL; - if (dmsk->dccpms_send_ack_vector) { + if (dp->dccps_hc_rx_ackvec != NULL) { dccp_ackvec_free(dp->dccps_hc_rx_ackvec); dp->dccps_hc_rx_ackvec = NULL; } diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index 587c12f915c1..018e210875e1 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -40,13 +40,6 @@ static struct ctl_table dccp_default_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "send_ackvec", - .data = &sysctl_dccp_feat_send_ack_vector, - .maxlen = sizeof(sysctl_dccp_feat_send_ack_vector), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "request_retries", .data = &sysctl_dccp_request_retries, -- cgit v1.2.3 From 361b73d5c34f59c3fd107bb9dbe7a1fbff2c2517 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Mon, 8 Dec 2008 10:58:08 +0800 Subject: ring_buffer: fix comments Impact: comments cleanup fix incorrect comments for enum ring_buffer_type Signed-off-by: Lai Jiangshan Signed-off-by: Ingo Molnar --- include/linux/ring_buffer.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 1a350a847edd..d363467c8f13 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -28,17 +28,19 @@ struct ring_buffer_event { * size = 8 bytes * * @RINGBUF_TYPE_TIME_STAMP: Sync time stamp with external clock - * array[0] = tv_nsec - * array[1] = tv_sec + * array[0] = tv_nsec + * array[1..2] = tv_sec * size = 16 bytes * * @RINGBUF_TYPE_DATA: Data record * If len is zero: * array[0] holds the actual length - * array[1..(length+3)/4-1] holds data + * array[1..(length+3)/4] holds data + * size = 4 + 4 + length (bytes) * else * length = len << 2 - * array[0..(length+3)/4] holds data + * array[0..(length+3)/4-1] holds data + * size = 4 + length (bytes) */ enum ring_buffer_type { RINGBUF_TYPE_PADDING, -- cgit v1.2.3 From 0b8f1efad30bd58f89961b82dfe68b9edf8fd2ac Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 5 Dec 2008 18:58:31 -0800 Subject: sparse irq_desc[] array: core kernel and x86 changes Impact: new feature Problem on distro kernels: irq_desc[NR_IRQS] takes megabytes of RAM with NR_CPUS set to large values. The goal is to be able to scale up to much larger NR_IRQS value without impacting the (important) common case. To solve this, we generalize irq_desc[NR_IRQS] to an (optional) array of irq_desc pointers. When CONFIG_SPARSE_IRQ=y is used, we use kzalloc_node to get irq_desc, this also makes the IRQ descriptors NUMA-local (to the site that calls request_irq()). This gets rid of the irq_cfg[] static array on x86 as well: irq_cfg now uses desc->chip_data for x86 to store irq_cfg. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 10 ++ arch/x86/include/asm/irq_vectors.h | 9 ++ arch/x86/kernel/io_apic.c | 301 +++++++++++++++++++++++-------------- arch/x86/kernel/irq.c | 3 + arch/x86/kernel/irq_32.c | 2 + arch/x86/kernel/irq_64.c | 2 + arch/x86/kernel/irqinit_32.c | 1 - arch/x86/kernel/irqinit_64.c | 1 - drivers/char/random.c | 22 +-- drivers/pci/intr_remapping.c | 76 +++++++++- drivers/xen/events.c | 12 +- fs/proc/stat.c | 17 ++- include/linux/interrupt.h | 2 + include/linux/irq.h | 54 ++++++- include/linux/irqnr.h | 14 +- include/linux/kernel_stat.h | 14 +- include/linux/random.h | 51 +++++++ init/main.c | 11 ++ kernel/irq/autoprobe.c | 15 ++ kernel/irq/chip.c | 3 +- kernel/irq/handle.c | 181 +++++++++++++++++++++- kernel/irq/proc.c | 6 +- kernel/irq/spurious.c | 5 + 23 files changed, 649 insertions(+), 163 deletions(-) (limited to 'include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ac22bb7719f7..48ac688de3cd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -238,6 +238,16 @@ config X86_HAS_BOOT_CPU_ID def_bool y depends on X86_VOYAGER +config SPARSE_IRQ + bool "Support sparse irq numbering" + depends on PCI_MSI || HT_IRQ + default y + help + This enables support for sparse irq, esp for msi/msi-x. You may need + if you have lots of cards supports msi-x installed. + + If you don't know what to do here, say Y. + config X86_FIND_SMP_CONFIG def_bool y depends on X86_MPPARSE || X86_VOYAGER diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 0005adb0f941..bb6b69a6b125 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -102,11 +102,20 @@ #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) + +#ifndef CONFIG_SPARSE_IRQ # if NR_CPUS < MAX_IO_APICS # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) # else # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif +#else +# if (8 * NR_CPUS) > (32 * MAX_IO_APICS) +# define NR_IRQS (NR_VECTORS + (8 * NR_CPUS)) +# else +# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) +# endif +#endif #elif defined(CONFIG_X86_VOYAGER) diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 9043251210fb..9de17f5c1125 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -108,8 +108,33 @@ static int __init parse_noapic(char *str) early_param("noapic", parse_noapic); struct irq_pin_list; + +/* + * This is performance-critical, we want to do it O(1) + * + * the indexing order of this array favors 1:1 mappings + * between pins and IRQs. + */ + +struct irq_pin_list { + int apic, pin; + struct irq_pin_list *next; +}; + +static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) +{ + struct irq_pin_list *pin; + int node; + + node = cpu_to_node(cpu); + + pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node); + printk(KERN_DEBUG " alloc irq_2_pin on cpu %d node %d\n", cpu, node); + + return pin; +} + struct irq_cfg { - unsigned int irq; struct irq_pin_list *irq_2_pin; cpumask_t domain; cpumask_t old_domain; @@ -119,83 +144,93 @@ struct irq_cfg { }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ +#ifdef CONFIG_SPARSE_IRQ +static struct irq_cfg irq_cfgx[] = { +#else static struct irq_cfg irq_cfgx[NR_IRQS] = { - [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, - [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, - [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, - [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, - [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, - [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, - [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, - [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, - [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, - [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, - [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, - [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, - [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, - [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, - [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, - [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, +#endif + [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, + [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, + [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, + [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, + [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, + [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, + [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, + [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, + [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, + [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, + [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, + [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, + [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, + [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, + [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, + [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, }; -#define for_each_irq_cfg(irq, cfg) \ - for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++) - -static struct irq_cfg *irq_cfg(unsigned int irq) +void __init arch_early_irq_init(void) { - return irq < nr_irqs ? irq_cfgx + irq : NULL; -} + struct irq_cfg *cfg; + struct irq_desc *desc; + int count; + int i; -static struct irq_cfg *irq_cfg_alloc(unsigned int irq) -{ - return irq_cfg(irq); -} + cfg = irq_cfgx; + count = ARRAY_SIZE(irq_cfgx); -/* - * Rough estimation of how many shared IRQs there are, can be changed - * anytime. - */ -#define MAX_PLUS_SHARED_IRQS NR_IRQS -#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) + for (i = 0; i < count; i++) { + desc = irq_to_desc(i); + desc->chip_data = &cfg[i]; + } +} -/* - * This is performance-critical, we want to do it O(1) - * - * the indexing order of this array favors 1:1 mappings - * between pins and IRQs. - */ +#ifdef CONFIG_SPARSE_IRQ +static struct irq_cfg *irq_cfg(unsigned int irq) +{ + struct irq_cfg *cfg = NULL; + struct irq_desc *desc; -struct irq_pin_list { - int apic, pin; - struct irq_pin_list *next; -}; + desc = irq_to_desc(irq); + if (desc) + cfg = desc->chip_data; -static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE]; -static struct irq_pin_list *irq_2_pin_ptr; + return cfg; +} -static void __init irq_2_pin_init(void) +static struct irq_cfg *get_one_free_irq_cfg(int cpu) { - struct irq_pin_list *pin = irq_2_pin_head; - int i; + struct irq_cfg *cfg; + int node; + + node = cpu_to_node(cpu); - for (i = 1; i < PIN_MAP_SIZE; i++) - pin[i-1].next = &pin[i]; + cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); + printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node); - irq_2_pin_ptr = &pin[0]; + return cfg; } -static struct irq_pin_list *get_one_free_irq_2_pin(void) +void arch_init_chip_data(struct irq_desc *desc, int cpu) { - struct irq_pin_list *pin = irq_2_pin_ptr; + struct irq_cfg *cfg; - if (!pin) - panic("can not get more irq_2_pin\n"); + cfg = desc->chip_data; + if (!cfg) { + desc->chip_data = get_one_free_irq_cfg(cpu); + if (!desc->chip_data) { + printk(KERN_ERR "can not alloc irq_cfg\n"); + BUG_ON(1); + } + } +} - irq_2_pin_ptr = pin->next; - pin->next = NULL; - return pin; +#else +static struct irq_cfg *irq_cfg(unsigned int irq) +{ + return irq < nr_irqs ? irq_cfgx + irq : NULL; } +#endif + struct io_apic { unsigned int index; unsigned int unused[3]; @@ -397,16 +432,19 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. */ -static void add_pin_to_irq(unsigned int irq, int apic, int pin) +static void add_pin_to_irq_cpu(unsigned int irq, int cpu, int apic, int pin) { - struct irq_cfg *cfg; struct irq_pin_list *entry; + struct irq_cfg *cfg = irq_cfg(irq); - /* first time to refer irq_cfg, so with new */ - cfg = irq_cfg_alloc(irq); entry = cfg->irq_2_pin; if (!entry) { - entry = get_one_free_irq_2_pin(); + entry = get_one_free_irq_2_pin(cpu); + if (!entry) { + printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", + apic, pin); + return; + } cfg->irq_2_pin = entry; entry->apic = apic; entry->pin = pin; @@ -421,7 +459,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) entry = entry->next; } - entry->next = get_one_free_irq_2_pin(); + entry->next = get_one_free_irq_2_pin(cpu); entry = entry->next; entry->apic = apic; entry->pin = pin; @@ -430,7 +468,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) /* * Reroute an IRQ to a different pin. */ -static void __init replace_pin_at_irq(unsigned int irq, +static void __init replace_pin_at_irq(unsigned int irq, int cpu, int oldapic, int oldpin, int newapic, int newpin) { @@ -451,7 +489,7 @@ static void __init replace_pin_at_irq(unsigned int irq, /* why? call replace before add? */ if (!replaced) - add_pin_to_irq(irq, newapic, newpin); + add_pin_to_irq_cpu(irq, cpu, newapic, newpin); } static inline void io_apic_modify_irq(unsigned int irq, @@ -1162,9 +1200,13 @@ void __setup_vector_irq(int cpu) /* This function must be called with vector_lock held */ int irq, vector; struct irq_cfg *cfg; + struct irq_desc *desc; /* Mark the inuse vectors */ - for_each_irq_cfg(irq, cfg) { + for_each_irq_desc(irq, desc) { + if (!desc) + continue; + cfg = desc->chip_data; if (!cpu_isset(cpu, cfg->domain)) continue; vector = cfg->vector; @@ -1356,6 +1398,8 @@ static void __init setup_IO_APIC_irqs(void) { int apic, pin, idx, irq; int notcon = 0; + struct irq_desc *desc; + int cpu = boot_cpu_id; apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); @@ -1387,7 +1431,12 @@ static void __init setup_IO_APIC_irqs(void) if (multi_timer_check(apic, irq)) continue; #endif - add_pin_to_irq(irq, apic, pin); + desc = irq_to_desc_alloc_cpu(irq, cpu); + if (!desc) { + printk(KERN_INFO "can not get irq_desc for %d\n", irq); + continue; + } + add_pin_to_irq_cpu(irq, cpu, apic, pin); setup_IO_APIC_irq(apic, pin, irq, irq_trigger(idx), irq_polarity(idx)); @@ -1448,6 +1497,7 @@ __apicdebuginit(void) print_IO_APIC(void) union IO_APIC_reg_03 reg_03; unsigned long flags; struct irq_cfg *cfg; + struct irq_desc *desc; unsigned int irq; if (apic_verbosity == APIC_QUIET) @@ -1537,8 +1587,13 @@ __apicdebuginit(void) print_IO_APIC(void) } } printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for_each_irq_cfg(irq, cfg) { - struct irq_pin_list *entry = cfg->irq_2_pin; + for_each_irq_desc(irq, desc) { + struct irq_pin_list *entry; + + if (!desc) + continue; + cfg = desc->chip_data; + entry = cfg->irq_2_pin; if (!entry) continue; printk(KERN_DEBUG "IRQ%d ", irq); @@ -2022,6 +2077,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq) { int was_pending = 0; unsigned long flags; + struct irq_cfg *cfg; spin_lock_irqsave(&ioapic_lock, flags); if (irq < 16) { @@ -2029,6 +2085,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq) if (i8259A_irq_pending(irq)) was_pending = 1; } + cfg = irq_cfg(irq); __unmask_IO_APIC_irq(irq); spin_unlock_irqrestore(&ioapic_lock, flags); @@ -2178,6 +2235,9 @@ static void ir_irq_migration(struct work_struct *work) struct irq_desc *desc; for_each_irq_desc(irq, desc) { + if (!desc) + continue; + if (desc->status & IRQ_MOVE_PENDING) { unsigned long flags; @@ -2229,6 +2289,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) struct irq_cfg *cfg; irq = __get_cpu_var(vector_irq)[vector]; + if (irq == -1) + continue; + desc = irq_to_desc(irq); if (!desc) continue; @@ -2430,8 +2493,12 @@ static inline void init_IO_APIC_traps(void) * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - for_each_irq_cfg(irq, cfg) { - if (IO_APIC_IRQ(irq) && !cfg->vector) { + for_each_irq_desc(irq, desc) { + if (!desc) + continue; + + cfg = desc->chip_data; + if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { /* * Hmm.. We don't have an entry for this, * so default to an old-fashioned 8259 @@ -2439,11 +2506,9 @@ static inline void init_IO_APIC_traps(void) */ if (irq < 16) make_8259A_irq(irq); - else { - desc = irq_to_desc(irq); + else /* Strange. Oh, well.. */ desc->chip = &no_irq_chip; - } } } } @@ -2654,7 +2719,7 @@ static inline void __init check_timer(void) * Ok, does IRQ0 through the IOAPIC work? */ if (no_pin1) { - add_pin_to_irq(0, apic1, pin1); + add_pin_to_irq_cpu(0, boot_cpu_id, apic1, pin1); setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); } unmask_IO_APIC_irq(0); @@ -2683,7 +2748,7 @@ static inline void __init check_timer(void) /* * legacy devices should be connected to IO APIC #0 */ - replace_pin_at_irq(0, apic1, pin1, apic2, pin2); + replace_pin_at_irq(0, boot_cpu_id, apic1, pin1, apic2, pin2); setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); unmask_IO_APIC_irq(0); enable_8259A_irq(0); @@ -2902,21 +2967,25 @@ unsigned int create_irq_nr(unsigned int irq_want) unsigned int irq; unsigned int new; unsigned long flags; - struct irq_cfg *cfg_new; - - irq_want = nr_irqs - 1; + struct irq_cfg *cfg_new = NULL; + int cpu = boot_cpu_id; + struct irq_desc *desc_new = NULL; irq = 0; spin_lock_irqsave(&vector_lock, flags); for (new = irq_want; new > 0; new--) { if (platform_legacy_irq(new)) continue; - cfg_new = irq_cfg(new); - if (cfg_new && cfg_new->vector != 0) + + desc_new = irq_to_desc_alloc_cpu(new, cpu); + if (!desc_new) { + printk(KERN_INFO "can not get irq_desc for %d\n", new); + continue; + } + cfg_new = desc_new->chip_data; + + if (cfg_new->vector != 0) continue; - /* check if need to create one */ - if (!cfg_new) - cfg_new = irq_cfg_alloc(new); if (__assign_irq_vector(new, TARGET_CPUS) == 0) irq = new; break; @@ -2925,6 +2994,9 @@ unsigned int create_irq_nr(unsigned int irq_want) if (irq > 0) { dynamic_irq_init(irq); + /* restore it, in case dynamic_irq_init clear it */ + if (desc_new) + desc_new->chip_data = cfg_new; } return irq; } @@ -2944,8 +3016,16 @@ int create_irq(void) void destroy_irq(unsigned int irq) { unsigned long flags; + struct irq_cfg *cfg; + struct irq_desc *desc; + /* store it, in case dynamic_irq_cleanup clear it */ + desc = irq_to_desc(irq); + cfg = desc->chip_data; dynamic_irq_cleanup(irq); + /* connect back irq_cfg */ + if (desc) + desc->chip_data = cfg; #ifdef CONFIG_INTR_REMAP free_irte(irq); @@ -3195,26 +3275,13 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) return 0; } -static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) -{ - unsigned int irq; - - irq = dev->bus->number; - irq <<= 8; - irq |= dev->devfn; - irq <<= 12; - - return irq; -} - -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc) { unsigned int irq; int ret; unsigned int irq_want; - irq_want = build_irq_for_pci_dev(dev) + 0x100; - + irq_want = nr_irqs - 1; irq = create_irq_nr(irq_want); if (irq == 0) return -1; @@ -3228,7 +3295,7 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) goto error; no_ir: #endif - ret = setup_msi_irq(dev, desc, irq); + ret = setup_msi_irq(dev, msidesc, irq); if (ret < 0) { destroy_irq(irq); return ret; @@ -3246,7 +3313,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { unsigned int irq; int ret, sub_handle; - struct msi_desc *desc; + struct msi_desc *msidesc; unsigned int irq_want; #ifdef CONFIG_INTR_REMAP @@ -3254,10 +3321,11 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) int index = 0; #endif - irq_want = build_irq_for_pci_dev(dev) + 0x100; + irq_want = nr_irqs - 1; sub_handle = 0; - list_for_each_entry(desc, &dev->msi_list, list) { - irq = create_irq_nr(irq_want--); + list_for_each_entry(msidesc, &dev->msi_list, list) { + irq = create_irq_nr(irq_want); + irq_want--; if (irq == 0) return -1; #ifdef CONFIG_INTR_REMAP @@ -3289,7 +3357,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) } no_ir: #endif - ret = setup_msi_irq(dev, desc, irq); + ret = setup_msi_irq(dev, msidesc, irq); if (ret < 0) goto error; sub_handle++; @@ -3707,17 +3775,29 @@ int __init io_apic_get_version(int ioapic) int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) { + struct irq_desc *desc; + struct irq_cfg *cfg; + int cpu = boot_cpu_id; + if (!IO_APIC_IRQ(irq)) { apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", ioapic); return -EINVAL; } + desc = irq_to_desc_alloc_cpu(irq, cpu); + if (!desc) { + printk(KERN_INFO "can not get irq_desc %d\n", irq); + return 0; + } + /* * IRQs < 16 are already in the irq_2_pin[] map */ - if (irq >= 16) - add_pin_to_irq(irq, ioapic, pin); + if (irq >= 16) { + cfg = desc->chip_data; + add_pin_to_irq_cpu(irq, cpu, ioapic, pin); + } setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); @@ -3773,7 +3853,8 @@ void __init setup_ioapic_dest(void) * when you have too many devices, because at that time only boot * cpu is online. */ - cfg = irq_cfg(irq); + desc = irq_to_desc(irq); + cfg = desc->chip_data; if (!cfg->vector) { setup_IO_APIC_irq(ioapic, pin, irq, irq_trigger(irq_entry), @@ -3785,7 +3866,6 @@ void __init setup_ioapic_dest(void) /* * Honour affinities which have been set in early boot */ - desc = irq_to_desc(irq); if (desc->status & (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) mask = desc->affinity; @@ -3846,7 +3926,6 @@ void __init ioapic_init_mappings(void) struct resource *ioapic_res; int i; - irq_2_pin_init(); ioapic_res = ioapic_setup_resources(); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index d1d4dc52f649..3f1d9d18df67 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -118,6 +118,9 @@ int show_interrupts(struct seq_file *p, void *v) } desc = irq_to_desc(i); + if (!desc) + return 0; + spin_lock_irqsave(&desc->lock, flags); #ifndef CONFIG_SMP any_count = kstat_irqs(i); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index a51382672de0..119fc9c8ff7f 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -242,6 +242,8 @@ void fixup_irqs(cpumask_t map) for_each_irq_desc(irq, desc) { cpumask_t mask; + if (!desc) + continue; if (irq == 2) continue; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 60eb84eb77a0..900009c70591 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -94,6 +94,8 @@ void fixup_irqs(cpumask_t map) int break_affinity = 0; int set_affinity = 1; + if (!desc) + continue; if (irq == 2) continue; diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 845aa9803e80..5a5651b7f9e6 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -69,7 +69,6 @@ void __init init_ISA_irqs (void) * 16 old-style INTA-cycle interrupts: */ for (i = 0; i < 16; i++) { - /* first time call this irq_desc */ struct irq_desc *desc = irq_to_desc(i); desc->status = IRQ_DISABLED; diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index ff0235391285..cd9f42d028d9 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -143,7 +143,6 @@ void __init init_ISA_irqs(void) init_8259A(0); for (i = 0; i < 16; i++) { - /* first time call this irq_desc */ struct irq_desc *desc = irq_to_desc(i); desc->status = IRQ_DISABLED; diff --git a/drivers/char/random.c b/drivers/char/random.c index 675076f5fca8..d26891bfcd41 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -558,23 +558,9 @@ struct timer_rand_state { unsigned dont_count_entropy:1; }; -static struct timer_rand_state *irq_timer_state[NR_IRQS]; - -static struct timer_rand_state *get_timer_rand_state(unsigned int irq) -{ - if (irq >= nr_irqs) - return NULL; - - return irq_timer_state[irq]; -} - -static void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state) -{ - if (irq >= nr_irqs) - return; - - irq_timer_state[irq] = state; -} +#ifndef CONFIG_SPARSE_IRQ +struct timer_rand_state *irq_timer_state[NR_IRQS]; +#endif static struct timer_rand_state input_timer_state; @@ -933,8 +919,10 @@ void rand_initialize_irq(int irq) { struct timer_rand_state *state; +#ifndef CONFIG_SPARSE_IRQ if (irq >= nr_irqs) return; +#endif state = get_timer_rand_state(irq); diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index 2de5a3238c94..c9958ec5e25e 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -19,17 +19,75 @@ struct irq_2_iommu { u8 irte_mask; }; -static struct irq_2_iommu irq_2_iommuX[NR_IRQS]; +#ifdef CONFIG_SPARSE_IRQ +static struct irq_2_iommu *get_one_free_irq_2_iommu(int cpu) +{ + struct irq_2_iommu *iommu; + int node; + + node = cpu_to_node(cpu); + + iommu = kzalloc_node(sizeof(*iommu), GFP_ATOMIC, node); + printk(KERN_DEBUG "alloc irq_2_iommu on cpu %d node %d\n", cpu, node); + + return iommu; +} static struct irq_2_iommu *irq_2_iommu(unsigned int irq) { - return (irq < nr_irqs) ? irq_2_iommuX + irq : NULL; + struct irq_desc *desc; + + desc = irq_to_desc(irq); + + if (WARN_ON_ONCE(!desc)) + return NULL; + + return desc->irq_2_iommu; +} + +static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu) +{ + struct irq_desc *desc; + struct irq_2_iommu *irq_iommu; + + /* + * alloc irq desc if not allocated already. + */ + desc = irq_to_desc_alloc_cpu(irq, cpu); + if (!desc) { + printk(KERN_INFO "can not get irq_desc for %d\n", irq); + return NULL; + } + + irq_iommu = desc->irq_2_iommu; + + if (!irq_iommu) + desc->irq_2_iommu = get_one_free_irq_2_iommu(cpu); + + return desc->irq_2_iommu; } +static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq) +{ + return irq_2_iommu_alloc_cpu(irq, boot_cpu_id); +} + +#else /* !CONFIG_SPARSE_IRQ */ + +static struct irq_2_iommu irq_2_iommuX[NR_IRQS]; + +static struct irq_2_iommu *irq_2_iommu(unsigned int irq) +{ + if (irq < nr_irqs) + return &irq_2_iommuX[irq]; + + return NULL; +} static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq) { return irq_2_iommu(irq); } +#endif static DEFINE_SPINLOCK(irq_2_ir_lock); @@ -86,9 +144,11 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) if (!count) return -1; +#ifndef CONFIG_SPARSE_IRQ /* protect irq_2_iommu_alloc later */ if (irq >= nr_irqs) return -1; +#endif /* * start the IRTE search from index 0. @@ -130,6 +190,12 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) table->base[i].present = 1; irq_iommu = irq_2_iommu_alloc(irq); + if (!irq_iommu) { + spin_unlock(&irq_2_ir_lock); + printk(KERN_ERR "can't allocate irq_2_iommu\n"); + return -1; + } + irq_iommu->iommu = iommu; irq_iommu->irte_index = index; irq_iommu->sub_handle = 0; @@ -177,6 +243,12 @@ int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) irq_iommu = irq_2_iommu_alloc(irq); + if (!irq_iommu) { + spin_unlock(&irq_2_ir_lock); + printk(KERN_ERR "can't allocate irq_2_iommu\n"); + return -1; + } + irq_iommu->iommu = iommu; irq_iommu->irte_index = index; irq_iommu->sub_handle = subhandle; diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 1e3b934a4cf7..2924faa7f6c4 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -141,8 +141,12 @@ static void init_evtchn_cpu_bindings(void) int i; /* By default all event channels notify CPU#0. */ - for_each_irq_desc(i, desc) + for_each_irq_desc(i, desc) { + if (!desc) + continue; + desc->affinity = cpumask_of_cpu(0); + } #endif memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); @@ -231,7 +235,7 @@ static int find_unbound_irq(void) int irq; /* Only allocate from dynirq range */ - for_each_irq_nr(irq) + for (irq = 0; irq < nr_irqs; irq++) if (irq_bindcount[irq] == 0) break; @@ -792,7 +796,7 @@ void xen_irq_resume(void) mask_evtchn(evtchn); /* No IRQ <-> event-channel mappings. */ - for_each_irq_nr(irq) + for (irq = 0; irq < nr_irqs; irq++) irq_info[irq].evtchn = 0; /* zap event-channel binding */ for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) @@ -824,7 +828,7 @@ void __init xen_init_IRQ(void) mask_evtchn(i); /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ - for_each_irq_nr(i) + for (i = 0; i < nr_irqs; i++) irq_bindcount[i] = 0; irq_ctx_init(smp_processor_id()); diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 81904f07679d..a13431ab7c65 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -27,6 +27,7 @@ static int show_stat(struct seq_file *p, void *v) u64 sum = 0; struct timespec boottime; unsigned int per_irq_sum; + struct irq_desc *desc; user = nice = system = idle = iowait = irq = softirq = steal = cputime64_zero; @@ -44,10 +45,11 @@ static int show_stat(struct seq_file *p, void *v) softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); - - for_each_irq_nr(j) + for_each_irq_desc(j, desc) { + if (!desc) + continue; sum += kstat_irqs_cpu(j, i); - + } sum += arch_irq_stat_cpu(i); } sum += arch_irq_stat(); @@ -90,11 +92,14 @@ static int show_stat(struct seq_file *p, void *v) seq_printf(p, "intr %llu", (unsigned long long)sum); /* sum again ? it could be updated? */ - for_each_irq_nr(j) { + for (j = 0; j < NR_IRQS; j++) { + desc = irq_to_desc(j); per_irq_sum = 0; - for_each_possible_cpu(i) - per_irq_sum += kstat_irqs_cpu(j, i); + if (desc) { + for_each_possible_cpu(i) + per_irq_sum += kstat_irqs_cpu(j, i); + } seq_printf(p, " %u", per_irq_sum); } diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index f58a0cf8929a..79e915e7e8a5 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -18,6 +18,8 @@ #include #include +extern int nr_irqs; + /* * These correspond to the IORESOURCE_IRQ_* defines in * linux/ioport.h to select the interrupt line behaviour. When diff --git a/include/linux/irq.h b/include/linux/irq.h index 3dddfa703ebd..63b00439d4d2 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -129,6 +129,8 @@ struct irq_chip { const char *typename; }; +struct timer_rand_state; +struct irq_2_iommu; /** * struct irq_desc - interrupt descriptor * @irq: interrupt number for this descriptor @@ -154,6 +156,13 @@ struct irq_chip { */ struct irq_desc { unsigned int irq; +#ifdef CONFIG_SPARSE_IRQ + struct timer_rand_state *timer_rand_state; + unsigned int *kstat_irqs; +# ifdef CONFIG_INTR_REMAP + struct irq_2_iommu *irq_2_iommu; +# endif +#endif irq_flow_handler_t handle_irq; struct irq_chip *chip; struct msi_desc *msi_desc; @@ -181,14 +190,52 @@ struct irq_desc { const char *name; } ____cacheline_internodealigned_in_smp; +extern void early_irq_init(void); +extern void arch_early_irq_init(void); +extern void arch_init_chip_data(struct irq_desc *desc, int cpu); +extern void arch_init_copy_chip_data(struct irq_desc *old_desc, + struct irq_desc *desc, int cpu); +extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc); + +#ifndef CONFIG_SPARSE_IRQ extern struct irq_desc irq_desc[NR_IRQS]; static inline struct irq_desc *irq_to_desc(unsigned int irq) { - return (irq < nr_irqs) ? irq_desc + irq : NULL; + return (irq < NR_IRQS) ? irq_desc + irq : NULL; +} +static inline struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) +{ + return irq_to_desc(irq); } +#ifdef CONFIG_GENERIC_HARDIRQS +# define for_each_irq_desc(irq, desc) \ + for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++) +# define for_each_irq_desc_reverse(irq, desc) \ + for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1); \ + irq >= 0; irq--, desc--) +#endif + +#else + +extern struct irq_desc *irq_to_desc(unsigned int irq); +extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu); +extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu); + +# define for_each_irq_desc(irq, desc) \ + for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; irq++, desc = irq_to_desc(irq)) +# define for_each_irq_desc_reverse(irq, desc) \ + for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; irq--, desc = irq_to_desc(irq)) + +#define kstat_irqs_this_cpu(DESC) \ + ((DESC)->kstat_irqs[smp_processor_id()]) +#define kstat_incr_irqs_this_cpu(irqno, DESC) \ + ((DESC)->kstat_irqs[smp_processor_id()]++) + +#endif + /* * Migration helpers for obsolete names, they will go away: */ @@ -380,6 +427,11 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); #define get_irq_data(irq) (irq_to_desc(irq)->handler_data) #define get_irq_msi(irq) (irq_to_desc(irq)->msi_desc) +#define get_irq_desc_chip(desc) ((desc)->chip) +#define get_irq_desc_chip_data(desc) ((desc)->chip_data) +#define get_irq_desc_data(desc) ((desc)->handler_data) +#define get_irq_desc_msi(desc) ((desc)->msi_desc) + #endif /* CONFIG_GENERIC_HARDIRQS */ #endif /* !CONFIG_S390 */ diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 452c280c8115..7a299e989f8b 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -7,18 +7,10 @@ # define for_each_irq_desc(irq, desc) \ for (irq = 0; irq < nr_irqs; irq++) -#else -extern int nr_irqs; -# define for_each_irq_desc(irq, desc) \ - for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++) - -# define for_each_irq_desc_reverse(irq, desc) \ - for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1); \ - irq >= 0; irq--, desc--) +static inline early_sparse_irq_init(void) +{ +} #endif -#define for_each_irq_nr(irq) \ - for (irq = 0; irq < nr_irqs; irq++) - #endif diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 4a145caeee07..4ee4b3d2316f 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -28,7 +28,9 @@ struct cpu_usage_stat { struct kernel_stat { struct cpu_usage_stat cpustat; - unsigned int irqs[NR_IRQS]; +#ifndef CONFIG_SPARSE_IRQ + unsigned int irqs[NR_IRQS]; +#endif }; DECLARE_PER_CPU(struct kernel_stat, kstat); @@ -39,6 +41,10 @@ DECLARE_PER_CPU(struct kernel_stat, kstat); extern unsigned long long nr_context_switches(void); +#ifndef CONFIG_SPARSE_IRQ +#define kstat_irqs_this_cpu(irq) \ + (kstat_this_cpu.irqs[irq]) + struct irq_desc; static inline void kstat_incr_irqs_this_cpu(unsigned int irq, @@ -46,11 +52,17 @@ static inline void kstat_incr_irqs_this_cpu(unsigned int irq, { kstat_this_cpu.irqs[irq]++; } +#endif + +#ifndef CONFIG_SPARSE_IRQ static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) { return kstat_cpu(cpu).irqs[irq]; } +#else +extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu); +#endif /* * Number of interrupts per specific IRQ source, since bootup diff --git a/include/linux/random.h b/include/linux/random.h index 36f125c0c603..ad9daa2374d5 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -44,6 +44,57 @@ struct rand_pool_info { extern void rand_initialize_irq(int irq); +struct timer_rand_state; +#ifndef CONFIG_SPARSE_IRQ + +extern struct timer_rand_state *irq_timer_state[]; + +extern int nr_irqs; +static inline struct timer_rand_state *get_timer_rand_state(unsigned int irq) +{ + if (irq >= nr_irqs) + return NULL; + + return irq_timer_state[irq]; +} + +static inline void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state) +{ + if (irq >= nr_irqs) + return; + + irq_timer_state[irq] = state; +} + +#else + +#include +static inline struct timer_rand_state *get_timer_rand_state(unsigned int irq) +{ + struct irq_desc *desc; + + desc = irq_to_desc(irq); + + if (!desc) + return NULL; + + return desc->timer_rand_state; +} + +static inline void set_timer_rand_state(unsigned int irq, struct timer_rand_state *state) +{ + struct irq_desc *desc; + + desc = irq_to_desc(irq); + + if (!desc) + return; + + desc->timer_rand_state = state; +} +#endif + + extern void add_input_randomness(unsigned int type, unsigned int code, unsigned int value); extern void add_interrupt_randomness(int irq); diff --git a/init/main.c b/init/main.c index 7e117a231af1..c1f999a3cf31 100644 --- a/init/main.c +++ b/init/main.c @@ -539,6 +539,15 @@ void __init __weak thread_info_cache_init(void) { } +void __init __weak arch_early_irq_init(void) +{ +} + +void __init __weak early_irq_init(void) +{ + arch_early_irq_init(); +} + asmlinkage void __init start_kernel(void) { char * command_line; @@ -603,6 +612,8 @@ asmlinkage void __init start_kernel(void) sort_main_extable(); trap_init(); rcu_init(); + /* init some links before init_ISA_irqs() */ + early_irq_init(); init_IRQ(); pidhash_init(); init_timers(); diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c index cc0f7321b8ce..650ce4102a63 100644 --- a/kernel/irq/autoprobe.c +++ b/kernel/irq/autoprobe.c @@ -40,6 +40,9 @@ unsigned long probe_irq_on(void) * flush such a longstanding irq before considering it as spurious. */ for_each_irq_desc_reverse(i, desc) { + if (!desc) + continue; + spin_lock_irq(&desc->lock); if (!desc->action && !(desc->status & IRQ_NOPROBE)) { /* @@ -68,6 +71,9 @@ unsigned long probe_irq_on(void) * happened in the previous stage, it may have masked itself) */ for_each_irq_desc_reverse(i, desc) { + if (!desc) + continue; + spin_lock_irq(&desc->lock); if (!desc->action && !(desc->status & IRQ_NOPROBE)) { desc->status |= IRQ_AUTODETECT | IRQ_WAITING; @@ -86,6 +92,9 @@ unsigned long probe_irq_on(void) * Now filter out any obviously spurious interrupts */ for_each_irq_desc(i, desc) { + if (!desc) + continue; + spin_lock_irq(&desc->lock); status = desc->status; @@ -124,6 +133,9 @@ unsigned int probe_irq_mask(unsigned long val) int i; for_each_irq_desc(i, desc) { + if (!desc) + continue; + spin_lock_irq(&desc->lock); status = desc->status; @@ -166,6 +178,9 @@ int probe_irq_off(unsigned long val) unsigned int status; for_each_irq_desc(i, desc) { + if (!desc) + continue; + spin_lock_irq(&desc->lock); status = desc->status; diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 10b5092e9bfe..8e4fce4a1b1f 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -24,9 +24,10 @@ */ void dynamic_irq_init(unsigned int irq) { - struct irq_desc *desc = irq_to_desc(irq); + struct irq_desc *desc; unsigned long flags; + desc = irq_to_desc(irq); if (!desc) { WARN(1, KERN_ERR "Trying to initialize invalid IRQ%d\n", irq); return; diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index c815b42d0f5b..96ca203eb51b 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -15,9 +15,16 @@ #include #include #include +#include +#include #include "internals.h" +/* + * lockdep: we want to handle all irq_desc locks as a single lock-class: + */ +static struct lock_class_key irq_desc_lock_class; + /** * handle_bad_irq - handle spurious and unhandled irqs * @irq: the interrupt number @@ -49,6 +56,155 @@ void handle_bad_irq(unsigned int irq, struct irq_desc *desc) int nr_irqs = NR_IRQS; EXPORT_SYMBOL_GPL(nr_irqs); +void __init __attribute__((weak)) arch_early_irq_init(void) +{ +} + +#ifdef CONFIG_SPARSE_IRQ +static struct irq_desc irq_desc_init = { + .irq = -1, + .status = IRQ_DISABLED, + .chip = &no_irq_chip, + .handle_irq = handle_bad_irq, + .depth = 1, + .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), +#ifdef CONFIG_SMP + .affinity = CPU_MASK_ALL +#endif +}; + +static void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) +{ + unsigned long bytes; + char *ptr; + int node; + + /* Compute how many bytes we need per irq and allocate them */ + bytes = nr * sizeof(unsigned int); + + node = cpu_to_node(cpu); + ptr = kzalloc_node(bytes, GFP_ATOMIC, node); + printk(KERN_DEBUG " alloc kstat_irqs on cpu %d node %d\n", cpu, node); + + if (ptr) + desc->kstat_irqs = (unsigned int *)ptr; +} + +void __attribute__((weak)) arch_init_chip_data(struct irq_desc *desc, int cpu) +{ +} + +static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) +{ + memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); + desc->irq = irq; +#ifdef CONFIG_SMP + desc->cpu = cpu; +#endif + lockdep_set_class(&desc->lock, &irq_desc_lock_class); + init_kstat_irqs(desc, cpu, nr_cpu_ids); + if (!desc->kstat_irqs) { + printk(KERN_ERR "can not alloc kstat_irqs\n"); + BUG_ON(1); + } + arch_init_chip_data(desc, cpu); +} + +/* + * Protect the sparse_irqs: + */ +static DEFINE_SPINLOCK(sparse_irq_lock); + +struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly; + +static struct irq_desc irq_desc_legacy[16] __cacheline_aligned_in_smp = { + [0 ... 15] = { + .irq = -1, + .status = IRQ_DISABLED, + .chip = &no_irq_chip, + .handle_irq = handle_bad_irq, + .depth = 1, + .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), +#ifdef CONFIG_SMP + .affinity = CPU_MASK_ALL +#endif + } +}; + +/* FIXME: use bootmem alloc ...*/ +static unsigned int kstat_irqs_legacy[16][NR_CPUS]; + +void __init early_irq_init(void) +{ + struct irq_desc *desc; + int legacy_count; + int i; + + desc = irq_desc_legacy; + legacy_count = ARRAY_SIZE(irq_desc_legacy); + + for (i = 0; i < legacy_count; i++) { + desc[i].irq = i; + desc[i].kstat_irqs = kstat_irqs_legacy[i]; + + irq_desc_ptrs[i] = desc + i; + } + + for (i = legacy_count; i < NR_IRQS; i++) + irq_desc_ptrs[i] = NULL; + + arch_early_irq_init(); +} + +struct irq_desc *irq_to_desc(unsigned int irq) +{ + return (irq < NR_IRQS) ? irq_desc_ptrs[irq] : NULL; +} + +struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) +{ + struct irq_desc *desc; + unsigned long flags; + int node; + + if (irq >= NR_IRQS) { + printk(KERN_WARNING "irq >= NR_IRQS in irq_to_desc_alloc: %d %d\n", + irq, NR_IRQS); + WARN_ON(1); + return NULL; + } + + desc = irq_desc_ptrs[irq]; + if (desc) + return desc; + + spin_lock_irqsave(&sparse_irq_lock, flags); + + /* We have to check it to avoid races with another CPU */ + desc = irq_desc_ptrs[irq]; + if (desc) + goto out_unlock; + + node = cpu_to_node(cpu); + desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); + printk(KERN_DEBUG " alloc irq_desc for %d on cpu %d node %d\n", + irq, cpu, node); + if (!desc) { + printk(KERN_ERR "can not alloc irq_desc\n"); + BUG_ON(1); + } + init_one_irq_desc(irq, desc, cpu); + + irq_desc_ptrs[irq] = desc; + +out_unlock: + spin_unlock_irqrestore(&sparse_irq_lock, flags); + + return desc; +} + +#else + struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { [0 ... NR_IRQS-1] = { .status = IRQ_DISABLED, @@ -62,6 +218,8 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { } }; +#endif + /* * What should we do if we get a hw irq event on an illegal vector? * Each architecture has to answer this themself. @@ -261,17 +419,28 @@ out: #ifdef CONFIG_TRACE_IRQFLAGS -/* - * lockdep: we want to handle all irq_desc locks as a single lock-class: - */ -static struct lock_class_key irq_desc_lock_class; - void early_init_irq_lock_class(void) { +#ifndef CONFIG_SPARSE_IRQ struct irq_desc *desc; int i; - for_each_irq_desc(i, desc) + for_each_irq_desc(i, desc) { + if (!desc) + continue; + lockdep_set_class(&desc->lock, &irq_desc_lock_class); + } +#endif +} +#endif + +#ifdef CONFIG_SPARSE_IRQ +unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) +{ + struct irq_desc *desc = irq_to_desc(irq); + return desc->kstat_irqs[cpu]; } #endif +EXPORT_SYMBOL(kstat_irqs_cpu); + diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index d257e7d6a8a4..f6b3440f05bc 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -243,7 +243,11 @@ void init_irq_proc(void) /* * Create entries for all existing IRQs. */ - for_each_irq_desc(irq, desc) + for_each_irq_desc(irq, desc) { + if (!desc) + continue; + register_irq_proc(irq, desc); + } } diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index dd364c11e56e..3738107531fd 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -91,6 +91,9 @@ static int misrouted_irq(int irq) int i, ok = 0; for_each_irq_desc(i, desc) { + if (!desc) + continue; + if (!i) continue; @@ -112,6 +115,8 @@ static void poll_spurious_irqs(unsigned long dummy) for_each_irq_desc(i, desc) { unsigned int status; + if (!desc) + continue; if (!i) continue; -- cgit v1.2.3 From 3145e941fcfe2548fa2270afb1a05bab3a6bc418 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 5 Dec 2008 18:58:34 -0800 Subject: x86, MSI: pass irq_cfg and irq_desc Impact: simplify code Pass irq_desc and cfg around, instead of raw IRQ numbers - this way we dont have to look it up again and again. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 318 ++++++++++++++++++++++++++-------------------- drivers/pci/msi.c | 55 +++++--- include/linux/msi.h | 3 + 3 files changed, 222 insertions(+), 154 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 0dcde74abd1d..a1a2e070f31a 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -231,6 +231,10 @@ static struct irq_cfg *irq_cfg(unsigned int irq) #endif +static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) +{ +} + struct io_apic { unsigned int index; unsigned int unused[3]; @@ -272,11 +276,10 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned writel(value, &io_apic->data); } -static bool io_apic_level_ack_pending(unsigned int irq) +static bool io_apic_level_ack_pending(struct irq_cfg *cfg) { struct irq_pin_list *entry; unsigned long flags; - struct irq_cfg *cfg = irq_cfg(irq); spin_lock_irqsave(&ioapic_lock, flags); entry = cfg->irq_2_pin; @@ -358,13 +361,12 @@ static void ioapic_mask_entry(int apic, int pin) } #ifdef CONFIG_SMP -static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) +static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) { int apic, pin; - struct irq_cfg *cfg; struct irq_pin_list *entry; + u8 vector = cfg->vector; - cfg = irq_cfg(irq); entry = cfg->irq_2_pin; for (;;) { unsigned int reg; @@ -394,24 +396,27 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) } } -static int assign_irq_vector(int irq, cpumask_t mask); +static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask); -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +static void set_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) { struct irq_cfg *cfg; unsigned long flags; unsigned int dest; cpumask_t tmp; - struct irq_desc *desc; + unsigned int irq; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) return; - cfg = irq_cfg(irq); - if (assign_irq_vector(irq, mask)) + irq = desc->irq; + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; + set_extra_move_desc(desc, mask); + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); /* @@ -419,12 +424,20 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) */ dest = SET_APIC_LOGICAL_ID(dest); - desc = irq_to_desc(irq); spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg->vector); + __target_IO_APIC_irq(irq, dest, cfg); desc->affinity = mask; spin_unlock_irqrestore(&ioapic_lock, flags); } + +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + struct irq_desc *desc; + + desc = irq_to_desc(irq); + + set_ioapic_affinity_irq_desc(desc, mask); +} #endif /* CONFIG_SMP */ /* @@ -432,10 +445,9 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. */ -static void add_pin_to_irq_cpu(unsigned int irq, int cpu, int apic, int pin) +static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) { struct irq_pin_list *entry; - struct irq_cfg *cfg = irq_cfg(irq); entry = cfg->irq_2_pin; if (!entry) { @@ -468,11 +480,10 @@ static void add_pin_to_irq_cpu(unsigned int irq, int cpu, int apic, int pin) /* * Reroute an IRQ to a different pin. */ -static void __init replace_pin_at_irq(unsigned int irq, int cpu, +static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, int oldapic, int oldpin, int newapic, int newpin) { - struct irq_cfg *cfg = irq_cfg(irq); struct irq_pin_list *entry = cfg->irq_2_pin; int replaced = 0; @@ -489,18 +500,16 @@ static void __init replace_pin_at_irq(unsigned int irq, int cpu, /* why? call replace before add? */ if (!replaced) - add_pin_to_irq_cpu(irq, cpu, newapic, newpin); + add_pin_to_irq_cpu(cfg, cpu, newapic, newpin); } -static inline void io_apic_modify_irq(unsigned int irq, +static inline void io_apic_modify_irq(struct irq_cfg *cfg, int mask_and, int mask_or, void (*final)(struct irq_pin_list *entry)) { int pin; - struct irq_cfg *cfg; struct irq_pin_list *entry; - cfg = irq_cfg(irq); for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { unsigned int reg; pin = entry->pin; @@ -513,9 +522,9 @@ static inline void io_apic_modify_irq(unsigned int irq, } } -static void __unmask_IO_APIC_irq(unsigned int irq) +static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL); + io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); } #ifdef CONFIG_X86_64 @@ -530,47 +539,64 @@ void io_apic_sync(struct irq_pin_list *entry) readl(&io_apic->data); } -static void __mask_IO_APIC_irq(unsigned int irq) +static void __mask_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); + io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); } #else /* CONFIG_X86_32 */ -static void __mask_IO_APIC_irq(unsigned int irq) +static void __mask_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL); + io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL); } -static void __mask_and_edge_IO_APIC_irq(unsigned int irq) +static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER, + io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, IO_APIC_REDIR_MASKED, NULL); } -static void __unmask_and_level_IO_APIC_irq(unsigned int irq) +static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) { - io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, + io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, IO_APIC_REDIR_LEVEL_TRIGGER, NULL); } #endif /* CONFIG_X86_32 */ -static void mask_IO_APIC_irq (unsigned int irq) +static void mask_IO_APIC_irq_desc(struct irq_desc *desc) { + struct irq_cfg *cfg = desc->chip_data; unsigned long flags; + BUG_ON(!cfg); + spin_lock_irqsave(&ioapic_lock, flags); - __mask_IO_APIC_irq(irq); + __mask_IO_APIC_irq(cfg); spin_unlock_irqrestore(&ioapic_lock, flags); } -static void unmask_IO_APIC_irq (unsigned int irq) +static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) { + struct irq_cfg *cfg = desc->chip_data; unsigned long flags; spin_lock_irqsave(&ioapic_lock, flags); - __unmask_IO_APIC_irq(irq); + __unmask_IO_APIC_irq(cfg); spin_unlock_irqrestore(&ioapic_lock, flags); } +static void mask_IO_APIC_irq(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + mask_IO_APIC_irq_desc(desc); +} +static void unmask_IO_APIC_irq(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + + unmask_IO_APIC_irq_desc(desc); +} + static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) { struct IO_APIC_route_entry entry; @@ -1072,7 +1098,7 @@ void unlock_vector_lock(void) spin_unlock(&vector_lock); } -static int __assign_irq_vector(int irq, cpumask_t mask) +static int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) { /* * NOTE! The local APIC isn't very good at handling @@ -1088,16 +1114,13 @@ static int __assign_irq_vector(int irq, cpumask_t mask) static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; unsigned int old_vector; int cpu; - struct irq_cfg *cfg; - cfg = irq_cfg(irq); + if ((cfg->move_in_progress) || cfg->move_cleanup_count) + return -EBUSY; /* Only try and allocate irqs on cpus that are present */ cpus_and(mask, mask, cpu_online_map); - if ((cfg->move_in_progress) || cfg->move_cleanup_count) - return -EBUSY; - old_vector = cfg->vector; if (old_vector) { cpumask_t tmp; @@ -1151,24 +1174,22 @@ next: return -ENOSPC; } -static int assign_irq_vector(int irq, cpumask_t mask) +static int assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask) { int err; unsigned long flags; spin_lock_irqsave(&vector_lock, flags); - err = __assign_irq_vector(irq, mask); + err = __assign_irq_vector(irq, cfg, mask); spin_unlock_irqrestore(&vector_lock, flags); return err; } -static void __clear_irq_vector(int irq) +static void __clear_irq_vector(int irq, struct irq_cfg *cfg) { - struct irq_cfg *cfg; cpumask_t mask; int cpu, vector; - cfg = irq_cfg(irq); BUG_ON(!cfg->vector); vector = cfg->vector; @@ -1257,11 +1278,8 @@ static inline int IO_APIC_irq_trigger(int irq) } #endif -static void ioapic_register_intr(int irq, unsigned long trigger) +static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger) { - struct irq_desc *desc; - - desc = irq_to_desc(irq); if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || trigger == IOAPIC_LEVEL) @@ -1353,7 +1371,7 @@ static int setup_ioapic_entry(int apic, int irq, return 0; } -static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, +static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc, int trigger, int polarity) { struct irq_cfg *cfg; @@ -1363,10 +1381,10 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, if (!IO_APIC_IRQ(irq)) return; - cfg = irq_cfg(irq); + cfg = desc->chip_data; mask = TARGET_CPUS; - if (assign_irq_vector(irq, mask)) + if (assign_irq_vector(irq, cfg, mask)) return; cpus_and(mask, cfg->domain, mask); @@ -1383,11 +1401,11 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, cfg->vector)) { printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", mp_ioapics[apic].mp_apicid, pin); - __clear_irq_vector(irq); + __clear_irq_vector(irq, cfg); return; } - ioapic_register_intr(irq, trigger); + ioapic_register_intr(irq, desc, trigger); if (irq < NR_IRQS_LEGACY) disable_8259A_irq(irq); @@ -1399,6 +1417,7 @@ static void __init setup_IO_APIC_irqs(void) int apic, pin, idx, irq; int notcon = 0; struct irq_desc *desc; + struct irq_cfg *cfg; int cpu = boot_cpu_id; apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); @@ -1436,9 +1455,10 @@ static void __init setup_IO_APIC_irqs(void) printk(KERN_INFO "can not get irq_desc for %d\n", irq); continue; } - add_pin_to_irq_cpu(irq, cpu, apic, pin); + cfg = desc->chip_data; + add_pin_to_irq_cpu(cfg, cpu, apic, pin); - setup_IO_APIC_irq(apic, pin, irq, + setup_IO_APIC_irq(apic, pin, irq, desc, irq_trigger(idx), irq_polarity(idx)); } } @@ -2086,7 +2106,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq) was_pending = 1; } cfg = irq_cfg(irq); - __unmask_IO_APIC_irq(irq); + __unmask_IO_APIC_irq(cfg); spin_unlock_irqrestore(&ioapic_lock, flags); return was_pending; @@ -2149,35 +2169,37 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); * as simple as edge triggered migration and we can do the irq migration * with a simple atomic update to IO-APIC RTE. */ -static void migrate_ioapic_irq(int irq, cpumask_t mask) +static void migrate_ioapic_irq_desc(struct irq_desc *desc, cpumask_t mask) { struct irq_cfg *cfg; - struct irq_desc *desc; cpumask_t tmp, cleanup_mask; struct irte irte; int modify_ioapic_rte; unsigned int dest; unsigned long flags; + unsigned int irq; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) return; + irq = desc->irq; if (get_irte(irq, &irte)) return; - if (assign_irq_vector(irq, mask)) + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; - cfg = irq_cfg(irq); + set_extra_move_desc(desc, mask); + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); - desc = irq_to_desc(irq); modify_ioapic_rte = desc->status & IRQ_LEVEL; if (modify_ioapic_rte) { spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg->vector); + __target_IO_APIC_irq(irq, dest, cfg); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -2199,14 +2221,14 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask) desc->affinity = mask; } -static int migrate_irq_remapped_level(int irq) +static int migrate_irq_remapped_level_desc(struct irq_desc *desc) { int ret = -1; - struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg = desc->chip_data; - mask_IO_APIC_irq(irq); + mask_IO_APIC_irq_desc(desc); - if (io_apic_level_ack_pending(irq)) { + if (io_apic_level_ack_pending(cfg)) { /* * Interrupt in progress. Migrating irq now will change the * vector information in the IO-APIC RTE and that will confuse @@ -2218,14 +2240,15 @@ static int migrate_irq_remapped_level(int irq) } /* everthing is clear. we have right of way */ - migrate_ioapic_irq(irq, desc->pending_mask); + migrate_ioapic_irq_desc(desc, desc->pending_mask); ret = 0; desc->status &= ~IRQ_MOVE_PENDING; cpus_clear(desc->pending_mask); unmask: - unmask_IO_APIC_irq(irq); + unmask_IO_APIC_irq_desc(desc); + return ret; } @@ -2258,18 +2281,22 @@ static void ir_irq_migration(struct work_struct *work) /* * Migrates the IRQ destination in the process context. */ -static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, cpumask_t mask) { - struct irq_desc *desc = irq_to_desc(irq); - if (desc->status & IRQ_LEVEL) { desc->status |= IRQ_MOVE_PENDING; desc->pending_mask = mask; - migrate_irq_remapped_level(irq); + migrate_irq_remapped_level_desc(desc); return; } - migrate_ioapic_irq(irq, mask); + migrate_ioapic_irq_desc(desc, mask); +} +static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + struct irq_desc *desc = irq_to_desc(irq); + + set_ir_ioapic_affinity_irq_desc(desc, mask); } #endif @@ -2313,9 +2340,10 @@ unlock: irq_exit(); } -static void irq_complete_move(unsigned int irq) +static void irq_complete_move(struct irq_desc **descp) { - struct irq_cfg *cfg = irq_cfg(irq); + struct irq_desc *desc = *descp; + struct irq_cfg *cfg = desc->chip_data; unsigned vector, me; if (likely(!cfg->move_in_progress)) @@ -2333,8 +2361,9 @@ static void irq_complete_move(unsigned int irq) } } #else -static inline void irq_complete_move(unsigned int irq) {} +static inline void irq_complete_move(struct irq_desc **descp) {} #endif + #ifdef CONFIG_INTR_REMAP static void ack_x2apic_level(unsigned int irq) { @@ -2345,11 +2374,14 @@ static void ack_x2apic_edge(unsigned int irq) { ack_x2APIC_irq(); } + #endif static void ack_apic_edge(unsigned int irq) { - irq_complete_move(irq); + struct irq_desc *desc = irq_to_desc(irq); + + irq_complete_move(&desc); move_native_irq(irq); ack_APIC_irq(); } @@ -2358,18 +2390,21 @@ atomic_t irq_mis_count; static void ack_apic_level(unsigned int irq) { + struct irq_desc *desc = irq_to_desc(irq); + #ifdef CONFIG_X86_32 unsigned long v; int i; #endif + struct irq_cfg *cfg; int do_unmask_irq = 0; - irq_complete_move(irq); + irq_complete_move(&desc); #ifdef CONFIG_GENERIC_PENDING_IRQ /* If we are moving the irq we need to mask it */ - if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { + if (unlikely(desc->status & IRQ_MOVE_PENDING)) { do_unmask_irq = 1; - mask_IO_APIC_irq(irq); + mask_IO_APIC_irq_desc(desc); } #endif @@ -2393,7 +2428,8 @@ static void ack_apic_level(unsigned int irq) * operation to prevent an edge-triggered interrupt escaping meanwhile. * The idea is from Manfred Spraul. --macro */ - i = irq_cfg(irq)->vector; + cfg = desc->chip_data; + i = cfg->vector; v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); #endif @@ -2432,17 +2468,18 @@ static void ack_apic_level(unsigned int irq) * accurate and is causing problems then it is a hardware bug * and you can go talk to the chipset vendor about it. */ - if (!io_apic_level_ack_pending(irq)) + cfg = desc->chip_data; + if (!io_apic_level_ack_pending(cfg)) move_masked_irq(irq); - unmask_IO_APIC_irq(irq); + unmask_IO_APIC_irq_desc(desc); } #ifdef CONFIG_X86_32 if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(irq); - __unmask_and_level_IO_APIC_irq(irq); + __mask_and_edge_IO_APIC_irq(cfg); + __unmask_and_level_IO_APIC_irq(cfg); spin_unlock(&ioapic_lock); } #endif @@ -2533,7 +2570,7 @@ static void unmask_lapic_irq(unsigned int irq) apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); } -static void ack_lapic_irq (unsigned int irq) +static void ack_lapic_irq(unsigned int irq) { ack_APIC_irq(); } @@ -2545,11 +2582,8 @@ static struct irq_chip lapic_chip __read_mostly = { .ack = ack_lapic_irq, }; -static void lapic_register_intr(int irq) +static void lapic_register_intr(int irq, struct irq_desc *desc) { - struct irq_desc *desc; - - desc = irq_to_desc(irq); desc->status &= ~IRQ_LEVEL; set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, "edge"); @@ -2653,7 +2687,9 @@ int timer_through_8259 __initdata; */ static inline void __init check_timer(void) { - struct irq_cfg *cfg = irq_cfg(0); + struct irq_desc *desc = irq_to_desc(0); + struct irq_cfg *cfg = desc->chip_data; + int cpu = boot_cpu_id; int apic1, pin1, apic2, pin2; unsigned long flags; unsigned int ver; @@ -2668,7 +2704,7 @@ static inline void __init check_timer(void) * get/set the timer IRQ vector: */ disable_8259A_irq(0); - assign_irq_vector(0, TARGET_CPUS); + assign_irq_vector(0, cfg, TARGET_CPUS); /* * As IRQ0 is to be enabled in the 8259A, the virtual @@ -2719,10 +2755,10 @@ static inline void __init check_timer(void) * Ok, does IRQ0 through the IOAPIC work? */ if (no_pin1) { - add_pin_to_irq_cpu(0, boot_cpu_id, apic1, pin1); + add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); } - unmask_IO_APIC_irq(0); + unmask_IO_APIC_irq_desc(desc); if (timer_irq_works()) { if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); @@ -2748,9 +2784,9 @@ static inline void __init check_timer(void) /* * legacy devices should be connected to IO APIC #0 */ - replace_pin_at_irq(0, boot_cpu_id, apic1, pin1, apic2, pin2); + replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); - unmask_IO_APIC_irq(0); + unmask_IO_APIC_irq_desc(desc); enable_8259A_irq(0); if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); @@ -2782,7 +2818,7 @@ static inline void __init check_timer(void) apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...\n"); - lapic_register_intr(0); + lapic_register_intr(0, desc); apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ enable_8259A_irq(0); @@ -2986,7 +3022,7 @@ unsigned int create_irq_nr(unsigned int irq_want) if (cfg_new->vector != 0) continue; - if (__assign_irq_vector(new, TARGET_CPUS) == 0) + if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0) irq = new; break; } @@ -3034,7 +3070,7 @@ void destroy_irq(unsigned int irq) free_irte(irq); #endif spin_lock_irqsave(&vector_lock, flags); - __clear_irq_vector(irq); + __clear_irq_vector(irq, cfg); spin_unlock_irqrestore(&vector_lock, flags); } @@ -3049,12 +3085,12 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms unsigned dest; cpumask_t tmp; + cfg = irq_cfg(irq); tmp = TARGET_CPUS; - err = assign_irq_vector(irq, tmp); + err = assign_irq_vector(irq, cfg, tmp); if (err) return err; - cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, tmp); dest = cpu_mask_to_apicid(tmp); @@ -3112,35 +3148,35 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms #ifdef CONFIG_SMP static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; cpumask_t tmp; - struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) return; - if (assign_irq_vector(irq, mask)) + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; - cfg = irq_cfg(irq); + set_extra_move_desc(desc, mask); + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); - read_msi_msg(irq, &msg); + read_msi_msg_desc(desc, &msg); msg.data &= ~MSI_DATA_VECTOR_MASK; msg.data |= MSI_DATA_VECTOR(cfg->vector); msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; msg.address_lo |= MSI_ADDR_DEST_ID(dest); - write_msi_msg(irq, &msg); - desc = irq_to_desc(irq); + write_msi_msg_desc(desc, &msg); desc->affinity = mask; } - #ifdef CONFIG_INTR_REMAP /* * Migrate the MSI irq to another cpumask. This migration is @@ -3148,11 +3184,11 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) */ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; unsigned int dest; cpumask_t tmp, cleanup_mask; struct irte irte; - struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -3161,10 +3197,12 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) if (get_irte(irq, &irte)) return; - if (assign_irq_vector(irq, mask)) + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; - cfg = irq_cfg(irq); + set_extra_move_desc(desc, mask); + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); @@ -3188,9 +3226,9 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) cfg->move_in_progress = 0; } - desc = irq_to_desc(irq); desc->affinity = mask; } + #endif #endif /* CONFIG_SMP */ @@ -3249,7 +3287,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) } #endif -static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) +static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) { int ret; struct msi_msg msg; @@ -3258,7 +3296,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) if (ret < 0) return ret; - set_irq_msi(irq, desc); + set_irq_msi(irq, msidesc); write_msi_msg(irq, &msg); #ifdef CONFIG_INTR_REMAP @@ -3381,20 +3419,22 @@ void arch_teardown_msi_irq(unsigned int irq) #ifdef CONFIG_SMP static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; cpumask_t tmp; - struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) return; - if (assign_irq_vector(irq, mask)) + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; - cfg = irq_cfg(irq); + set_extra_move_desc(desc, mask); + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); @@ -3406,9 +3446,9 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); dmar_msi_write(irq, &msg); - desc = irq_to_desc(irq); desc->affinity = mask; } + #endif /* CONFIG_SMP */ struct irq_chip dmar_msi_type = { @@ -3442,8 +3482,8 @@ int arch_setup_dmar_msi(unsigned int irq) #ifdef CONFIG_SMP static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; - struct irq_desc *desc; struct msi_msg msg; unsigned int dest; cpumask_t tmp; @@ -3452,10 +3492,12 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) if (cpus_empty(tmp)) return; - if (assign_irq_vector(irq, mask)) + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; - cfg = irq_cfg(irq); + set_extra_move_desc(desc, mask); + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); @@ -3467,9 +3509,9 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); hpet_msi_write(irq, &msg); - desc = irq_to_desc(irq); desc->affinity = mask; } + #endif /* CONFIG_SMP */ struct irq_chip hpet_msi_type = { @@ -3524,26 +3566,28 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) { + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; unsigned int dest; cpumask_t tmp; - struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) return; - if (assign_irq_vector(irq, mask)) + cfg = desc->chip_data; + if (assign_irq_vector(irq, cfg, mask)) return; - cfg = irq_cfg(irq); + set_extra_move_desc(desc, mask); + cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); target_ht_irq(irq, dest, cfg->vector); - desc = irq_to_desc(irq); desc->affinity = mask; } + #endif static struct irq_chip ht_irq_chip = { @@ -3563,13 +3607,13 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) int err; cpumask_t tmp; + cfg = irq_cfg(irq); tmp = TARGET_CPUS; - err = assign_irq_vector(irq, tmp); + err = assign_irq_vector(irq, cfg, tmp); if (!err) { struct ht_irq_msg msg; unsigned dest; - cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, tmp); dest = cpu_mask_to_apicid(tmp); @@ -3615,7 +3659,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, unsigned long flags; int err; - err = assign_irq_vector(irq, *eligible_cpu); + cfg = irq_cfg(irq); + + err = assign_irq_vector(irq, cfg, *eligible_cpu); if (err != 0) return err; @@ -3624,8 +3670,6 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, irq_name); spin_unlock_irqrestore(&vector_lock, flags); - cfg = irq_cfg(irq); - mmr_value = 0; entry = (struct uv_IO_APIC_route_entry *)&mmr_value; BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); @@ -3806,10 +3850,10 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p */ if (irq >= NR_IRQS_LEGACY) { cfg = desc->chip_data; - add_pin_to_irq_cpu(irq, cpu, ioapic, pin); + add_pin_to_irq_cpu(cfg, cpu, ioapic, pin); } - setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); + setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity); return 0; } @@ -3866,7 +3910,7 @@ void __init setup_ioapic_dest(void) desc = irq_to_desc(irq); cfg = desc->chip_data; if (!cfg->vector) { - setup_IO_APIC_irq(ioapic, pin, irq, + setup_IO_APIC_irq(ioapic, pin, irq, desc, irq_trigger(irq_entry), irq_polarity(irq_entry)); continue; @@ -3884,10 +3928,10 @@ void __init setup_ioapic_dest(void) #ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) - set_ir_ioapic_affinity_irq(irq, mask); + set_ir_ioapic_affinity_irq_desc(desc, mask); else #endif - set_ioapic_affinity_irq(irq, mask); + set_ioapic_affinity_irq_desc(desc, mask); } } diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 74801f7df9c9..11a51f8ed3b3 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -103,11 +103,11 @@ static void msix_set_enable(struct pci_dev *dev, int enable) } } -static void msix_flush_writes(unsigned int irq) +static void msix_flush_writes(struct irq_desc *desc) { struct msi_desc *entry; - entry = get_irq_msi(irq); + entry = get_irq_desc_msi(desc); BUG_ON(!entry || !entry->dev); switch (entry->msi_attrib.type) { case PCI_CAP_ID_MSI: @@ -135,11 +135,11 @@ static void msix_flush_writes(unsigned int irq) * Returns 1 if it succeeded in masking the interrupt and 0 if the device * doesn't support MSI masking. */ -static int msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag) +static int msi_set_mask_bits(struct irq_desc *desc, u32 mask, u32 flag) { struct msi_desc *entry; - entry = get_irq_msi(irq); + entry = get_irq_desc_msi(desc); BUG_ON(!entry || !entry->dev); switch (entry->msi_attrib.type) { case PCI_CAP_ID_MSI: @@ -172,9 +172,9 @@ static int msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag) return 1; } -void read_msi_msg(unsigned int irq, struct msi_msg *msg) +void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) { - struct msi_desc *entry = get_irq_msi(irq); + struct msi_desc *entry = get_irq_desc_msi(desc); switch(entry->msi_attrib.type) { case PCI_CAP_ID_MSI: { @@ -211,9 +211,16 @@ void read_msi_msg(unsigned int irq, struct msi_msg *msg) } } -void write_msi_msg(unsigned int irq, struct msi_msg *msg) +void read_msi_msg(unsigned int irq, struct msi_msg *msg) { - struct msi_desc *entry = get_irq_msi(irq); + struct irq_desc *desc = irq_to_desc(irq); + + read_msi_msg_desc(desc, msg); +} + +void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) +{ + struct msi_desc *entry = get_irq_desc_msi(desc); switch (entry->msi_attrib.type) { case PCI_CAP_ID_MSI: { @@ -252,21 +259,31 @@ void write_msi_msg(unsigned int irq, struct msi_msg *msg) entry->msg = *msg; } +void write_msi_msg(unsigned int irq, struct msi_msg *msg) +{ + struct irq_desc *desc = irq_to_desc(irq); + + write_msi_msg_desc(desc, msg); +} + void mask_msi_irq(unsigned int irq) { - msi_set_mask_bits(irq, 1, 1); - msix_flush_writes(irq); + struct irq_desc *desc = irq_to_desc(irq); + + msi_set_mask_bits(desc, 1, 1); + msix_flush_writes(desc); } void unmask_msi_irq(unsigned int irq) { - msi_set_mask_bits(irq, 1, 0); - msix_flush_writes(irq); + struct irq_desc *desc = irq_to_desc(irq); + + msi_set_mask_bits(desc, 1, 0); + msix_flush_writes(desc); } static int msi_free_irqs(struct pci_dev* dev); - static struct msi_desc* alloc_msi_entry(void) { struct msi_desc *entry; @@ -303,9 +320,11 @@ static void __pci_restore_msi_state(struct pci_dev *dev) pci_intx_for_msi(dev, 0); msi_set_enable(dev, 0); write_msi_msg(dev->irq, &entry->msg); - if (entry->msi_attrib.maskbit) - msi_set_mask_bits(dev->irq, entry->msi_attrib.maskbits_mask, + if (entry->msi_attrib.maskbit) { + struct irq_desc *desc = irq_to_desc(dev->irq); + msi_set_mask_bits(desc, entry->msi_attrib.maskbits_mask, entry->msi_attrib.masked); + } pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control); control &= ~PCI_MSI_FLAGS_QSIZE; @@ -327,8 +346,9 @@ static void __pci_restore_msix_state(struct pci_dev *dev) msix_set_enable(dev, 0); list_for_each_entry(entry, &dev->msi_list, list) { + struct irq_desc *desc = irq_to_desc(entry->irq); write_msi_msg(entry->irq, &entry->msg); - msi_set_mask_bits(entry->irq, 1, entry->msi_attrib.masked); + msi_set_mask_bits(desc, 1, entry->msi_attrib.masked); } BUG_ON(list_empty(&dev->msi_list)); @@ -596,7 +616,8 @@ void pci_msi_shutdown(struct pci_dev* dev) /* Return the the pci reset with msi irqs unmasked */ if (entry->msi_attrib.maskbit) { u32 mask = entry->msi_attrib.maskbits_mask; - msi_set_mask_bits(dev->irq, mask, ~mask); + struct irq_desc *desc = irq_to_desc(dev->irq); + msi_set_mask_bits(desc, mask, ~mask); } if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) return; diff --git a/include/linux/msi.h b/include/linux/msi.h index 8f2939227207..d2b8a1e8ca11 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -10,8 +10,11 @@ struct msi_msg { }; /* Helper functions */ +struct irq_desc; extern void mask_msi_irq(unsigned int irq); extern void unmask_msi_irq(unsigned int irq); +extern void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); +extern void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); extern void read_msi_msg(unsigned int irq, struct msi_msg *msg); extern void write_msi_msg(unsigned int irq, struct msi_msg *msg); -- cgit v1.2.3 From 8b96f0119818964e4944fd1c423bf6770027d3ac Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 6 Dec 2008 03:40:00 +0100 Subject: tracing/function-graph-tracer: introduce __notrace_funcgraph to filter special functions Impact: trace more functions When the function graph tracer is configured, three more files are not traced to prevent only four functions to be traced. And this impacts the normal function tracer too. arch/x86/kernel/process_64/32.c: I had crashes when I let this file traced. After some debugging, I saw that the "current" task point was changed inside__swtich_to(), ie: "write_pda(pcurrent, next_p);" inside process_64.c Since the tracer store the original return address of the function inside current, we had crashes. Only __switch_to() has to be excluded from tracing. kernel/module.c and kernel/extable.c: Because of a function used internally by the function graph tracer: __kernel_text_address() To let the other functions inside these files to be traced, this patch introduces the __notrace_funcgraph function prefix which is __notrace if function graph tracer is configured and nothing if not. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 6 ------ arch/x86/kernel/process_32.c | 4 +++- arch/x86/kernel/process_64.c | 4 +++- include/linux/ftrace.h | 11 +++++++++++ kernel/Makefile | 4 ---- kernel/extable.c | 5 +++-- kernel/module.c | 2 +- 7 files changed, 21 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a3049da61985..1cad9318d217 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -14,12 +14,6 @@ CFLAGS_REMOVE_paravirt-spinlocks.o = -pg CFLAGS_REMOVE_ftrace.o = -pg endif -ifdef CONFIG_FUNCTION_GRAPH_TRACER -# Don't trace __switch_to() but let it for function tracer -CFLAGS_REMOVE_process_32.o = -pg -CFLAGS_REMOVE_process_64.o = -pg -endif - # # vsyscalls (which work on the user stack) should have # no stack-protector checks: diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 0a1302fe6d45..24c2276aa453 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -548,7 +549,8 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, * the task-switch, and shows up in ret_from_fork in entry.S, * for example. */ -struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) +__notrace_funcgraph struct task_struct * +__switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c958120fb1b6..fbb321d53d34 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -551,8 +552,9 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, * - could test fs/gs bitsliced * * Kprobes not supported here. Set the probe on schedule instead. + * Function graph tracer not supported too. */ -struct task_struct * +__notrace_funcgraph struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread; diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b9b4d0a22d10..449fa8e9e34f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -369,6 +369,14 @@ struct ftrace_graph_ret { }; #ifdef CONFIG_FUNCTION_GRAPH_TRACER + +/* + * Sometimes we don't want to trace a function with the function + * graph tracer but we want them to keep traced by the usual function + * tracer if the function graph tracer is not configured. + */ +#define __notrace_funcgraph notrace + #define FTRACE_RETFUNC_DEPTH 50 #define FTRACE_RETSTACK_ALLOC_SIZE 32 /* Type of the callback handlers for tracing function graph*/ @@ -394,6 +402,9 @@ static inline int task_curr_ret_stack(struct task_struct *t) return t->curr_ret_stack; } #else + +#define __notrace_funcgraph + static inline void ftrace_graph_init_task(struct task_struct *t) { } static inline void ftrace_graph_exit_task(struct task_struct *t) { } diff --git a/kernel/Makefile b/kernel/Makefile index 703cf3b7389c..19fad003b19d 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -21,10 +21,6 @@ CFLAGS_REMOVE_cgroup-debug.o = -pg CFLAGS_REMOVE_sched_clock.o = -pg CFLAGS_REMOVE_sched.o = -pg endif -ifdef CONFIG_FUNCTION_GRAPH_TRACER -CFLAGS_REMOVE_extable.o = -pg # For __kernel_text_address() -CFLAGS_REMOVE_module.o = -pg # For __module_text_address() -endif obj-$(CONFIG_FREEZER) += freezer.o obj-$(CONFIG_PROFILING) += profile.o diff --git a/kernel/extable.c b/kernel/extable.c index a26cb2e17023..feb0317cf09a 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -17,6 +17,7 @@ */ #include #include +#include #include #include @@ -40,7 +41,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr) return e; } -int core_kernel_text(unsigned long addr) +__notrace_funcgraph int core_kernel_text(unsigned long addr) { if (addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) @@ -53,7 +54,7 @@ int core_kernel_text(unsigned long addr) return 0; } -int __kernel_text_address(unsigned long addr) +__notrace_funcgraph int __kernel_text_address(unsigned long addr) { if (core_kernel_text(addr)) return 1; diff --git a/kernel/module.c b/kernel/module.c index 89bcf7c1327d..dd2a54155b54 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2704,7 +2704,7 @@ int is_module_address(unsigned long addr) /* Is this a valid kernel address? */ -struct module *__module_text_address(unsigned long addr) +__notrace_funcgraph struct module *__module_text_address(unsigned long addr) { struct module *mod; -- cgit v1.2.3 From 380c4b1411ccd6885f92b2c8ceb08433a720f44e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 6 Dec 2008 03:43:41 +0100 Subject: tracing/function-graph-tracer: append the tracing_graph_flag Impact: Provide a way to pause the function graph tracer As suggested by Steven Rostedt, the previous patch that prevented from spinlock function tracing shouldn't use the raw_spinlock to fix it. It's much better to follow lockdep with normal spinlock, so this patch adds a new flag for each task to make the function graph tracer able to be paused. We also can send an ftrace_printk whithout worrying of the irrelevant traced spinlock during insertion. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 5 ++++- include/linux/ftrace.h | 13 +++++++++++++ include/linux/sched.h | 2 ++ kernel/trace/ftrace.c | 2 ++ kernel/trace/trace.c | 18 +++++------------- 5 files changed, 26 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index f98c4076a170..1b43086b097a 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -476,7 +476,10 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) &return_to_handler; /* Nmi's are currently unsupported */ - if (atomic_read(&in_nmi)) + if (unlikely(atomic_read(&in_nmi))) + return; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; /* diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 449fa8e9e34f..11cac81eed08 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -401,6 +401,16 @@ static inline int task_curr_ret_stack(struct task_struct *t) { return t->curr_ret_stack; } + +static inline void pause_graph_tracing(void) +{ + atomic_inc(¤t->tracing_graph_pause); +} + +static inline void unpause_graph_tracing(void) +{ + atomic_dec(¤t->tracing_graph_pause); +} #else #define __notrace_funcgraph @@ -412,6 +422,9 @@ static inline int task_curr_ret_stack(struct task_struct *tsk) { return -1; } + +static inline void pause_graph_tracing(void) { } +static inline void unpause_graph_tracing(void) { } #endif #ifdef CONFIG_TRACING diff --git a/include/linux/sched.h b/include/linux/sched.h index 4c152e0acc9e..4b81fc5f7731 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1379,6 +1379,8 @@ struct task_struct { * because of depth overrun. */ atomic_t trace_overrun; + /* Pause for the tracing */ + atomic_t tracing_graph_pause; #endif #ifdef CONFIG_TRACING /* state flags for use by tracers */ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2971fe48f55e..a12f80efceaa 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1998,6 +1998,7 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) /* Make sure IRQs see the -1 first: */ barrier(); t->ret_stack = ret_stack_list[start++]; + atomic_set(&t->tracing_graph_pause, 0); atomic_set(&t->trace_overrun, 0); } } while_each_thread(g, t); @@ -2077,6 +2078,7 @@ void ftrace_graph_init_task(struct task_struct *t) if (!t->ret_stack) return; t->curr_ret_stack = -1; + atomic_set(&t->tracing_graph_pause, 0); atomic_set(&t->trace_overrun, 0); } else t->ret_stack = NULL; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 33549537f30f..0b8659bd5ad2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3590,14 +3590,7 @@ static __init int tracer_init_debugfs(void) int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) { - /* - * Raw Spinlock because a normal spinlock would be traced here - * and append an irrelevant couple spin_lock_irqsave/ - * spin_unlock_irqrestore traced by ftrace around this - * TRACE_PRINTK trace. - */ - static raw_spinlock_t trace_buf_lock = - (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + static DEFINE_SPINLOCK(trace_buf_lock); static char trace_buf[TRACE_BUF_SIZE]; struct ring_buffer_event *event; @@ -3618,8 +3611,8 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) if (unlikely(atomic_read(&data->disabled))) goto out; - local_irq_save(flags); - __raw_spin_lock(&trace_buf_lock); + pause_graph_tracing(); + spin_lock_irqsave(&trace_buf_lock, irq_flags); len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); len = min(len, TRACE_BUF_SIZE-1); @@ -3640,9 +3633,8 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) ring_buffer_unlock_commit(tr->buffer, event, irq_flags); out_unlock: - __raw_spin_unlock(&trace_buf_lock); - local_irq_restore(flags); - + spin_unlock_irqrestore(&trace_buf_lock, irq_flags); + unpause_graph_tracing(); out: preempt_enable_notrace(); -- cgit v1.2.3 From 240d367b4e6c6e3c5075e034db14dba60a6f5fa7 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 8 Dec 2008 14:06:17 -0800 Subject: sparseirq: fix Alpha build failure Impact: build fix on Alpha -tip testing found this build failure on the Alpha defconfig: /home/mingo/tip/fs/proc/stat.c: In function 'show_stat': /home/mingo/tip/fs/proc/stat.c:48: error: implicit declaration of function 'for_each_irq_desc' /home/mingo/tip/fs/proc/stat.c:48: error: expected ';' before '{' token can not use irq_desc() in stat.c on older architectures. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- fs/proc/stat.c | 20 +++++++++++++------- include/linux/irq.h | 9 --------- include/linux/irqnr.h | 19 ++++++++++++++++--- 3 files changed, 29 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/fs/proc/stat.c b/fs/proc/stat.c index a13431ab7c65..3cb9492801c0 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -45,9 +45,12 @@ static int show_stat(struct seq_file *p, void *v) softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); - for_each_irq_desc(j, desc) { + for_each_irq_nr(j) { +#ifdef CONFIG_SPARSE_IRQ + desc = irq_to_desc(j); if (!desc) continue; +#endif sum += kstat_irqs_cpu(j, i); } sum += arch_irq_stat_cpu(i); @@ -92,14 +95,17 @@ static int show_stat(struct seq_file *p, void *v) seq_printf(p, "intr %llu", (unsigned long long)sum); /* sum again ? it could be updated? */ - for (j = 0; j < NR_IRQS; j++) { - desc = irq_to_desc(j); + for_each_irq_nr(j) { per_irq_sum = 0; - - if (desc) { - for_each_possible_cpu(i) - per_irq_sum += kstat_irqs_cpu(j, i); +#ifdef CONFIG_SPARSE_IRQ + desc = irq_to_desc(j); + if (!desc) { + seq_printf(p, " %u", per_irq_sum); + continue; } +#endif + for_each_possible_cpu(i) + per_irq_sum += kstat_irqs_cpu(j, i); seq_printf(p, " %u", per_irq_sum); } diff --git a/include/linux/irq.h b/include/linux/irq.h index 63b00439d4d2..b5749db3e5a1 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -198,7 +198,6 @@ extern void arch_init_copy_chip_data(struct irq_desc *old_desc, extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc); #ifndef CONFIG_SPARSE_IRQ - extern struct irq_desc irq_desc[NR_IRQS]; static inline struct irq_desc *irq_to_desc(unsigned int irq) @@ -210,14 +209,6 @@ static inline struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) return irq_to_desc(irq); } -#ifdef CONFIG_GENERIC_HARDIRQS -# define for_each_irq_desc(irq, desc) \ - for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++) -# define for_each_irq_desc_reverse(irq, desc) \ - for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1); \ - irq >= 0; irq--, desc--) -#endif - #else extern struct irq_desc *irq_to_desc(unsigned int irq); diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 7a299e989f8b..13754f813589 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -8,9 +8,22 @@ # define for_each_irq_desc(irq, desc) \ for (irq = 0; irq < nr_irqs; irq++) -static inline early_sparse_irq_init(void) -{ -} +# define for_each_irq_desc_reverse(irq, desc) \ + for (irq = nr_irqs - 1; irq >= 0; irq--) +#else +#ifndef CONFIG_SPARSE_IRQ + +struct irq_desc; +extern int nr_irqs; +# define for_each_irq_desc(irq, desc) \ + for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++) +# define for_each_irq_desc_reverse(irq, desc) \ + for (irq = nr_irqs - 1, desc = irq_desc + (nr_irqs - 1); \ + irq >= 0; irq--, desc--) #endif +#endif + +#define for_each_irq_nr(irq) \ + for (irq = 0; irq < nr_irqs; irq++) #endif -- cgit v1.2.3 From a64e64944f4b8ce3288519555dbaa0232414b8ac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 12 Nov 2008 18:37:41 -0500 Subject: [PATCH] return records for fork() both to child and parent Signed-off-by: Al Viro --- include/linux/audit.h | 2 ++ kernel/auditsc.c | 17 +++++++++++++++++ kernel/fork.c | 1 + 3 files changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index 6272a395d43c..1b2a6a5c1876 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -391,6 +391,7 @@ extern int audit_classify_arch(int arch); #ifdef CONFIG_AUDITSYSCALL /* These are defined in auditsc.c */ /* Public API */ +extern void audit_finish_fork(struct task_struct *child); extern int audit_alloc(struct task_struct *task); extern void audit_free(struct task_struct *task); extern void audit_syscall_entry(int arch, @@ -504,6 +505,7 @@ static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) extern int audit_n_rules; extern int audit_signals; #else +#define audit_finish_fork(t) #define audit_alloc(t) ({ 0; }) #define audit_free(t) do { ; } while (0) #define audit_syscall_entry(ta,a,b,c,d,e) do { ; } while (0) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index cf5bc2f5f9c3..de8468050afa 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1548,6 +1548,23 @@ void audit_syscall_entry(int arch, int major, context->ppid = 0; } +void audit_finish_fork(struct task_struct *child) +{ + struct audit_context *ctx = current->audit_context; + struct audit_context *p = child->audit_context; + if (!p || !ctx || !ctx->auditable) + return; + p->arch = ctx->arch; + p->major = ctx->major; + memcpy(p->argv, ctx->argv, sizeof(ctx->argv)); + p->ctime = ctx->ctime; + p->dummy = ctx->dummy; + p->auditable = ctx->auditable; + p->in_syscall = ctx->in_syscall; + p->filterkey = kstrdup(ctx->filterkey, GFP_KERNEL); + p->ppid = current->pid; +} + /** * audit_syscall_exit - deallocate audit context after a system call * @tsk: task being audited diff --git a/kernel/fork.c b/kernel/fork.c index 2a372a0e206f..8d6a7dd9282b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1398,6 +1398,7 @@ long do_fork(unsigned long clone_flags, init_completion(&vfork); } + audit_finish_fork(p); tracehook_report_clone(trace, regs, clone_flags, nr, p); /* -- cgit v1.2.3 From 0b0c940a91f8e6fd0e1be3e01d5e98997446233b Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 18 Nov 2008 15:03:49 +0800 Subject: [PATCH] asm/generic: fix bug - kernel fails to build when enable some common audit code on Blackfin If you enable some common audit code, the kernel fails to build. In file included from lib/audit.c:17: include/asm-generic/audit_write.h:3: error: '__NR_swapon' undeclared here (not in a function) make[1]: *** [lib/audit.o] Error 1 make: *** [lib] Error 2 So do not use __NR_swapon if it isnt defined for a port. Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu Signed-off-by: Al Viro --- include/asm-generic/audit_write.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-generic/audit_write.h b/include/asm-generic/audit_write.h index f10d367fb2a5..c5f1c2c920e2 100644 --- a/include/asm-generic/audit_write.h +++ b/include/asm-generic/audit_write.h @@ -1,6 +1,8 @@ #include __NR_acct, +#ifdef __NR_swapon __NR_swapon, +#endif __NR_quotactl, __NR_truncate, #ifdef __NR_truncate64 -- cgit v1.2.3 From 48887e63d6e057543067327da6b091297f7fe645 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 6 Dec 2008 01:05:50 -0500 Subject: [PATCH] fix broken timestamps in AVC generated by kernel threads Timestamp in audit_context is valid only if ->in_syscall is set. Signed-off-by: Al Viro --- include/linux/audit.h | 4 ++-- kernel/audit.c | 4 +--- kernel/auditsc.c | 5 ++++- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index 1b2a6a5c1876..8f0672d13eb1 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -435,7 +435,7 @@ static inline void audit_ptrace(struct task_struct *t) /* Private API (for audit.c only) */ extern unsigned int audit_serial(void); -extern void auditsc_get_stamp(struct audit_context *ctx, +extern int auditsc_get_stamp(struct audit_context *ctx, struct timespec *t, unsigned int *serial); extern int audit_set_loginuid(struct task_struct *task, uid_t loginuid); #define audit_get_loginuid(t) ((t)->loginuid) @@ -518,7 +518,7 @@ extern int audit_signals; #define audit_inode(n,d) do { ; } while (0) #define audit_inode_child(d,i,p) do { ; } while (0) #define audit_core_dumps(i) do { ; } while (0) -#define auditsc_get_stamp(c,t,s) do { BUG(); } while (0) +#define auditsc_get_stamp(c,t,s) (0) #define audit_get_loginuid(t) (-1) #define audit_get_sessionid(t) (-1) #define audit_log_task_context(b) do { ; } while (0) diff --git a/kernel/audit.c b/kernel/audit.c index d8646c23b427..ce6d8ea3131e 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1121,9 +1121,7 @@ unsigned int audit_serial(void) static inline void audit_get_stamp(struct audit_context *ctx, struct timespec *t, unsigned int *serial) { - if (ctx) - auditsc_get_stamp(ctx, t, serial); - else { + if (!ctx || !auditsc_get_stamp(ctx, t, serial)) { *t = CURRENT_TIME; *serial = audit_serial(); } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 0a13d6895494..2a3f0afc4d2a 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1957,15 +1957,18 @@ EXPORT_SYMBOL_GPL(__audit_inode_child); * * Also sets the context as auditable. */ -void auditsc_get_stamp(struct audit_context *ctx, +int auditsc_get_stamp(struct audit_context *ctx, struct timespec *t, unsigned int *serial) { + if (!ctx->in_syscall) + return 0; if (!ctx->serial) ctx->serial = audit_serial(); t->tv_sec = ctx->ctime.tv_sec; t->tv_nsec = ctx->ctime.tv_nsec; *serial = ctx->serial; ctx->auditable = 1; + return 1; } /* global counter which is incremented every time something logs in */ -- cgit v1.2.3 From 1e641743f055f075ed9a4edd75f1fb1e05669ddc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 9 Dec 2008 09:23:33 +0000 Subject: Audit: Log TIOCSTI AUDIT_TTY records currently log all data read by processes marked for TTY input auditing, even if the data was "pushed back" using the TIOCSTI ioctl, not typed by the user. This patch records all TIOCSTI calls to disambiguate the input. It generates one audit message per character pushed back; considering TIOCSTI is used very rarely, this simple solution is probably good enough. (The only program I could find that uses TIOCSTI is mailx/nail in "header editing" mode, e.g. using the ~h escape. mailx is used very rarely, and the escapes are used even rarer.) Signed-Off-By: Miloslav Trmac Signed-off-by: Al Viro Signed-off-by: James Morris --- drivers/char/tty_audit.c | 78 +++++++++++++++++++++++++++++++++++++----------- drivers/char/tty_io.c | 1 + include/linux/tty.h | 4 +++ 3 files changed, 66 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/char/tty_audit.c b/drivers/char/tty_audit.c index d961fa9612c4..34ab6d798f81 100644 --- a/drivers/char/tty_audit.c +++ b/drivers/char/tty_audit.c @@ -67,37 +67,45 @@ static void tty_audit_buf_put(struct tty_audit_buf *buf) tty_audit_buf_free(buf); } -/** - * tty_audit_buf_push - Push buffered data out - * - * Generate an audit message from the contents of @buf, which is owned by - * @tsk with @loginuid. @buf->mutex must be locked. - */ -static void tty_audit_buf_push(struct task_struct *tsk, uid_t loginuid, - unsigned int sessionid, - struct tty_audit_buf *buf) +static void tty_audit_log(const char *description, struct task_struct *tsk, + uid_t loginuid, unsigned sessionid, int major, + int minor, unsigned char *data, size_t size) { struct audit_buffer *ab; - if (buf->valid == 0) - return; - if (audit_enabled == 0) - return; ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_TTY); if (ab) { char name[sizeof(tsk->comm)]; uid_t uid = task_uid(tsk); - audit_log_format(ab, "tty pid=%u uid=%u auid=%u ses=%u " - "major=%d minor=%d comm=", + audit_log_format(ab, "%s pid=%u uid=%u auid=%u ses=%u " + "major=%d minor=%d comm=", description, tsk->pid, uid, loginuid, sessionid, - buf->major, buf->minor); + major, minor); get_task_comm(name, tsk); audit_log_untrustedstring(ab, name); audit_log_format(ab, " data="); - audit_log_n_hex(ab, buf->data, buf->valid); + audit_log_n_hex(ab, data, size); audit_log_end(ab); } +} + +/** + * tty_audit_buf_push - Push buffered data out + * + * Generate an audit message from the contents of @buf, which is owned by + * @tsk with @loginuid. @buf->mutex must be locked. + */ +static void tty_audit_buf_push(struct task_struct *tsk, uid_t loginuid, + unsigned int sessionid, + struct tty_audit_buf *buf) +{ + if (buf->valid == 0) + return; + if (audit_enabled == 0) + return; + tty_audit_log("tty", tsk, loginuid, sessionid, buf->major, buf->minor, + buf->data, buf->valid); buf->valid = 0; } @@ -151,6 +159,42 @@ void tty_audit_fork(struct signal_struct *sig) sig->tty_audit_buf = NULL; } +/** + * tty_audit_tiocsti - Log TIOCSTI + */ +void tty_audit_tiocsti(struct tty_struct *tty, char ch) +{ + struct tty_audit_buf *buf; + int major, minor, should_audit; + + spin_lock_irq(¤t->sighand->siglock); + should_audit = current->signal->audit_tty; + buf = current->signal->tty_audit_buf; + if (buf) + atomic_inc(&buf->count); + spin_unlock_irq(¤t->sighand->siglock); + + major = tty->driver->major; + minor = tty->driver->minor_start + tty->index; + if (buf) { + mutex_lock(&buf->mutex); + if (buf->major == major && buf->minor == minor) + tty_audit_buf_push_current(buf); + mutex_unlock(&buf->mutex); + tty_audit_buf_put(buf); + } + + if (should_audit && audit_enabled) { + uid_t auid; + unsigned int sessionid; + + auid = audit_get_loginuid(current); + sessionid = audit_get_sessionid(current); + tty_audit_log("ioctl=TIOCSTI", current, auid, sessionid, major, + minor, &ch, 1); + } +} + /** * tty_audit_push_task - Flush task's pending audit data */ diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 1412a8d1e58d..db15f9ba7c0b 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -2018,6 +2018,7 @@ static int tiocsti(struct tty_struct *tty, char __user *p) return -EPERM; if (get_user(ch, p)) return -EFAULT; + tty_audit_tiocsti(tty, ch); ld = tty_ldisc_ref_wait(tty); ld->ops->receive_buf(tty, &ch, &mbz, 1); tty_ldisc_deref(ld); diff --git a/include/linux/tty.h b/include/linux/tty.h index 3b8121d4e36f..580700f20a1c 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -442,6 +442,7 @@ extern void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, size_t size); extern void tty_audit_exit(void); extern void tty_audit_fork(struct signal_struct *sig); +extern void tty_audit_tiocsti(struct tty_struct *tty, char ch); extern void tty_audit_push(struct tty_struct *tty); extern void tty_audit_push_task(struct task_struct *tsk, uid_t loginuid, u32 sessionid); @@ -450,6 +451,9 @@ static inline void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, size_t size) { } +static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch) +{ +} static inline void tty_audit_exit(void) { } -- cgit v1.2.3 From c5af3a2e192d333997d1e191f3eba7fd2f869681 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 28 Nov 2008 13:29:45 +0000 Subject: ASoC: Add card registration API ASoC v2 allows cards, codecs and platforms to instantiate separately, with the overall ASoC device only being instantiated once all the required components have registered. As part of backporting Liam's work introduce an initial version of the card registration functions. At present these do nothing active and are internal only, they will be exposed to machine drivers after further backporting. Adding this now allows the datastructures used for dynamic card instantiation to be built up gradually. Signed-off-by: Mark Brown --- include/sound/soc.h | 5 +++++ sound/soc/soc-core.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 79d855d2bddd..4a578b5d855c 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -333,6 +333,11 @@ struct snd_soc_dai_link { /* SoC card */ struct snd_soc_card { char *name; + struct device *dev; + + struct list_head list; + + int instantiated; int (*probe)(struct platform_device *pdev); int (*remove)(struct platform_device *pdev); diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 2f2a8d93bbf0..44fbd71ce80f 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -43,6 +43,12 @@ static DECLARE_WAIT_QUEUE_HEAD(soc_pm_waitq); static struct dentry *debugfs_root; #endif +static DEFINE_MUTEX(client_mutex); +static LIST_HEAD(card_list); + +static int snd_soc_register_card(struct snd_soc_card *card); +static int snd_soc_unregister_card(struct snd_soc_card *card); + /* * This is a timeout to do a DAPM powerdown after a stream is closed(). * It can be used to eliminate pops between different playback streams, e.g. @@ -784,6 +790,14 @@ static int soc_probe(struct platform_device *pdev) /* Bodge while we push things out of socdev */ card->socdev = socdev; + /* Bodge while we unpick instantiation */ + card->dev = &pdev->dev; + ret = snd_soc_register_card(card); + if (ret != 0) { + dev_err(&pdev->dev, "Failed to register card\n"); + return ret; + } + if (card->probe) { ret = card->probe(pdev); if (ret < 0) @@ -863,6 +877,8 @@ static int soc_remove(struct platform_device *pdev) if (card->remove) card->remove(pdev); + snd_soc_unregister_card(card); + return 0; } @@ -1957,6 +1973,52 @@ int snd_soc_dai_digital_mute(struct snd_soc_dai *dai, int mute) } EXPORT_SYMBOL_GPL(snd_soc_dai_digital_mute); +/** + * snd_soc_register_card - Register a card with the ASoC core + * + * @param card Card to register + * + * Note that currently this is an internal only function: it will be + * exposed to machine drivers after further backporting of ASoC v2 + * registration APIs. + */ +static int snd_soc_register_card(struct snd_soc_card *card) +{ + if (!card->name || !card->dev) + return -EINVAL; + + INIT_LIST_HEAD(&card->list); + card->instantiated = 0; + + mutex_lock(&client_mutex); + list_add(&card->list, &card_list); + mutex_unlock(&client_mutex); + + dev_dbg(card->dev, "Registered card '%s'\n", card->name); + + return 0; +} + +/** + * snd_soc_unregister_card - Unregister a card with the ASoC core + * + * @param card Card to unregister + * + * Note that currently this is an internal only function: it will be + * exposed to machine drivers after further backporting of ASoC v2 + * registration APIs. + */ +static int snd_soc_unregister_card(struct snd_soc_card *card) +{ + mutex_lock(&client_mutex); + list_del(&card->list); + mutex_unlock(&client_mutex); + + dev_dbg(card->dev, "Unregistered card '%s'\n", card->name); + + return 0; +} + static int __devinit snd_soc_init(void) { #ifdef CONFIG_DEBUG_FS -- cgit v1.2.3 From 9115171a6b79b6b4d5c6697f123556b6efc37f1f Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sun, 30 Nov 2008 23:31:24 +0000 Subject: ASoC: Add DAI registration API Add API calls to register and unregister DAIs with the core. Currently these APIs are ineffective. Since multiple DAIs for a given device are a common case bulk variants are provided. Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 8 +++++ sound/soc/soc-core.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index e2d5f76838c6..24247f763608 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -100,6 +100,12 @@ struct snd_soc_dai_ops; struct snd_soc_dai; struct snd_ac97_bus_ops; +/* Digital Audio Interface registration */ +int snd_soc_register_dai(struct snd_soc_dai *dai); +void snd_soc_unregister_dai(struct snd_soc_dai *dai); +int snd_soc_register_dais(struct snd_soc_dai *dai, size_t count); +void snd_soc_unregister_dais(struct snd_soc_dai *dai, size_t count); + /* Digital Audio Interface clocking API.*/ int snd_soc_dai_set_sysclk(struct snd_soc_dai *dai, int clk_id, unsigned int freq, int dir); @@ -186,6 +192,8 @@ struct snd_soc_dai { unsigned int id; int ac97_control; + struct device *dev; + /* DAI callbacks */ int (*probe)(struct platform_device *pdev, struct snd_soc_dai *dai); diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 44fbd71ce80f..03460b068f1e 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -45,6 +45,7 @@ static struct dentry *debugfs_root; static DEFINE_MUTEX(client_mutex); static LIST_HEAD(card_list); +static LIST_HEAD(dai_list); static int snd_soc_register_card(struct snd_soc_card *card); static int snd_soc_unregister_card(struct snd_soc_card *card); @@ -2019,6 +2020,88 @@ static int snd_soc_unregister_card(struct snd_soc_card *card) return 0; } +/** + * snd_soc_register_dai - Register a DAI with the ASoC core + * + * @param dai DAI to register + */ +int snd_soc_register_dai(struct snd_soc_dai *dai) +{ + if (!dai->name) + return -EINVAL; + + /* The device should become mandatory over time */ + if (!dai->dev) + printk(KERN_WARNING "No device for DAI %s\n", dai->name); + + INIT_LIST_HEAD(&dai->list); + + mutex_lock(&client_mutex); + list_add(&dai->list, &dai_list); + mutex_unlock(&client_mutex); + + pr_debug("Registered DAI '%s'\n", dai->name); + + return 0; +} +EXPORT_SYMBOL_GPL(snd_soc_register_dai); + +/** + * snd_soc_unregister_dai - Unregister a DAI from the ASoC core + * + * @param dai DAI to unregister + */ +void snd_soc_unregister_dai(struct snd_soc_dai *dai) +{ + mutex_lock(&client_mutex); + list_del(&dai->list); + mutex_unlock(&client_mutex); + + pr_debug("Unregistered DAI '%s'\n", dai->name); +} +EXPORT_SYMBOL_GPL(snd_soc_unregister_dai); + +/** + * snd_soc_register_dais - Register multiple DAIs with the ASoC core + * + * @param dai Array of DAIs to register + * @param count Number of DAIs + */ +int snd_soc_register_dais(struct snd_soc_dai *dai, size_t count) +{ + int i, ret; + + for (i = 0; i < count; i++) { + ret = snd_soc_register_dai(&dai[i]); + if (ret != 0) + goto err; + } + + return 0; + +err: + for (i--; i >= 0; i--) + snd_soc_unregister_dai(&dai[i]); + + return ret; +} +EXPORT_SYMBOL_GPL(snd_soc_register_dais); + +/** + * snd_soc_unregister_dais - Unregister multiple DAIs from the ASoC core + * + * @param dai Array of DAIs to unregister + * @param count Number of DAIs + */ +void snd_soc_unregister_dais(struct snd_soc_dai *dai, size_t count) +{ + int i; + + for (i = 0; i < count; i++) + snd_soc_unregister_dai(&dai[i]); +} +EXPORT_SYMBOL_GPL(snd_soc_unregister_dais); + static int __devinit snd_soc_init(void) { #ifdef CONFIG_DEBUG_FS -- cgit v1.2.3 From 12a48a8c0087ba39d926cf1d63938ccbdb9752c3 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 3 Dec 2008 19:40:30 +0000 Subject: ASoC: Add platform registration API ASoC v2 allows platform drivers to instantiate independantly of the overall ASoC card. This API allows drivers to notify the core when they are registered. Signed-off-by: Mark Brown --- include/sound/soc.h | 5 +++++ sound/soc/soc-core.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 4a578b5d855c..ce3661d07c24 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -149,6 +149,7 @@ struct snd_soc_ops; struct snd_soc_dai_mode; struct snd_soc_pcm_runtime; struct snd_soc_dai; +struct snd_soc_platform; struct snd_soc_codec; struct soc_enum; struct snd_soc_ac97_ops; @@ -158,6 +159,9 @@ typedef int (*hw_read_t)(void *,char* ,int); extern struct snd_ac97_bus_ops soc_ac97_ops; +int snd_soc_register_platform(struct snd_soc_platform *platform); +void snd_soc_unregister_platform(struct snd_soc_platform *platform); + /* pcm <-> DAI connect */ void snd_soc_free_pcms(struct snd_soc_device *socdev); int snd_soc_new_pcms(struct snd_soc_device *socdev, int idx, const char *xid); @@ -296,6 +300,7 @@ struct snd_soc_codec_device { /* SoC platform interface */ struct snd_soc_platform { char *name; + struct list_head list; int (*probe)(struct platform_device *pdev); int (*remove)(struct platform_device *pdev); diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 03460b068f1e..ffae370b45df 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -46,6 +46,7 @@ static struct dentry *debugfs_root; static DEFINE_MUTEX(client_mutex); static LIST_HEAD(card_list); static LIST_HEAD(dai_list); +static LIST_HEAD(platform_list); static int snd_soc_register_card(struct snd_soc_card *card); static int snd_soc_unregister_card(struct snd_soc_card *card); @@ -2102,6 +2103,43 @@ void snd_soc_unregister_dais(struct snd_soc_dai *dai, size_t count) } EXPORT_SYMBOL_GPL(snd_soc_unregister_dais); +/** + * snd_soc_register_platform - Register a platform with the ASoC core + * + * @param platform platform to register + */ +int snd_soc_register_platform(struct snd_soc_platform *platform) +{ + if (!platform->name) + return -EINVAL; + + INIT_LIST_HEAD(&platform->list); + + mutex_lock(&client_mutex); + list_add(&platform->list, &platform_list); + mutex_unlock(&client_mutex); + + pr_debug("Registered platform '%s'\n", platform->name); + + return 0; +} +EXPORT_SYMBOL_GPL(snd_soc_register_platform); + +/** + * snd_soc_unregister_platform - Unregister a platform from the ASoC core + * + * @param platform platform to unregister + */ +void snd_soc_unregister_platform(struct snd_soc_platform *platform) +{ + mutex_lock(&client_mutex); + list_del(&platform->list); + mutex_unlock(&client_mutex); + + pr_debug("Unregistered platform '%s'\n", platform->name); +} +EXPORT_SYMBOL_GPL(snd_soc_unregister_platform); + static int __devinit snd_soc_init(void) { #ifdef CONFIG_DEBUG_FS -- cgit v1.2.3 From 7b363e440021a1cf9ed76944b2685f48dacefb3e Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 9 Dec 2008 23:22:26 -0800 Subject: netpoll: fix race on poll_list resulting in garbage entry A few months back a race was discused between the netpoll napi service path, and the fast path through net_rx_action: http://kerneltrap.org/mailarchive/linux-netdev/2007/10/16/345470 A patch was submitted for that bug, but I think we missed a case. Consider the following scenario: INITIAL STATE CPU0 has one napi_struct A on its poll_list CPU1 is calling netpoll_send_skb and needs to call poll_napi on the same napi_struct A that CPU0 has on its list CPU0 CPU1 net_rx_action poll_napi !list_empty (returns true) locks poll_lock for A poll_one_napi napi->poll netif_rx_complete __napi_complete (removes A from poll_list) list_entry(list->next) In the above scenario, net_rx_action assumes that the per-cpu poll_list is exclusive to that cpu. netpoll of course violates that, and because the netpoll path can dequeue from the poll list, its possible for CPU0 to detect a non-empty list at the top of the while loop in net_rx_action, but have it become empty by the time it calls list_entry. Since the poll_list isn't surrounded by any other structure, the returned data from that list_entry call in this situation is garbage, and any number of crashes can result based on what exactly that garbage is. Given that its not fasible for performance reasons to place exclusive locks arround each cpus poll list to provide that mutal exclusion, I think the best solution is modify the netpoll path in such a way that we continue to guarantee that the poll_list for a cpu is in fact exclusive to that cpu. To do this I've implemented the patch below. It adds an additional bit to the state field in the napi_struct. When executing napi->poll from the netpoll_path, this bit will be set. When a driver calls netif_rx_complete, if that bit is set, it will not remove the napi_struct from the poll_list. That work will be saved for the next iteration of net_rx_action. I've tested this and it seems to work well. About the biggest drawback I can see to it is the fact that it might result in an extra loop through net_rx_action in the event that the device is actually contended for (i.e. the netpoll path actually preforms all the needed work no the device, and the call to net_rx_action winds up doing nothing, except removing the napi_struct from the poll_list. However I think this is probably a small price to pay, given that the alternative is a crash. Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++++++ net/core/netpoll.c | 2 ++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9d77b1d7dca8..e26f54952892 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -319,6 +319,7 @@ enum { NAPI_STATE_SCHED, /* Poll is scheduled */ NAPI_STATE_DISABLE, /* Disable pending */ + NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ }; extern void __napi_schedule(struct napi_struct *n); @@ -1497,6 +1498,12 @@ static inline void netif_rx_complete(struct net_device *dev, { unsigned long flags; + /* + * don't let napi dequeue from the cpu poll list + * just in case its running on a different cpu + */ + if (unlikely(test_bit(NAPI_STATE_NPSVC, &napi->state))) + return; local_irq_save(flags); __netif_rx_complete(dev, napi); local_irq_restore(flags); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 6c7af390be0a..dadac6281f20 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -133,9 +133,11 @@ static int poll_one_napi(struct netpoll_info *npinfo, npinfo->rx_flags |= NETPOLL_RX_DROP; atomic_inc(&trapped); + set_bit(NAPI_STATE_NPSVC, &napi->state); work = napi->poll(napi, budget); + clear_bit(NAPI_STATE_NPSVC, &napi->state); atomic_dec(&trapped); npinfo->rx_flags &= ~NETPOLL_RX_DROP; -- cgit v1.2.3 From 58494487581cb143a0d763e3056a894d5009d60a Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 26 Nov 2008 12:02:53 +0100 Subject: oprofile: update comment for oprofile_add_sample() The cpu argument is no longer part of the parameter list. Signed-off-by: Robert Richter --- include/linux/oprofile.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 5231861f357d..1ce9fe572e51 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -86,8 +86,7 @@ int oprofile_arch_init(struct oprofile_operations * ops); void oprofile_arch_exit(void); /** - * Add a sample. This may be called from any context. Pass - * smp_processor_id() as cpu. + * Add a sample. This may be called from any context. */ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event); -- cgit v1.2.3 From e09373f22e76cc048ca5fe10a9ff9012f5d64309 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 26 Nov 2008 14:04:19 +0100 Subject: ring_buffer: add remaining cpu functions to ring_buffer.h These functions are not yet in ring_buffer.h though they seems to be part of the API. Cc: Steven Rostedt Signed-off-by: Robert Richter --- include/linux/ring_buffer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index e097c2e6b6dc..de9d8c12e5ec 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -116,6 +116,8 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_entries(struct ring_buffer *buffer); unsigned long ring_buffer_overruns(struct ring_buffer *buffer); +unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); +unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); u64 ring_buffer_time_stamp(int cpu); void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); -- cgit v1.2.3 From cdc693643271b2e6a693cf8f6afb258cce01f058 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 10 Dec 2008 13:55:49 +0000 Subject: ALSA: Add support for mechanical jack insertion Some systems support both mechanical and electrical jack detection, allowing them to report that a jack is physically present but does not have any functioning connections. Add a new jack type for these, allowing user space to report faulty connections. Thanks to Guillem Jover for the suggestion. Signed-off-by: Mark Brown Signed-off-by: Takashi Iwai --- include/linux/input.h | 1 + include/sound/jack.h | 1 + sound/core/jack.c | 6 ++++++ 3 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/input.h b/include/linux/input.h index 7323d2ff5151..abd223b0f586 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -645,6 +645,7 @@ struct input_absinfo { #define SW_MICROPHONE_INSERT 0x04 /* set = inserted */ #define SW_DOCK 0x05 /* set = plugged into dock */ #define SW_LINEOUT_INSERT 0x06 /* set = inserted */ +#define SW_JACK_PHYSICAL_INSERT 0x07 /* set = mechanical switch set */ #define SW_MAX 0x0f #define SW_CNT (SW_MAX+1) diff --git a/include/sound/jack.h b/include/sound/jack.h index 7cb25f4b50bb..2e0315cdd0d6 100644 --- a/include/sound/jack.h +++ b/include/sound/jack.h @@ -36,6 +36,7 @@ enum snd_jack_types { SND_JACK_MICROPHONE = 0x0002, SND_JACK_HEADSET = SND_JACK_HEADPHONE | SND_JACK_MICROPHONE, SND_JACK_LINEOUT = 0x0004, + SND_JACK_MECHANICAL = 0x0008, /* If detected separately */ }; struct snd_jack { diff --git a/sound/core/jack.c b/sound/core/jack.c index c4bb9bad3133..6ebd5f12bc50 100644 --- a/sound/core/jack.c +++ b/sound/core/jack.c @@ -108,6 +108,9 @@ int snd_jack_new(struct snd_card *card, const char *id, int type, if (type & SND_JACK_MICROPHONE) input_set_capability(jack->input_dev, EV_SW, SW_MICROPHONE_INSERT); + if (type & SND_JACK_MECHANICAL) + input_set_capability(jack->input_dev, EV_SW, + SW_JACK_PHYSICAL_INSERT); err = snd_device_new(card, SNDRV_DEV_JACK, jack, &ops); if (err < 0) @@ -159,6 +162,9 @@ void snd_jack_report(struct snd_jack *jack, int status) if (jack->type & SND_JACK_MICROPHONE) input_report_switch(jack->input_dev, SW_MICROPHONE_INSERT, status & SND_JACK_MICROPHONE); + if (jack->type & SND_JACK_MECHANICAL) + input_report_switch(jack->input_dev, SW_JACK_PHYSICAL_INSERT, + status & SND_JACK_MECHANICAL); input_sync(jack->input_dev); } -- cgit v1.2.3 From 0d0cf00a7fc63cee9a4c4a3b8612879b4f7f42ba Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 10 Dec 2008 14:32:45 +0000 Subject: ASoC: Add codec registration API Another part of the backporting of Liam's ASoC v2 work. Using this is more complicated than the other registration types since currently the codec is instantiated during the probe of the ASoC device so we can't currently readily wait for the codec to register. Signed-off-by: Mark Brown --- include/sound/soc.h | 5 +++++ sound/soc/soc-core.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index ce3661d07c24..f86e455d3828 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -161,6 +161,8 @@ extern struct snd_ac97_bus_ops soc_ac97_ops; int snd_soc_register_platform(struct snd_soc_platform *platform); void snd_soc_unregister_platform(struct snd_soc_platform *platform); +int snd_soc_register_codec(struct snd_soc_codec *codec); +void snd_soc_unregister_codec(struct snd_soc_codec *codec); /* pcm <-> DAI connect */ void snd_soc_free_pcms(struct snd_soc_device *socdev); @@ -247,6 +249,9 @@ struct snd_soc_codec { char *name; struct module *owner; struct mutex mutex; + struct device *dev; + + struct list_head list; /* callbacks */ int (*set_bias_level)(struct snd_soc_codec *, diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 4d2db7cfaf4c..b098c0b4c584 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -47,6 +47,7 @@ static DEFINE_MUTEX(client_mutex); static LIST_HEAD(card_list); static LIST_HEAD(dai_list); static LIST_HEAD(platform_list); +static LIST_HEAD(codec_list); static int snd_soc_register_card(struct snd_soc_card *card); static int snd_soc_unregister_card(struct snd_soc_card *card); @@ -2224,6 +2225,48 @@ void snd_soc_unregister_platform(struct snd_soc_platform *platform) } EXPORT_SYMBOL_GPL(snd_soc_unregister_platform); +/** + * snd_soc_register_codec - Register a codec with the ASoC core + * + * @param codec codec to register + */ +int snd_soc_register_codec(struct snd_soc_codec *codec) +{ + if (!codec->name) + return -EINVAL; + + /* The device should become mandatory over time */ + if (!codec->dev) + printk(KERN_WARNING "No device for codec %s\n", codec->name); + + INIT_LIST_HEAD(&codec->list); + + mutex_lock(&client_mutex); + list_add(&codec->list, &codec_list); + snd_soc_instantiate_cards(); + mutex_unlock(&client_mutex); + + pr_debug("Registered codec '%s'\n", codec->name); + + return 0; +} +EXPORT_SYMBOL_GPL(snd_soc_register_codec); + +/** + * snd_soc_unregister_codec - Unregister a codec from the ASoC core + * + * @param codec codec to unregister + */ +void snd_soc_unregister_codec(struct snd_soc_codec *codec) +{ + mutex_lock(&client_mutex); + list_del(&codec->list); + mutex_unlock(&client_mutex); + + pr_debug("Unregistered codec '%s'\n", codec->name); +} +EXPORT_SYMBOL_GPL(snd_soc_unregister_codec); + static int __init snd_soc_init(void) { #ifdef CONFIG_DEBUG_FS -- cgit v1.2.3 From 71c5576fbd809f2015f4eddf72e501e298720cf3 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 9 Dec 2008 13:14:13 -0800 Subject: revert "percpu counter: clean up percpu_counter_sum_and_set()" Revert commit 1f7c14c62ce63805f9574664a6c6de3633d4a354 Author: Mingming Cao Date: Thu Oct 9 12:50:59 2008 -0400 percpu counter: clean up percpu_counter_sum_and_set() Before this patch we had the following: percpu_counter_sum(): return the percpu_counter's value percpu_counter_sum_and_set(): return the percpu_counter's value, copying that value into the central value and zeroing the per-cpu counters before returning. After this patch, percpu_counter_sum_and_set() has gone, and percpu_counter_sum() gets the old percpu_counter_sum_and_set() functionality. Problem is, as Eric points out, the old percpu_counter_sum_and_set() functionality was racy and wrong. It zeroes out counters on "other" cpus, without holding any locks which will prevent races agaist updates from those other CPUS. This patch reverts 1f7c14c62ce63805f9574664a6c6de3633d4a354. This means that percpu_counter_sum_and_set() still has the race, but percpu_counter_sum() does not. Note that this is not a simple revert - ext4 has since started using percpu_counter_sum() for its dirty_blocks counter as well. Note that this revert patch changes percpu_counter_sum() semantics. Before the patch, a call to percpu_counter_sum() will bring the counter's central counter mostly up-to-date, so a following percpu_counter_read() will return a close value. After this patch, a call to percpu_counter_sum() will leave the counter's central accumulator unaltered, so a subsequent call to percpu_counter_read() can now return a significantly inaccurate result. If there is any code in the tree which was introduced after e8ced39d5e8911c662d4d69a342b9d053eaaac4e was merged, and which depends upon the new percpu_counter_sum() semantics, that code will break. Reported-by: Eric Dumazet Cc: "David S. Miller" Cc: Peter Zijlstra Cc: Mingming Cao Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext4/balloc.c | 4 ++-- include/linux/percpu_counter.h | 12 +++++++++--- lib/percpu_counter.c | 8 +++++--- 3 files changed, 16 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index d2003cdc36aa..c17f69bcd7dd 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -609,8 +609,8 @@ int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) if (free_blocks - (nblocks + root_blocks + dirty_blocks) < EXT4_FREEBLOCKS_WATERMARK) { - free_blocks = percpu_counter_sum(fbc); - dirty_blocks = percpu_counter_sum(dbc); + free_blocks = percpu_counter_sum_and_set(fbc); + dirty_blocks = percpu_counter_sum_and_set(dbc); if (dirty_blocks < 0) { printk(KERN_CRIT "Dirty block accounting " "went wrong %lld\n", diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 9007ccdfc112..208388835357 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount); void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); -s64 __percpu_counter_sum(struct percpu_counter *fbc); +s64 __percpu_counter_sum(struct percpu_counter *fbc, int set); static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { @@ -44,13 +44,19 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) { - s64 ret = __percpu_counter_sum(fbc); + s64 ret = __percpu_counter_sum(fbc, 0); return ret < 0 ? 0 : ret; } +static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc) +{ + return __percpu_counter_sum(fbc, 1); +} + + static inline s64 percpu_counter_sum(struct percpu_counter *fbc) { - return __percpu_counter_sum(fbc); + return __percpu_counter_sum(fbc, 0); } static inline s64 percpu_counter_read(struct percpu_counter *fbc) diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 71b265c330ce..dba1530a5b29 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -52,7 +52,7 @@ EXPORT_SYMBOL(__percpu_counter_add); * Add up all the per-cpu counts, return the result. This is a more accurate * but much slower version of percpu_counter_read_positive() */ -s64 __percpu_counter_sum(struct percpu_counter *fbc) +s64 __percpu_counter_sum(struct percpu_counter *fbc, int set) { s64 ret; int cpu; @@ -62,9 +62,11 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc) for_each_online_cpu(cpu) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); ret += *pcount; - *pcount = 0; + if (set) + *pcount = 0; } - fbc->count = ret; + if (set) + fbc->count = ret; spin_unlock(&fbc->lock); return ret; -- cgit v1.2.3 From 02d211688727ad02bb4555b1aa8ae2de16b21b39 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 9 Dec 2008 13:14:14 -0800 Subject: revert "percpu_counter: new function percpu_counter_sum_and_set" Revert commit e8ced39d5e8911c662d4d69a342b9d053eaaac4e Author: Mingming Cao Date: Fri Jul 11 19:27:31 2008 -0400 percpu_counter: new function percpu_counter_sum_and_set As described in revert "percpu counter: clean up percpu_counter_sum_and_set()" the new percpu_counter_sum_and_set() is racy against updates to the cpu-local accumulators on other CPUs. Revert that change. This means that ext4 will be slow again. But correct. Reported-by: Eric Dumazet Cc: "David S. Miller" Cc: Peter Zijlstra Cc: Mingming Cao Cc: Cc: [2.6.27.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext4/balloc.c | 4 ++-- include/linux/percpu_counter.h | 12 +++--------- lib/percpu_counter.c | 7 +------ 3 files changed, 6 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index c17f69bcd7dd..db35cfdb3c8b 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -609,8 +609,8 @@ int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) if (free_blocks - (nblocks + root_blocks + dirty_blocks) < EXT4_FREEBLOCKS_WATERMARK) { - free_blocks = percpu_counter_sum_and_set(fbc); - dirty_blocks = percpu_counter_sum_and_set(dbc); + free_blocks = percpu_counter_sum_positive(fbc); + dirty_blocks = percpu_counter_sum_positive(dbc); if (dirty_blocks < 0) { printk(KERN_CRIT "Dirty block accounting " "went wrong %lld\n", diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 208388835357..9007ccdfc112 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount); void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); -s64 __percpu_counter_sum(struct percpu_counter *fbc, int set); +s64 __percpu_counter_sum(struct percpu_counter *fbc); static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { @@ -44,19 +44,13 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) { - s64 ret = __percpu_counter_sum(fbc, 0); + s64 ret = __percpu_counter_sum(fbc); return ret < 0 ? 0 : ret; } -static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc) -{ - return __percpu_counter_sum(fbc, 1); -} - - static inline s64 percpu_counter_sum(struct percpu_counter *fbc) { - return __percpu_counter_sum(fbc, 0); + return __percpu_counter_sum(fbc); } static inline s64 percpu_counter_read(struct percpu_counter *fbc) diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index dba1530a5b29..b255b939bc1b 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -52,7 +52,7 @@ EXPORT_SYMBOL(__percpu_counter_add); * Add up all the per-cpu counts, return the result. This is a more accurate * but much slower version of percpu_counter_read_positive() */ -s64 __percpu_counter_sum(struct percpu_counter *fbc, int set) +s64 __percpu_counter_sum(struct percpu_counter *fbc) { s64 ret; int cpu; @@ -62,12 +62,7 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc, int set) for_each_online_cpu(cpu) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); ret += *pcount; - if (set) - *pcount = 0; } - if (set) - fbc->count = ret; - spin_unlock(&fbc->lock); return ret; } -- cgit v1.2.3 From aa6f14796630c8b03c11e782484aec2aee05e671 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 Dec 2008 13:14:25 -0800 Subject: atomic: fix a typo in atomic_long_xchg() atomic_long_xchg() is not correctly defined for 32bit arches. Signed-off-by: Eric Dumazet Cc: Mathieu Desnoyers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 4ec0a296bdec..7abdaa91ccd3 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -251,7 +251,7 @@ static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) #define atomic_long_cmpxchg(l, old, new) \ (atomic_cmpxchg((atomic_t *)(l), (old), (new))) #define atomic_long_xchg(v, new) \ - (atomic_xchg((atomic_t *)(l), (new))) + (atomic_xchg((atomic_t *)(v), (new))) #endif /* BITS_PER_LONG == 64 */ -- cgit v1.2.3 From 9c24624727f6d6c460e45762a408ca5f5b9b8ef2 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 9 Dec 2008 13:14:27 -0800 Subject: KSYM_SYMBOL_LEN fixes Miles Lane tailing /sys files hit a BUG which Pekka Enberg has tracked to my 966c8c12dc9e77f931e2281ba25d2f0244b06949 sprint_symbol(): use less stack exposing a bug in slub's list_locations() - kallsyms_lookup() writes a 0 to namebuf[KSYM_NAME_LEN-1], but that was beyond the end of page provided. The 100 slop which list_locations() allows at end of page looks roughly enough for all the other stuff it might print after the symbol before it checks again: break out KSYM_SYMBOL_LEN earlier than before. Latencytop and ftrace and are using KSYM_NAME_LEN buffers where they need KSYM_SYMBOL_LEN buffers, and vmallocinfo a 2*KSYM_NAME_LEN buffer where it wants a KSYM_SYMBOL_LEN buffer: fix those before anyone copies them. [akpm@linux-foundation.org: ftrace.h needs module.h] Signed-off-by: Hugh Dickins Cc: Christoph Lameter Cc Miles Lane Acked-by: Pekka Enberg Acked-by: Steven Rostedt Acked-by: Frederic Weisbecker Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 2 +- include/linux/ftrace.h | 3 ++- kernel/latencytop.c | 2 +- mm/slub.c | 2 +- mm/vmalloc.c | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/proc/base.c b/fs/proc/base.c index 486cf3fe7139..d4677603c889 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -371,7 +371,7 @@ static int lstats_show_proc(struct seq_file *m, void *v) task->latency_record[i].time, task->latency_record[i].max); for (q = 0; q < LT_BACKTRACEDEPTH; q++) { - char sym[KSYM_NAME_LEN]; + char sym[KSYM_SYMBOL_LEN]; char *c; if (!task->latency_record[i].backtrace[q]) break; diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 703eb53cfa2b..9c5bc6be2b09 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #ifdef CONFIG_FUNCTION_TRACER @@ -231,7 +232,7 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { } struct boot_trace { pid_t caller; - char func[KSYM_NAME_LEN]; + char func[KSYM_SYMBOL_LEN]; int result; unsigned long long duration; /* usecs */ ktime_t calltime; diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 5e7b45c56923..449db466bdbc 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -191,7 +191,7 @@ static int lstats_show(struct seq_file *m, void *v) latency_record[i].time, latency_record[i].max); for (q = 0; q < LT_BACKTRACEDEPTH; q++) { - char sym[KSYM_NAME_LEN]; + char sym[KSYM_SYMBOL_LEN]; char *c; if (!latency_record[i].backtrace[q]) break; diff --git a/mm/slub.c b/mm/slub.c index 749588a50a5a..a2cd47d89e0a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3597,7 +3597,7 @@ static int list_locations(struct kmem_cache *s, char *buf, for (i = 0; i < t.count; i++) { struct location *l = &t.loc[i]; - if (len > PAGE_SIZE - 100) + if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100) break; len += sprintf(buf + len, "%7ld ", l->count); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index f3f6e0758562..1ddb77ba3995 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1717,7 +1717,7 @@ static int s_show(struct seq_file *m, void *p) v->addr, v->addr + v->size, v->size); if (v->caller) { - char buff[2 * KSYM_NAME_LEN]; + char buff[KSYM_SYMBOL_LEN]; seq_putc(m, ' '); sprint_symbol(buff, (unsigned long)v->caller); -- cgit v1.2.3 From 0bed7b292d68f82316bfb8cd521e16c867689efe Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Wed, 5 Nov 2008 17:29:53 -0500 Subject: ALSA: cs5535audio: stick AD1888 bitshift values into a header file We'd like to use the High Pass Filter and V_REFOUT bitshift values elsewhere, so stick them into a ac97_codec.h. Signed-off-by: Andres Salomon Signed-off-by: Takashi Iwai --- include/sound/ac97_codec.h | 2 ++ sound/pci/ac97/ac97_patch.c | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/ac97_codec.h b/include/sound/ac97_codec.h index 9c309daf492b..251fc1cd5002 100644 --- a/include/sound/ac97_codec.h +++ b/include/sound/ac97_codec.h @@ -281,10 +281,12 @@ /* specific - Analog Devices */ #define AC97_AD_TEST 0x5a /* test register */ #define AC97_AD_TEST2 0x5c /* undocumented test register 2 */ +#define AC97_AD_HPFD_SHIFT 12 /* High Pass Filter Disable */ #define AC97_AD_CODEC_CFG 0x70 /* codec configuration */ #define AC97_AD_JACK_SPDIF 0x72 /* Jack Sense & S/PDIF */ #define AC97_AD_SERIAL_CFG 0x74 /* Serial Configuration */ #define AC97_AD_MISC 0x76 /* Misc Control Bits */ +#define AC97_AD_VREFD_SHIFT 2 /* V_REFOUT Disable (AD1888) */ /* specific - Cirrus Logic */ #define AC97_CSR_ACMODE 0x5e /* AC Mode Register */ diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c index 6e831aff1bd0..7ad25f439b50 100644 --- a/sound/pci/ac97/ac97_patch.c +++ b/sound/pci/ac97/ac97_patch.c @@ -2054,8 +2054,9 @@ static const struct snd_kcontrol_new snd_ac97_ad1888_controls[] = { .get = snd_ac97_ad1888_lohpsel_get, .put = snd_ac97_ad1888_lohpsel_put }, - AC97_SINGLE("V_REFOUT Enable", AC97_AD_MISC, 2, 1, 1), - AC97_SINGLE("High Pass Filter Enable", AC97_AD_TEST2, 12, 1, 1), + AC97_SINGLE("V_REFOUT Enable", AC97_AD_MISC, AC97_AD_VREFD_SHIFT, 1, 1), + AC97_SINGLE("High Pass Filter Enable", AC97_AD_TEST2, + AC97_AD_HPFD_SHIFT, 1, 1), AC97_SINGLE("Spread Front to Surround and Center/LFE", AC97_AD_MISC, 7, 1, 0), { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, -- cgit v1.2.3 From 54b71fba68efbf3ab89721a384df2ce757750979 Mon Sep 17 00:00:00 2001 From: Akira Takeuchi Date: Wed, 10 Dec 2008 12:43:34 +0000 Subject: MN10300: Fix __put_user_asm8() Fix __put_user_asm8() by jumping to the end label (3:) from the exception handler, rather than jumping back to retry the second store instruction (label 2:). Signed-off-by: Akira Takeuchi Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/asm-mn10300/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-mn10300/uaccess.h b/include/asm-mn10300/uaccess.h index 46b9b647f3c3..8a3a4dd55763 100644 --- a/include/asm-mn10300/uaccess.h +++ b/include/asm-mn10300/uaccess.h @@ -266,7 +266,7 @@ extern int __get_user_unknown(void); " .section .fixup,\"ax\" \n" \ "4: \n" \ " mov %5,%0 \n" \ - " jmp 2b \n" \ + " jmp 3b \n" \ " .previous \n" \ " .section __ex_table,\"a\"\n" \ " .balign 4 \n" \ -- cgit v1.2.3 From 2107fb8b5bf018be691afdd4c6ffaecf0c3307be Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Wed, 5 Nov 2008 00:35:38 +0000 Subject: smsc911x: add dynamic bus configuration Convert the driver to select 16-bit or 32-bit bus access at runtime, at a small performance cost. Signed-off-by: Steve Glendinning Acked-by: Catalin Marinas Signed-off-by: David S. Miller --- drivers/net/smsc911x.c | 158 +++++++++++++++++++++++------------------------ drivers/net/smsc911x.h | 1 - include/linux/smsc911x.h | 5 ++ 3 files changed, 84 insertions(+), 80 deletions(-) (limited to 'include') diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c index fe517880fc97..4b8ff843e300 100644 --- a/drivers/net/smsc911x.c +++ b/drivers/net/smsc911x.c @@ -77,19 +77,16 @@ struct smsc911x_data { unsigned int generation; /* device configuration (copied from platform_data during probe) */ - unsigned int irq_polarity; - unsigned int irq_type; - phy_interface_t phy_interface; + struct smsc911x_platform_config config; /* This needs to be acquired before calling any of below: * smsc911x_mac_read(), smsc911x_mac_write() */ spinlock_t mac_lock; -#if (!SMSC_CAN_USE_32BIT) - /* spinlock to ensure 16-bit accesses are serialised */ + /* spinlock to ensure 16-bit accesses are serialised. + * unused with a 32-bit bus */ spinlock_t dev_lock; -#endif struct phy_device *phy_dev; struct mii_bus *mii_bus; @@ -121,70 +118,56 @@ struct smsc911x_data { unsigned int hashlo; }; -#if SMSC_CAN_USE_32BIT - -static inline u32 smsc911x_reg_read(struct smsc911x_data *pdata, u32 reg) -{ - return readl(pdata->ioaddr + reg); -} - -static inline void smsc911x_reg_write(struct smsc911x_data *pdata, u32 reg, - u32 val) -{ - writel(val, pdata->ioaddr + reg); -} - -/* Writes a packet to the TX_DATA_FIFO */ -static inline void -smsc911x_tx_writefifo(struct smsc911x_data *pdata, unsigned int *buf, - unsigned int wordcount) -{ - writesl(pdata->ioaddr + TX_DATA_FIFO, buf, wordcount); -} - -/* Reads a packet out of the RX_DATA_FIFO */ -static inline void -smsc911x_rx_readfifo(struct smsc911x_data *pdata, unsigned int *buf, - unsigned int wordcount) -{ - readsl(pdata->ioaddr + RX_DATA_FIFO, buf, wordcount); -} - -#else /* SMSC_CAN_USE_32BIT */ - -/* These 16-bit access functions are significantly slower, due to the locking +/* The 16-bit access functions are significantly slower, due to the locking * necessary. If your bus hardware can be configured to do this for you * (in response to a single 32-bit operation from software), you should use * the 32-bit access functions instead. */ static inline u32 smsc911x_reg_read(struct smsc911x_data *pdata, u32 reg) { - unsigned long flags; - u32 data; - - /* these two 16-bit reads must be performed consecutively, so must - * not be interrupted by our own ISR (which would start another - * read operation) */ - spin_lock_irqsave(&pdata->dev_lock, flags); - data = ((readw(pdata->ioaddr + reg) & 0xFFFF) | - ((readw(pdata->ioaddr + reg + 2) & 0xFFFF) << 16)); - spin_unlock_irqrestore(&pdata->dev_lock, flags); + if (pdata->config.flags & SMSC911X_USE_32BIT) + return readl(pdata->ioaddr + reg); + + if (pdata->config.flags & SMSC911X_USE_16BIT) { + u32 data; + unsigned long flags; + + /* these two 16-bit reads must be performed consecutively, so + * must not be interrupted by our own ISR (which would start + * another read operation) */ + spin_lock_irqsave(&pdata->dev_lock, flags); + data = ((readw(pdata->ioaddr + reg) & 0xFFFF) | + ((readw(pdata->ioaddr + reg + 2) & 0xFFFF) << 16)); + spin_unlock_irqrestore(&pdata->dev_lock, flags); + + return data; + } - return data; + BUG(); } static inline void smsc911x_reg_write(struct smsc911x_data *pdata, u32 reg, u32 val) { - unsigned long flags; + if (pdata->config.flags & SMSC911X_USE_32BIT) { + writel(val, pdata->ioaddr + reg); + return; + } + + if (pdata->config.flags & SMSC911X_USE_16BIT) { + unsigned long flags; + + /* these two 16-bit writes must be performed consecutively, so + * must not be interrupted by our own ISR (which would start + * another read operation) */ + spin_lock_irqsave(&pdata->dev_lock, flags); + writew(val & 0xFFFF, pdata->ioaddr + reg); + writew((val >> 16) & 0xFFFF, pdata->ioaddr + reg + 2); + spin_unlock_irqrestore(&pdata->dev_lock, flags); + return; + } - /* these two 16-bit writes must be performed consecutively, so must - * not be interrupted by our own ISR (which would start another - * read operation) */ - spin_lock_irqsave(&pdata->dev_lock, flags); - writew(val & 0xFFFF, pdata->ioaddr + reg); - writew((val >> 16) & 0xFFFF, pdata->ioaddr + reg + 2); - spin_unlock_irqrestore(&pdata->dev_lock, flags); + BUG(); } /* Writes a packet to the TX_DATA_FIFO */ @@ -192,8 +175,18 @@ static inline void smsc911x_tx_writefifo(struct smsc911x_data *pdata, unsigned int *buf, unsigned int wordcount) { - while (wordcount--) - smsc911x_reg_write(pdata, TX_DATA_FIFO, *buf++); + if (pdata->config.flags & SMSC911X_USE_32BIT) { + writesl(pdata->ioaddr + TX_DATA_FIFO, buf, wordcount); + return; + } + + if (pdata->config.flags & SMSC911X_USE_16BIT) { + while (wordcount--) + smsc911x_reg_write(pdata, TX_DATA_FIFO, *buf++); + return; + } + + BUG(); } /* Reads a packet out of the RX_DATA_FIFO */ @@ -201,11 +194,19 @@ static inline void smsc911x_rx_readfifo(struct smsc911x_data *pdata, unsigned int *buf, unsigned int wordcount) { - while (wordcount--) - *buf++ = smsc911x_reg_read(pdata, RX_DATA_FIFO); -} + if (pdata->config.flags & SMSC911X_USE_32BIT) { + readsl(pdata->ioaddr + RX_DATA_FIFO, buf, wordcount); + return; + } -#endif /* SMSC_CAN_USE_32BIT */ + if (pdata->config.flags & SMSC911X_USE_16BIT) { + while (wordcount--) + *buf++ = smsc911x_reg_read(pdata, RX_DATA_FIFO); + return; + } + + BUG(); +} /* waits for MAC not busy, with timeout. Only called by smsc911x_mac_read * and smsc911x_mac_write, so assumes mac_lock is held */ @@ -790,7 +791,7 @@ static int smsc911x_mii_probe(struct net_device *dev) } phydev = phy_connect(dev, phydev->dev.bus_id, - &smsc911x_phy_adjust_link, 0, pdata->phy_interface); + &smsc911x_phy_adjust_link, 0, pdata->config.phy_interface); if (IS_ERR(phydev)) { pr_err("%s: Could not attach to PHY\n", dev->name); @@ -1205,14 +1206,14 @@ static int smsc911x_open(struct net_device *dev) /* Set interrupt deassertion to 100uS */ intcfg = ((10 << 24) | INT_CFG_IRQ_EN_); - if (pdata->irq_polarity) { + if (pdata->config.irq_polarity) { SMSC_TRACE(IFUP, "irq polarity: active high"); intcfg |= INT_CFG_IRQ_POL_; } else { SMSC_TRACE(IFUP, "irq polarity: active low"); } - if (pdata->irq_type) { + if (pdata->config.irq_type) { SMSC_TRACE(IFUP, "irq type: push-pull"); intcfg |= INT_CFG_IRQ_TYPE_; } else { @@ -1767,9 +1768,7 @@ static int __devinit smsc911x_init(struct net_device *dev) SMSC_TRACE(PROBE, "IRQ: %d", dev->irq); SMSC_TRACE(PROBE, "PHY will be autodetected."); -#if (!SMSC_CAN_USE_32BIT) spin_lock_init(&pdata->dev_lock); -#endif if (pdata->ioaddr == 0) { SMSC_WARNING(PROBE, "pdata->ioaddr: 0x00000000"); @@ -1910,6 +1909,7 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev) { struct net_device *dev; struct smsc911x_data *pdata; + struct smsc911x_platform_config *config = pdev->dev.platform_data; struct resource *res; unsigned int intcfg = 0; int res_size; @@ -1918,6 +1918,13 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev) pr_info("%s: Driver version %s.\n", SMSC_CHIPNAME, SMSC_DRV_VERSION); + /* platform data specifies irq & dynamic bus configuration */ + if (!pdev->dev.platform_data) { + pr_warning("%s: platform_data not provided\n", SMSC_CHIPNAME); + retval = -ENODEV; + goto out_0; + } + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "smsc911x-memory"); if (!res) @@ -1949,15 +1956,8 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev) dev->irq = platform_get_irq(pdev, 0); pdata->ioaddr = ioremap_nocache(res->start, res_size); - /* copy config parameters across if present, otherwise pdata - * defaults to zeros */ - if (pdev->dev.platform_data) { - struct smsc911x_platform_config *config = - pdev->dev.platform_data; - pdata->irq_polarity = config->irq_polarity; - pdata->irq_type = config->irq_type; - pdata->phy_interface = config->phy_interface; - } + /* copy config parameters across to pdata */ + memcpy(&pdata->config, config, sizeof(pdata->config)); pdata->dev = dev; pdata->msg_enable = ((1 << debug) - 1); @@ -1974,10 +1974,10 @@ static int __devinit smsc911x_drv_probe(struct platform_device *pdev) goto out_unmap_io_3; /* configure irq polarity and type before connecting isr */ - if (pdata->irq_polarity == SMSC911X_IRQ_POLARITY_ACTIVE_HIGH) + if (pdata->config.irq_polarity == SMSC911X_IRQ_POLARITY_ACTIVE_HIGH) intcfg |= INT_CFG_IRQ_POL_; - if (pdata->irq_type == SMSC911X_IRQ_TYPE_PUSH_PULL) + if (pdata->config.irq_type == SMSC911X_IRQ_TYPE_PUSH_PULL) intcfg |= INT_CFG_IRQ_TYPE_; smsc911x_reg_write(pdata, INT_CFG, intcfg); diff --git a/drivers/net/smsc911x.h b/drivers/net/smsc911x.h index feb36de274ca..f818cf0415f7 100644 --- a/drivers/net/smsc911x.h +++ b/drivers/net/smsc911x.h @@ -21,7 +21,6 @@ #ifndef __SMSC911X_H__ #define __SMSC911X_H__ -#define SMSC_CAN_USE_32BIT 1 #define TX_FIFO_LOW_THRESHOLD ((u32)1600) #define SMSC911X_EEPROM_SIZE ((u32)7) #define USE_DEBUG 0 diff --git a/include/linux/smsc911x.h b/include/linux/smsc911x.h index 47c4ffd10dbb..1cbf0313adde 100644 --- a/include/linux/smsc911x.h +++ b/include/linux/smsc911x.h @@ -28,6 +28,7 @@ struct smsc911x_platform_config { unsigned int irq_polarity; unsigned int irq_type; + unsigned int flags; phy_interface_t phy_interface; }; @@ -39,4 +40,8 @@ struct smsc911x_platform_config { #define SMSC911X_IRQ_TYPE_OPEN_DRAIN 0 #define SMSC911X_IRQ_TYPE_PUSH_PULL 1 +/* Constants for flags */ +#define SMSC911X_USE_16BIT (BIT(0)) +#define SMSC911X_USE_32BIT (BIT(1)) + #endif /* __LINUX_SMSC911X_H__ */ -- cgit v1.2.3 From bd91b8bf372911c1e4d66d6bb44fe409349a6791 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:07:08 -0800 Subject: netns: ip6mr: allocate mroute6_socket per-namespace. Preliminary work to make IPv6 multicast forwarding netns-aware. Make IPv6 multicast forwarding mroute6_socket per-namespace, moves it into struct netns_ipv6. At the moment, mroute6_socket is only referenced in init_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- include/linux/mroute6.h | 8 ++++++-- include/net/netns/ipv6.h | 3 +++ net/ipv6/ip6_output.c | 3 ++- net/ipv6/ip6mr.c | 22 ++++++++++------------ 4 files changed, 21 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 6f4c180179e2..2cd9901ee5c7 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -117,6 +117,7 @@ struct sioc_mif_req6 #include #include /* for struct sk_buff_head */ +#include #ifdef CONFIG_IPV6_MROUTE static inline int ip6_mroute_opt(int opt) @@ -232,10 +233,13 @@ struct rtmsg; extern int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait); #ifdef CONFIG_IPV6_MROUTE -extern struct sock *mroute6_socket; +static inline struct sock *mroute6_socket(struct net *net) +{ + return net->ipv6.mroute6_sk; +} extern int ip6mr_sk_done(struct sock *sk); #else -#define mroute6_socket NULL +static inline struct sock *mroute6_socket(struct net *net) { return NULL; } static inline int ip6mr_sk_done(struct sock *sk) { return 0; } #endif #endif diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 2932721180c0..8a0a67d073b3 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -55,5 +55,8 @@ struct netns_ipv6 { struct sock *ndisc_sk; struct sock *tcp_sk; struct sock *igmp_sk; +#ifdef CONFIG_IPV6_MROUTE + struct sock *mroute6_sk; +#endif }; #endif diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7d92fd97cfb9..4b15938bef4d 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -137,7 +137,8 @@ static int ip6_output2(struct sk_buff *skb) struct inet6_dev *idev = ip6_dst_idev(skb->dst); if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) && - ((mroute6_socket && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || + ((mroute6_socket(dev_net(dev)) && + !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr))) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index d1008e6891e7..02163dbf84c3 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -49,9 +49,6 @@ #include #include -struct sock *mroute6_socket; - - /* Big lock, protecting vif table, mrt cache and mroute socket state. Note that the changes are semaphored via rtnl_lock. */ @@ -820,7 +817,7 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert) skb_pull(skb, sizeof(struct ipv6hdr)); } - if (mroute6_socket == NULL) { + if (init_net.ipv6.mroute6_sk == NULL) { kfree_skb(skb); return -EINVAL; } @@ -828,7 +825,8 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert) /* * Deliver to user space multicast routing algorithms */ - if ((ret = sock_queue_rcv_skb(mroute6_socket, skb)) < 0) { + ret = sock_queue_rcv_skb(init_net.ipv6.mroute6_sk, skb); + if (ret < 0) { if (net_ratelimit()) printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n"); kfree_skb(skb); @@ -1145,8 +1143,8 @@ static int ip6mr_sk_init(struct sock *sk) rtnl_lock(); write_lock_bh(&mrt_lock); - if (likely(mroute6_socket == NULL)) - mroute6_socket = sk; + if (likely(init_net.ipv6.mroute6_sk == NULL)) + init_net.ipv6.mroute6_sk = sk; else err = -EADDRINUSE; write_unlock_bh(&mrt_lock); @@ -1161,9 +1159,9 @@ int ip6mr_sk_done(struct sock *sk) int err = 0; rtnl_lock(); - if (sk == mroute6_socket) { + if (sk == init_net.ipv6.mroute6_sk) { write_lock_bh(&mrt_lock); - mroute6_socket = NULL; + init_net.ipv6.mroute6_sk = NULL; write_unlock_bh(&mrt_lock); mroute_clean_tables(sk); @@ -1189,7 +1187,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int mifi_t mifi; if (optname != MRT6_INIT) { - if (sk != mroute6_socket && !capable(CAP_NET_ADMIN)) + if (sk != init_net.ipv6.mroute6_sk && !capable(CAP_NET_ADMIN)) return -EACCES; } @@ -1214,7 +1212,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int if (vif.mif6c_mifi >= MAXMIFS) return -ENFILE; rtnl_lock(); - ret = mif6_add(&vif, sk == mroute6_socket); + ret = mif6_add(&vif, sk == init_net.ipv6.mroute6_sk); rtnl_unlock(); return ret; @@ -1242,7 +1240,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int if (optname == MRT6_DEL_MFC) ret = ip6mr_mfc_delete(&mfc); else - ret = ip6mr_mfc_add(&mfc, sk == mroute6_socket); + ret = ip6mr_mfc_add(&mfc, sk == init_net.ipv6.mroute6_sk); rtnl_unlock(); return ret; -- cgit v1.2.3 From 4e16880cb4225bfa68878ad5b2a9ded53657d054 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:15:08 -0800 Subject: netns: ip6mr: dynamically allocates vif6_table Preliminary work to make IPv6 multicast forwarding netns-aware. Dynamically allocates interface table vif6_table and moves it to struct netns_ipv6, and updates MIF_EXISTS() macro. At the moment, vif6_table is only referenced in init_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 2 + net/ipv6/ip6mr.c | 107 ++++++++++++++++++++++++++++++----------------- 2 files changed, 70 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 8a0a67d073b3..4ab0cb01a7a5 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -57,6 +57,8 @@ struct netns_ipv6 { struct sock *igmp_sk; #ifdef CONFIG_IPV6_MROUTE struct sock *mroute6_sk; + struct mif_device *vif6_table; + int maxvif; #endif }; #endif diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 02163dbf84c3..bae3ef649664 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -59,10 +59,7 @@ static DEFINE_RWLOCK(mrt_lock); * Multicast router control variables */ -static struct mif_device vif6_table[MAXMIFS]; /* Devices */ -static int maxvif; - -#define MIF_EXISTS(idx) (vif6_table[idx].dev != NULL) +#define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL) static int mroute_do_assert; /* Set in PIM assert */ #ifdef CONFIG_IPV6_PIMSM_V2 @@ -145,11 +142,11 @@ struct ipmr_vif_iter { static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter, loff_t pos) { - for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) { - if (!MIF_EXISTS(iter->ct)) + for (iter->ct = 0; iter->ct < init_net.ipv6.maxvif; ++iter->ct) { + if (!MIF_EXISTS(&init_net, iter->ct)) continue; if (pos-- == 0) - return &vif6_table[iter->ct]; + return &init_net.ipv6.vif6_table[iter->ct]; } return NULL; } @@ -170,10 +167,10 @@ static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (v == SEQ_START_TOKEN) return ip6mr_vif_seq_idx(iter, 0); - while (++iter->ct < maxvif) { - if (!MIF_EXISTS(iter->ct)) + while (++iter->ct < init_net.ipv6.maxvif) { + if (!MIF_EXISTS(&init_net, iter->ct)) continue; - return &vif6_table[iter->ct]; + return &init_net.ipv6.vif6_table[iter->ct]; } return NULL; } @@ -195,7 +192,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", - vif - vif6_table, + vif - init_net.ipv6.vif6_table, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, vif->flags); @@ -305,7 +302,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) mfc->mfc_un.res.wrong_if); for (n = mfc->mfc_un.res.minvif; n < mfc->mfc_un.res.maxvif; n++) { - if (MIF_EXISTS(n) && + if (MIF_EXISTS(&init_net, n) && mfc->mfc_un.res.ttls[n] < 255) seq_printf(seq, " %2d:%-3d", @@ -374,7 +371,7 @@ static int pim6_rcv(struct sk_buff *skb) read_lock(&mrt_lock); if (reg_vif_num >= 0) - reg_dev = vif6_table[reg_vif_num].dev; + reg_dev = init_net.ipv6.vif6_table[reg_vif_num].dev; if (reg_dev) dev_hold(reg_dev); read_unlock(&mrt_lock); @@ -470,10 +467,10 @@ static int mif6_delete(int vifi) { struct mif_device *v; struct net_device *dev; - if (vifi < 0 || vifi >= maxvif) + if (vifi < 0 || vifi >= init_net.ipv6.maxvif) return -EADDRNOTAVAIL; - v = &vif6_table[vifi]; + v = &init_net.ipv6.vif6_table[vifi]; write_lock_bh(&mrt_lock); dev = v->dev; @@ -489,13 +486,13 @@ static int mif6_delete(int vifi) reg_vif_num = -1; #endif - if (vifi + 1 == maxvif) { + if (vifi + 1 == init_net.ipv6.maxvif) { int tmp; for (tmp = vifi - 1; tmp >= 0; tmp--) { - if (MIF_EXISTS(tmp)) + if (MIF_EXISTS(&init_net, tmp)) break; } - maxvif = tmp + 1; + init_net.ipv6.maxvif = tmp + 1; } write_unlock_bh(&mrt_lock); @@ -586,8 +583,9 @@ static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttl cache->mfc_un.res.maxvif = 0; memset(cache->mfc_un.res.ttls, 255, MAXMIFS); - for (vifi = 0; vifi < maxvif; vifi++) { - if (MIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) { + for (vifi = 0; vifi < init_net.ipv6.maxvif; vifi++) { + if (MIF_EXISTS(&init_net, vifi) && + ttls[vifi] && ttls[vifi] < 255) { cache->mfc_un.res.ttls[vifi] = ttls[vifi]; if (cache->mfc_un.res.minvif > vifi) cache->mfc_un.res.minvif = vifi; @@ -600,12 +598,12 @@ static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttl static int mif6_add(struct mif6ctl *vifc, int mrtsock) { int vifi = vifc->mif6c_mifi; - struct mif_device *v = &vif6_table[vifi]; + struct mif_device *v = &init_net.ipv6.vif6_table[vifi]; struct net_device *dev; int err; /* Is vif busy ? */ - if (MIF_EXISTS(vifi)) + if (MIF_EXISTS(&init_net, vifi)) return -EADDRINUSE; switch (vifc->mif6c_flags) { @@ -665,8 +663,8 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock) if (v->flags & MIFF_REGISTER) reg_vif_num = vifi; #endif - if (vifi + 1 > maxvif) - maxvif = vifi + 1; + if (vifi + 1 > init_net.ipv6.maxvif) + init_net.ipv6.maxvif = vifi + 1; write_unlock_bh(&mrt_lock); return 0; } @@ -946,8 +944,8 @@ static int ip6mr_device_event(struct notifier_block *this, if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; - v = &vif6_table[0]; - for (ct = 0; ct < maxvif; ct++, v++) { + v = &init_net.ipv6.vif6_table[0]; + for (ct = 0; ct < init_net.ipv6.maxvif; ct++, v++) { if (v->dev == dev) mif6_delete(ct); } @@ -962,6 +960,30 @@ static struct notifier_block ip6_mr_notifier = { * Setup for IP multicast routing */ +static int __net_init ip6mr_net_init(struct net *net) +{ + int err = 0; + + net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device), + GFP_KERNEL); + if (!net->ipv6.vif6_table) { + err = -ENOMEM; + goto fail; + } +fail: + return err; +} + +static void __net_exit ip6mr_net_exit(struct net *net) +{ + kfree(net->ipv6.vif6_table); +} + +static struct pernet_operations ip6mr_net_ops = { + .init = ip6mr_net_init, + .exit = ip6mr_net_exit, +}; + int __init ip6_mr_init(void) { int err; @@ -973,6 +995,10 @@ int __init ip6_mr_init(void) if (!mrt_cachep) return -ENOMEM; + err = register_pernet_subsys(&ip6mr_net_ops); + if (err) + goto reg_pernet_fail; + setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0); err = register_netdevice_notifier(&ip6_mr_notifier); if (err) @@ -994,6 +1020,8 @@ proc_vif_fail: #endif reg_notif_fail: del_timer(&ipmr_expire_timer); + unregister_pernet_subsys(&ip6mr_net_ops); +reg_pernet_fail: kmem_cache_destroy(mrt_cachep); return err; } @@ -1006,6 +1034,7 @@ void ip6_mr_cleanup(void) #endif unregister_netdevice_notifier(&ip6_mr_notifier); del_timer(&ipmr_expire_timer); + unregister_pernet_subsys(&ip6mr_net_ops); kmem_cache_destroy(mrt_cachep); } @@ -1095,8 +1124,8 @@ static void mroute_clean_tables(struct sock *sk) /* * Shut down all active vif entries */ - for (i = 0; i < maxvif; i++) { - if (!(vif6_table[i].flags & VIFF_STATIC)) + for (i = 0; i < init_net.ipv6.maxvif; i++) { + if (!(init_net.ipv6.vif6_table[i].flags & VIFF_STATIC)) mif6_delete(i); } @@ -1346,11 +1375,11 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) case SIOCGETMIFCNT_IN6: if (copy_from_user(&vr, arg, sizeof(vr))) return -EFAULT; - if (vr.mifi >= maxvif) + if (vr.mifi >= init_net.ipv6.maxvif) return -EINVAL; read_lock(&mrt_lock); - vif = &vif6_table[vr.mifi]; - if (MIF_EXISTS(vr.mifi)) { + vif = &init_net.ipv6.vif6_table[vr.mifi]; + if (MIF_EXISTS(&init_net, vr.mifi)) { vr.icount = vif->pkt_in; vr.ocount = vif->pkt_out; vr.ibytes = vif->bytes_in; @@ -1401,7 +1430,7 @@ static inline int ip6mr_forward2_finish(struct sk_buff *skb) static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi) { struct ipv6hdr *ipv6h; - struct mif_device *vif = &vif6_table[vifi]; + struct mif_device *vif = &init_net.ipv6.vif6_table[vifi]; struct net_device *dev; struct dst_entry *dst; struct flowi fl; @@ -1474,8 +1503,8 @@ out_free: static int ip6mr_find_vif(struct net_device *dev) { int ct; - for (ct = maxvif - 1; ct >= 0; ct--) { - if (vif6_table[ct].dev == dev) + for (ct = init_net.ipv6.maxvif - 1; ct >= 0; ct--) { + if (init_net.ipv6.vif6_table[ct].dev == dev) break; } return ct; @@ -1493,7 +1522,7 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache) /* * Wrong interface: drop packet and (maybe) send PIM assert. */ - if (vif6_table[vif].dev != skb->dev) { + if (init_net.ipv6.vif6_table[vif].dev != skb->dev) { int true_vifi; cache->mfc_un.res.wrong_if++; @@ -1514,8 +1543,8 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache) goto dont_forward; } - vif6_table[vif].pkt_in++; - vif6_table[vif].bytes_in += skb->len; + init_net.ipv6.vif6_table[vif].pkt_in++; + init_net.ipv6.vif6_table[vif].bytes_in += skb->len; /* * Forward the frame @@ -1583,7 +1612,7 @@ ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm) { int ct; struct rtnexthop *nhp; - struct net_device *dev = vif6_table[c->mf6c_parent].dev; + struct net_device *dev = init_net.ipv6.vif6_table[c->mf6c_parent].dev; u8 *b = skb_tail_pointer(skb); struct rtattr *mp_head; @@ -1599,7 +1628,7 @@ ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm) nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); nhp->rtnh_flags = 0; nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; - nhp->rtnh_ifindex = vif6_table[ct].dev->ifindex; + nhp->rtnh_ifindex = init_net.ipv6.vif6_table[ct].dev->ifindex; nhp->rtnh_len = sizeof(*nhp); } } -- cgit v1.2.3 From 58701ad41105638baa0b38ffe9ac5b10469c1fd3 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:22:34 -0800 Subject: netns: ip6mr: store netns in struct mfc6_cache This patch stores into struct mfc6_cache the network namespace each mfc6_cache belongs to. The new member is mfc6_net. mfc6_net is assigned at cache allocation and doesn't change during the rest of the cache entry life. This will help to retrieve the current netns around the IPv6 multicast forwarding code. At the moment, all mfc6_cache are allocated in init_net. Changelog: ========== * Use write_pnet()/read_pnet() to set and get mfc6_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- include/linux/mroute6.h | 15 +++++++++++++++ net/ipv6/ip6mr.c | 26 +++++++++++++++++--------- 2 files changed, 32 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 2cd9901ee5c7..15d85fe12bbd 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -188,6 +188,9 @@ struct mif_device struct mfc6_cache { struct mfc6_cache *next; /* Next entry on cache line */ +#ifdef CONFIG_NET_NS + struct net *mfc6_net; +#endif struct in6_addr mf6c_mcastgrp; /* Group the entry belongs to */ struct in6_addr mf6c_origin; /* Source of packet */ mifi_t mf6c_parent; /* Source interface */ @@ -210,6 +213,18 @@ struct mfc6_cache } mfc_un; }; +static inline +struct net *mfc6_net(const struct mfc6_cache *mfc) +{ + return read_pnet(&mfc->mfc6_net); +} + +static inline +void mfc6_net_set(struct mfc6_cache *mfc, struct net *net) +{ + write_pnet(&mfc->mfc6_net, hold_net(net)); +} + #define MFC_STATIC 1 #define MFC_NOTIFY 2 diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index bae3ef649664..fab5aba28a20 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -506,6 +506,12 @@ static int mif6_delete(int vifi) return 0; } +static inline void ip6mr_cache_free(struct mfc6_cache *c) +{ + release_net(mfc6_net(c)); + kmem_cache_free(mrt_cachep, c); +} + /* Destroy an unresolved cache entry, killing queued skbs and reporting error to netlink readers. */ @@ -528,7 +534,7 @@ static void ip6mr_destroy_unres(struct mfc6_cache *c) kfree_skb(skb); } - kmem_cache_free(mrt_cachep, c); + ip6mr_cache_free(c); } @@ -685,22 +691,24 @@ static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_a /* * Allocate a multicast cache entry */ -static struct mfc6_cache *ip6mr_cache_alloc(void) +static struct mfc6_cache *ip6mr_cache_alloc(struct net *net) { struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (c == NULL) return NULL; c->mfc_un.res.minvif = MAXMIFS; + mfc6_net_set(c, net); return c; } -static struct mfc6_cache *ip6mr_cache_alloc_unres(void) +static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net) { struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); if (c == NULL) return NULL; skb_queue_head_init(&c->mfc_un.unres.unresolved); c->mfc_un.unres.expires = jiffies + 10 * HZ; + mfc6_net_set(c, net); return c; } @@ -856,7 +864,7 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) */ if (atomic_read(&cache_resolve_queue_len) >= 10 || - (c = ip6mr_cache_alloc_unres()) == NULL) { + (c = ip6mr_cache_alloc_unres(&init_net)) == NULL) { spin_unlock_bh(&mfc_unres_lock); kfree_skb(skb); @@ -879,7 +887,7 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) */ spin_unlock_bh(&mfc_unres_lock); - kmem_cache_free(mrt_cachep, c); + ip6mr_cache_free(c); kfree_skb(skb); return err; } @@ -924,7 +932,7 @@ static int ip6mr_mfc_delete(struct mf6cctl *mfc) *cp = c->next; write_unlock_bh(&mrt_lock); - kmem_cache_free(mrt_cachep, c); + ip6mr_cache_free(c); return 0; } } @@ -1073,7 +1081,7 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) return -EINVAL; - c = ip6mr_cache_alloc(); + c = ip6mr_cache_alloc(&init_net); if (c == NULL) return -ENOMEM; @@ -1108,7 +1116,7 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) if (uc) { ip6mr_cache_resolve(uc, c); - kmem_cache_free(mrt_cachep, uc); + ip6mr_cache_free(uc); } return 0; } @@ -1145,7 +1153,7 @@ static void mroute_clean_tables(struct sock *sk) *cp = c->next; write_unlock_bh(&mrt_lock); - kmem_cache_free(mrt_cachep, c); + ip6mr_cache_free(c); } } -- cgit v1.2.3 From 4a6258a0e33d042e4c84d9dec25d45ddb40a70b3 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:24:07 -0800 Subject: netns: ip6mr: dynamically allocate mfc6_cache_array Preliminary work to make IPv6 multicast forwarding netns-aware. Dynamically allocates IPv6 multicast forwarding cache, mfc6_cache_array, and moves it to struct netns_ipv6. At the moment, mfc6_cache_array is only referenced in init_net. Replace 'ARRAY_SIZE(mfc6_cache_array)' with mfc6_cache_array size: MFC6_LINES. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 1 + net/ipv6/ip6mr.c | 47 +++++++++++++++++++++++++++++++---------------- 2 files changed, 32 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 4ab0cb01a7a5..14c1bbe68a85 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -57,6 +57,7 @@ struct netns_ipv6 { struct sock *igmp_sk; #ifdef CONFIG_IPV6_MROUTE struct sock *mroute6_sk; + struct mfc6_cache **mfc6_cache_array; struct mif_device *vif6_table; int maxvif; #endif diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index fab5aba28a20..287e526ba036 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -68,8 +68,6 @@ static int mroute_do_pim; #define mroute_do_pim 0 #endif -static struct mfc6_cache *mfc6_cache_array[MFC6_LINES]; /* Forwarding cache */ - static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */ static atomic_t cache_resolve_queue_len; /* Size of unresolved */ @@ -109,10 +107,11 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) { struct mfc6_cache *mfc; - it->cache = mfc6_cache_array; + it->cache = init_net.ipv6.mfc6_cache_array; read_lock(&mrt_lock); - for (it->ct = 0; it->ct < ARRAY_SIZE(mfc6_cache_array); it->ct++) - for (mfc = mfc6_cache_array[it->ct]; mfc; mfc = mfc->next) + for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) + for (mfc = init_net.ipv6.mfc6_cache_array[it->ct]; + mfc; mfc = mfc->next) if (pos-- == 0) return mfc; read_unlock(&mrt_lock); @@ -243,10 +242,10 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (it->cache == &mfc_unres_queue) goto end_of_list; - BUG_ON(it->cache != mfc6_cache_array); + BUG_ON(it->cache != init_net.ipv6.mfc6_cache_array); - while (++it->ct < ARRAY_SIZE(mfc6_cache_array)) { - mfc = mfc6_cache_array[it->ct]; + while (++it->ct < MFC6_LINES) { + mfc = init_net.ipv6.mfc6_cache_array[it->ct]; if (mfc) return mfc; } @@ -274,7 +273,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) if (it->cache == &mfc_unres_queue) spin_unlock_bh(&mfc_unres_lock); - else if (it->cache == mfc6_cache_array) + else if (it->cache == init_net.ipv6.mfc6_cache_array) read_unlock(&mrt_lock); } @@ -680,7 +679,7 @@ static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_a int line = MFC6_HASH(mcastgrp, origin); struct mfc6_cache *c; - for (c = mfc6_cache_array[line]; c; c = c->next) { + for (c = init_net.ipv6.mfc6_cache_array[line]; c; c = c->next) { if (ipv6_addr_equal(&c->mf6c_origin, origin) && ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) break; @@ -925,7 +924,8 @@ static int ip6mr_mfc_delete(struct mf6cctl *mfc) line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); - for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) { + for (cp = &init_net.ipv6.mfc6_cache_array[line]; + (c = *cp) != NULL; cp = &c->next) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { write_lock_bh(&mrt_lock); @@ -978,12 +978,26 @@ static int __net_init ip6mr_net_init(struct net *net) err = -ENOMEM; goto fail; } + + /* Forwarding cache */ + net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES, + sizeof(struct mfc6_cache *), + GFP_KERNEL); + if (!net->ipv6.mfc6_cache_array) { + err = -ENOMEM; + goto fail_mfc6_cache; + } + return 0; + +fail_mfc6_cache: + kfree(net->ipv6.vif6_table); fail: return err; } static void __net_exit ip6mr_net_exit(struct net *net) { + kfree(net->ipv6.mfc6_cache_array); kfree(net->ipv6.vif6_table); } @@ -1062,7 +1076,8 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); - for (cp = &mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) { + for (cp = &init_net.ipv6.mfc6_cache_array[line]; + (c = *cp) != NULL; cp = &c->next) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) break; @@ -1093,8 +1108,8 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) c->mfc_flags |= MFC_STATIC; write_lock_bh(&mrt_lock); - c->next = mfc6_cache_array[line]; - mfc6_cache_array[line] = c; + c->next = init_net.ipv6.mfc6_cache_array[line]; + init_net.ipv6.mfc6_cache_array[line] = c; write_unlock_bh(&mrt_lock); /* @@ -1140,10 +1155,10 @@ static void mroute_clean_tables(struct sock *sk) /* * Wipe the cache */ - for (i = 0; i < ARRAY_SIZE(mfc6_cache_array); i++) { + for (i = 0; i < MFC6_LINES; i++) { struct mfc6_cache *c, **cp; - cp = &mfc6_cache_array[i]; + cp = &init_net.ipv6.mfc6_cache_array[i]; while ((c = *cp) != NULL) { if (c->mfc_flags & MFC_STATIC) { cp = &c->next; -- cgit v1.2.3 From 4045e57c19bee150370390545ee8a933b3f7a18d Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:27:21 -0800 Subject: netns: ip6mr: declare counter cache_resolve_queue_len per-namespace Preliminary work to make IPv6 multicast forwarding netns-aware. Declare variable cache_resolve_queue_len per-namespace: moves it into struct netns_ipv6. This variable counts the number of unresolved cache entries queued in the list mfc_unres_queue. This list is kept global to all netns as the number of entries per namespace is limited to 10 (hardcoded in routine ip6mr_cache_unresolved). Entries belonging to different namespaces in mfc_unres_queue will be identified by matching the mfc_net member introduced previously in struct mfc6_cache. Keeping this list global to all netns, also allows us to keep a single timer (ipmr_expire_timer) to handle their expiration. In some places cache_resolve_queue_len value was tested for arming or deleting the timer. These tests were equivalent to testing mfc_unres_queue value instead and are replaced in this patch. At the moment, cache_resolve_queue_len is only referenced in init_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 1 + net/ipv6/ip6mr.c | 40 +++++++++++++++++++++------------------- 2 files changed, 22 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 14c1bbe68a85..30572f3f9781 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -60,6 +60,7 @@ struct netns_ipv6 { struct mfc6_cache **mfc6_cache_array; struct mif_device *vif6_table; int maxvif; + atomic_t cache_resolve_queue_len; #endif }; #endif diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 287e526ba036..077c8198eb53 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -69,7 +69,6 @@ static int mroute_do_pim; #endif static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */ -static atomic_t cache_resolve_queue_len; /* Size of unresolved */ /* Special spinlock for queue of unresolved entries */ static DEFINE_SPINLOCK(mfc_unres_lock); @@ -519,7 +518,7 @@ static void ip6mr_destroy_unres(struct mfc6_cache *c) { struct sk_buff *skb; - atomic_dec(&cache_resolve_queue_len); + atomic_dec(&init_net.ipv6.cache_resolve_queue_len); while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) { if (ipv6_hdr(skb)->version == 0) { @@ -561,7 +560,7 @@ static void ipmr_do_expire_process(unsigned long dummy) ip6mr_destroy_unres(c); } - if (atomic_read(&cache_resolve_queue_len)) + if (mfc_unres_queue != NULL) mod_timer(&ipmr_expire_timer, jiffies + expires); } @@ -572,7 +571,7 @@ static void ipmr_expire_process(unsigned long dummy) return; } - if (atomic_read(&cache_resolve_queue_len)) + if (mfc_unres_queue != NULL) ipmr_do_expire_process(dummy); spin_unlock(&mfc_unres_lock); @@ -852,7 +851,8 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) spin_lock_bh(&mfc_unres_lock); for (c = mfc_unres_queue; c; c = c->next) { - if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && + if (net_eq(mfc6_net(c), &init_net) && + ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) break; } @@ -862,7 +862,7 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) * Create a new entry if allowable */ - if (atomic_read(&cache_resolve_queue_len) >= 10 || + if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) >= 10 || (c = ip6mr_cache_alloc_unres(&init_net)) == NULL) { spin_unlock_bh(&mfc_unres_lock); @@ -891,7 +891,7 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) return err; } - atomic_inc(&cache_resolve_queue_len); + atomic_inc(&init_net.ipv6.cache_resolve_queue_len); c->next = mfc_unres_queue; mfc_unres_queue = c; @@ -1119,14 +1119,16 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) spin_lock_bh(&mfc_unres_lock); for (cp = &mfc_unres_queue; (uc = *cp) != NULL; cp = &uc->next) { - if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && + if (net_eq(mfc6_net(uc), &init_net) && + ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) { *cp = uc->next; - if (atomic_dec_and_test(&cache_resolve_queue_len)) - del_timer(&ipmr_expire_timer); + atomic_dec(&init_net.ipv6.cache_resolve_queue_len); break; } } + if (mfc_unres_queue == NULL) + del_timer(&ipmr_expire_timer); spin_unlock_bh(&mfc_unres_lock); if (uc) { @@ -1172,18 +1174,18 @@ static void mroute_clean_tables(struct sock *sk) } } - if (atomic_read(&cache_resolve_queue_len) != 0) { - struct mfc6_cache *c; + if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) != 0) { + struct mfc6_cache *c, **cp; spin_lock_bh(&mfc_unres_lock); - while (mfc_unres_queue != NULL) { - c = mfc_unres_queue; - mfc_unres_queue = c->next; - spin_unlock_bh(&mfc_unres_lock); - + cp = &mfc_unres_queue; + while ((c = *cp) != NULL) { + if (!net_eq(mfc6_net(c), &init_net)) { + cp = &c->next; + continue; + } + *cp = c->next; ip6mr_destroy_unres(c); - - spin_lock_bh(&mfc_unres_lock); } spin_unlock_bh(&mfc_unres_lock); } -- cgit v1.2.3 From a21f3f997c73ced682129aedd372bb6b53041510 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:28:44 -0800 Subject: netns: ip6mr: declare mroute_do_assert and mroute_do_pim per-namespace Preliminary work to make IPv6 multicast forwarding netns-aware. Declare IPv6 multicast forwarding variables 'mroute_do_assert' and 'mroute_do_pim' per-namespace in struct netns_ipv6. At the moment, these variables are only referenced in init_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 2 ++ net/ipv6/ip6mr.c | 26 ++++++++++---------------- 2 files changed, 12 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 30572f3f9781..919cb0f20055 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -61,6 +61,8 @@ struct netns_ipv6 { struct mif_device *vif6_table; int maxvif; atomic_t cache_resolve_queue_len; + int mroute_do_assert; + int mroute_do_pim; #endif }; #endif diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 077c8198eb53..2d2ac23b8606 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -61,13 +61,6 @@ static DEFINE_RWLOCK(mrt_lock); #define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL) -static int mroute_do_assert; /* Set in PIM assert */ -#ifdef CONFIG_IPV6_PIMSM_V2 -static int mroute_do_pim; -#else -#define mroute_do_pim 0 -#endif - static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */ /* Special spinlock for queue of unresolved entries */ @@ -1306,7 +1299,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int int v; if (get_user(v, (int __user *)optval)) return -EFAULT; - mroute_do_assert = !!v; + init_net.ipv6.mroute_do_assert = !!v; return 0; } @@ -1319,10 +1312,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int v = !!v; rtnl_lock(); ret = 0; - if (v != mroute_do_pim) { - mroute_do_pim = v; - mroute_do_assert = v; - if (mroute_do_pim) + if (v != init_net.ipv6.mroute_do_pim) { + init_net.ipv6.mroute_do_pim = v; + init_net.ipv6.mroute_do_assert = v; + if (init_net.ipv6.mroute_do_pim) ret = inet6_add_protocol(&pim6_protocol, IPPROTO_PIM); else @@ -1361,11 +1354,11 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, break; #ifdef CONFIG_IPV6_PIMSM_V2 case MRT6_PIM: - val = mroute_do_pim; + val = init_net.ipv6.mroute_do_pim; break; #endif case MRT6_ASSERT: - val = mroute_do_assert; + val = init_net.ipv6.mroute_do_assert; break; default: return -ENOPROTOOPT; @@ -1553,13 +1546,14 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache) cache->mfc_un.res.wrong_if++; true_vifi = ip6mr_find_vif(skb->dev); - if (true_vifi >= 0 && mroute_do_assert && + if (true_vifi >= 0 && init_net.ipv6.mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, so that we cannot check that packet arrived on an oif. It is bad, but otherwise we would need to move pretty large chunk of pimd to kernel. Ough... --ANK */ - (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) && + (init_net.ipv6.mroute_do_pim || + cache->mfc_un.res.ttls[true_vifi] < 255) && time_after(jiffies, cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { cache->mfc_un.res.last_assert = jiffies; -- cgit v1.2.3 From 950d5704e5daa1f90bcd75b99163491e7b249169 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:29:24 -0800 Subject: netns: ip6mr: declare reg_vif_num per-namespace Preliminary work to make IPv6 multicast forwarding netns-aware. Declare variable 'reg_vif_num' per-namespace, moves into struct netns_ipv6. At the moment, this variable is only referenced in init_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- include/net/netns/ipv6.h | 3 +++ net/ipv6/ip6mr.c | 19 +++++++++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 919cb0f20055..afab4e4cbac7 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -63,6 +63,9 @@ struct netns_ipv6 { atomic_t cache_resolve_queue_len; int mroute_do_assert; int mroute_do_pim; +#ifdef CONFIG_IPV6_PIMSM_V2 + int mroute_reg_vif_num; +#endif #endif }; #endif diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 2d2ac23b8606..8e693859ea03 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -333,13 +333,13 @@ static struct file_operations ip6mr_mfc_fops = { #endif #ifdef CONFIG_IPV6_PIMSM_V2 -static int reg_vif_num = -1; static int pim6_rcv(struct sk_buff *skb) { struct pimreghdr *pim; struct ipv6hdr *encap; struct net_device *reg_dev = NULL; + int reg_vif_num = init_net.ipv6.mroute_reg_vif_num; if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) goto drop; @@ -401,7 +401,7 @@ static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; dev->stats.tx_packets++; - ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT); + ip6mr_cache_report(skb, init_net.ipv6.mroute_reg_vif_num, MRT6MSG_WHOLEPKT); read_unlock(&mrt_lock); kfree_skb(skb); return 0; @@ -473,8 +473,8 @@ static int mif6_delete(int vifi) } #ifdef CONFIG_IPV6_PIMSM_V2 - if (vifi == reg_vif_num) - reg_vif_num = -1; + if (vifi == init_net.ipv6.mroute_reg_vif_num) + init_net.ipv6.mroute_reg_vif_num = -1; #endif if (vifi + 1 == init_net.ipv6.maxvif) { @@ -610,7 +610,7 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock) * Special Purpose VIF in PIM * All the packets will be sent to the daemon */ - if (reg_vif_num >= 0) + if (init_net.ipv6.mroute_reg_vif_num >= 0) return -EADDRINUSE; dev = ip6mr_reg_vif(); if (!dev) @@ -658,7 +658,7 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock) v->dev = dev; #ifdef CONFIG_IPV6_PIMSM_V2 if (v->flags & MIFF_REGISTER) - reg_vif_num = vifi; + init_net.ipv6.mroute_reg_vif_num = vifi; #endif if (vifi + 1 > init_net.ipv6.maxvif) init_net.ipv6.maxvif = vifi + 1; @@ -777,7 +777,7 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert) msg = (struct mrt6msg *)skb_transport_header(skb); msg->im6_mbz = 0; msg->im6_msgtype = MRT6MSG_WHOLEPKT; - msg->im6_mif = reg_vif_num; + msg->im6_mif = init_net.ipv6.mroute_reg_vif_num; msg->im6_pad = 0; ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr); ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr); @@ -964,7 +964,6 @@ static struct notifier_block ip6_mr_notifier = { static int __net_init ip6mr_net_init(struct net *net) { int err = 0; - net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device), GFP_KERNEL); if (!net->ipv6.vif6_table) { @@ -980,6 +979,10 @@ static int __net_init ip6mr_net_init(struct net *net) err = -ENOMEM; goto fail_mfc6_cache; } + +#ifdef CONFIG_IPV6_PIMSM_V2 + net->ipv6.mroute_reg_vif_num = -1; +#endif return 0; fail_mfc6_cache: -- cgit v1.2.3 From 8229efdaef1e7913ae1712c0ba752f267e5fcd5e Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Wed, 10 Dec 2008 16:30:15 -0800 Subject: netns: ip6mr: enable namespace support in ipv6 multicast forwarding code This last patch makes the appropriate changes to use and propagate the network namespace where needed in IPv6 multicast forwarding code. This consists mainly in replacing all the remaining init_net occurences with current netns pointer retrieved from sockets, net devices or mfc6_caches depending on the routines' contexts. Some routines receive a new 'struct net' parameter to propagate the current netns: * ip6mr_get_route * ip6mr_cache_report * ip6mr_cache_find * ip6mr_cache_unresolved * mif6_add/mif6_delete * ip6mr_mfc_add/ip6mr_mfc_delete * ip6mr_reg_vif All the IPv6 multicast forwarding variables moved to struct netns_ipv6 by the previous patches are now referenced in the correct namespace. Changelog: ========== * Take into account the net associated to mfc6_cache when matching entries in mfc_unres_queue list. * Call mroute_clean_tables() in ip6mr_net_exit() to free memory allocated per-namespace. * Call dev_net_set() in ip6mr_reg_vif() to initialize dev->nd_net correctly. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- include/linux/mroute6.h | 3 +- net/ipv6/ip6mr.c | 226 +++++++++++++++++++++++++++--------------------- net/ipv6/route.c | 2 +- 3 files changed, 129 insertions(+), 102 deletions(-) (limited to 'include') diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h index 15d85fe12bbd..5375faca1f72 100644 --- a/include/linux/mroute6.h +++ b/include/linux/mroute6.h @@ -245,7 +245,8 @@ void mfc6_net_set(struct mfc6_cache *mfc, struct net *net) #ifdef __KERNEL__ struct rtmsg; -extern int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait); +extern int ip6mr_get_route(struct net *net, struct sk_buff *skb, + struct rtmsg *rtm, int nowait); #ifdef CONFIG_IPV6_MROUTE static inline struct sock *mroute6_socket(struct net *net) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index d619faf68d98..9eed2422b3e3 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -77,8 +77,10 @@ static DEFINE_SPINLOCK(mfc_unres_lock); static struct kmem_cache *mrt_cachep __read_mostly; static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache); -static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert); +static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, + mifi_t mifi, int assert); static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm); +static void mroute_clean_tables(struct net *net); #ifdef CONFIG_IPV6_PIMSM_V2 static struct inet6_protocol pim6_protocol; @@ -354,7 +356,8 @@ static int pim6_rcv(struct sk_buff *skb) struct pimreghdr *pim; struct ipv6hdr *encap; struct net_device *reg_dev = NULL; - int reg_vif_num = init_net.ipv6.mroute_reg_vif_num; + struct net *net = dev_net(skb->dev); + int reg_vif_num = net->ipv6.mroute_reg_vif_num; if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap))) goto drop; @@ -377,7 +380,7 @@ static int pim6_rcv(struct sk_buff *skb) read_lock(&mrt_lock); if (reg_vif_num >= 0) - reg_dev = init_net.ipv6.vif6_table[reg_vif_num].dev; + reg_dev = net->ipv6.vif6_table[reg_vif_num].dev; if (reg_dev) dev_hold(reg_dev); read_unlock(&mrt_lock); @@ -413,10 +416,13 @@ static struct inet6_protocol pim6_protocol = { static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { + struct net *net = dev_net(dev); + read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; dev->stats.tx_packets++; - ip6mr_cache_report(skb, init_net.ipv6.mroute_reg_vif_num, MRT6MSG_WHOLEPKT); + ip6mr_cache_report(net, skb, net->ipv6.mroute_reg_vif_num, + MRT6MSG_WHOLEPKT); read_unlock(&mrt_lock); kfree_skb(skb); return 0; @@ -435,7 +441,7 @@ static void reg_vif_setup(struct net_device *dev) dev->destructor = free_netdev; } -static struct net_device *ip6mr_reg_vif(void) +static struct net_device *ip6mr_reg_vif(struct net *net) { struct net_device *dev; @@ -443,6 +449,8 @@ static struct net_device *ip6mr_reg_vif(void) if (dev == NULL) return NULL; + dev_net_set(dev, net); + if (register_netdevice(dev)) { free_netdev(dev); return NULL; @@ -469,14 +477,14 @@ failure: * Delete a VIF entry */ -static int mif6_delete(int vifi) +static int mif6_delete(struct net *net, int vifi) { struct mif_device *v; struct net_device *dev; - if (vifi < 0 || vifi >= init_net.ipv6.maxvif) + if (vifi < 0 || vifi >= net->ipv6.maxvif) return -EADDRNOTAVAIL; - v = &init_net.ipv6.vif6_table[vifi]; + v = &net->ipv6.vif6_table[vifi]; write_lock_bh(&mrt_lock); dev = v->dev; @@ -488,17 +496,17 @@ static int mif6_delete(int vifi) } #ifdef CONFIG_IPV6_PIMSM_V2 - if (vifi == init_net.ipv6.mroute_reg_vif_num) - init_net.ipv6.mroute_reg_vif_num = -1; + if (vifi == net->ipv6.mroute_reg_vif_num) + net->ipv6.mroute_reg_vif_num = -1; #endif - if (vifi + 1 == init_net.ipv6.maxvif) { + if (vifi + 1 == net->ipv6.maxvif) { int tmp; for (tmp = vifi - 1; tmp >= 0; tmp--) { - if (MIF_EXISTS(&init_net, tmp)) + if (MIF_EXISTS(net, tmp)) break; } - init_net.ipv6.maxvif = tmp + 1; + net->ipv6.maxvif = tmp + 1; } write_unlock_bh(&mrt_lock); @@ -525,8 +533,9 @@ static inline void ip6mr_cache_free(struct mfc6_cache *c) static void ip6mr_destroy_unres(struct mfc6_cache *c) { struct sk_buff *skb; + struct net *net = mfc6_net(c); - atomic_dec(&init_net.ipv6.cache_resolve_queue_len); + atomic_dec(&net->ipv6.cache_resolve_queue_len); while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) { if (ipv6_hdr(skb)->version == 0) { @@ -535,7 +544,7 @@ static void ip6mr_destroy_unres(struct mfc6_cache *c) nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT; - rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).pid); } else kfree_skb(skb); } @@ -590,13 +599,14 @@ static void ipmr_expire_process(unsigned long dummy) static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls) { int vifi; + struct net *net = mfc6_net(cache); cache->mfc_un.res.minvif = MAXMIFS; cache->mfc_un.res.maxvif = 0; memset(cache->mfc_un.res.ttls, 255, MAXMIFS); - for (vifi = 0; vifi < init_net.ipv6.maxvif; vifi++) { - if (MIF_EXISTS(&init_net, vifi) && + for (vifi = 0; vifi < net->ipv6.maxvif; vifi++) { + if (MIF_EXISTS(net, vifi) && ttls[vifi] && ttls[vifi] < 255) { cache->mfc_un.res.ttls[vifi] = ttls[vifi]; if (cache->mfc_un.res.minvif > vifi) @@ -607,15 +617,15 @@ static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttl } } -static int mif6_add(struct mif6ctl *vifc, int mrtsock) +static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock) { int vifi = vifc->mif6c_mifi; - struct mif_device *v = &init_net.ipv6.vif6_table[vifi]; + struct mif_device *v = &net->ipv6.vif6_table[vifi]; struct net_device *dev; int err; /* Is vif busy ? */ - if (MIF_EXISTS(&init_net, vifi)) + if (MIF_EXISTS(net, vifi)) return -EADDRINUSE; switch (vifc->mif6c_flags) { @@ -625,9 +635,9 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock) * Special Purpose VIF in PIM * All the packets will be sent to the daemon */ - if (init_net.ipv6.mroute_reg_vif_num >= 0) + if (net->ipv6.mroute_reg_vif_num >= 0) return -EADDRINUSE; - dev = ip6mr_reg_vif(); + dev = ip6mr_reg_vif(net); if (!dev) return -ENOBUFS; err = dev_set_allmulti(dev, 1); @@ -639,7 +649,7 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock) break; #endif case 0: - dev = dev_get_by_index(&init_net, vifc->mif6c_pifi); + dev = dev_get_by_index(net, vifc->mif6c_pifi); if (!dev) return -EADDRNOTAVAIL; err = dev_set_allmulti(dev, 1); @@ -673,20 +683,22 @@ static int mif6_add(struct mif6ctl *vifc, int mrtsock) v->dev = dev; #ifdef CONFIG_IPV6_PIMSM_V2 if (v->flags & MIFF_REGISTER) - init_net.ipv6.mroute_reg_vif_num = vifi; + net->ipv6.mroute_reg_vif_num = vifi; #endif - if (vifi + 1 > init_net.ipv6.maxvif) - init_net.ipv6.maxvif = vifi + 1; + if (vifi + 1 > net->ipv6.maxvif) + net->ipv6.maxvif = vifi + 1; write_unlock_bh(&mrt_lock); return 0; } -static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp) +static struct mfc6_cache *ip6mr_cache_find(struct net *net, + struct in6_addr *origin, + struct in6_addr *mcastgrp) { int line = MFC6_HASH(mcastgrp, origin); struct mfc6_cache *c; - for (c = init_net.ipv6.mfc6_cache_array[line]; c; c = c->next) { + for (c = net->ipv6.mfc6_cache_array[line]; c; c = c->next) { if (ipv6_addr_equal(&c->mf6c_origin, origin) && ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) break; @@ -743,7 +755,7 @@ static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c) skb_trim(skb, nlh->nlmsg_len); ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; } - err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid); + err = rtnl_unicast(skb, mfc6_net(uc), NETLINK_CB(skb).pid); } else ip6_mr_forward(skb, c); } @@ -756,7 +768,8 @@ static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c) * Called under mrt_lock. */ -static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert) +static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi, + int assert) { struct sk_buff *skb; struct mrt6msg *msg; @@ -792,7 +805,7 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert) msg = (struct mrt6msg *)skb_transport_header(skb); msg->im6_mbz = 0; msg->im6_msgtype = MRT6MSG_WHOLEPKT; - msg->im6_mif = init_net.ipv6.mroute_reg_vif_num; + msg->im6_mif = net->ipv6.mroute_reg_vif_num; msg->im6_pad = 0; ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr); ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr); @@ -829,7 +842,7 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert) skb_pull(skb, sizeof(struct ipv6hdr)); } - if (init_net.ipv6.mroute6_sk == NULL) { + if (net->ipv6.mroute6_sk == NULL) { kfree_skb(skb); return -EINVAL; } @@ -837,7 +850,7 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert) /* * Deliver to user space multicast routing algorithms */ - ret = sock_queue_rcv_skb(init_net.ipv6.mroute6_sk, skb); + ret = sock_queue_rcv_skb(net->ipv6.mroute6_sk, skb); if (ret < 0) { if (net_ratelimit()) printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n"); @@ -852,14 +865,14 @@ static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert) */ static int -ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) +ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb) { int err; struct mfc6_cache *c; spin_lock_bh(&mfc_unres_lock); for (c = mfc_unres_queue; c; c = c->next) { - if (net_eq(mfc6_net(c), &init_net) && + if (net_eq(mfc6_net(c), net) && ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) && ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) break; @@ -870,8 +883,8 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) * Create a new entry if allowable */ - if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) >= 10 || - (c = ip6mr_cache_alloc_unres(&init_net)) == NULL) { + if (atomic_read(&net->ipv6.cache_resolve_queue_len) >= 10 || + (c = ip6mr_cache_alloc_unres(net)) == NULL) { spin_unlock_bh(&mfc_unres_lock); kfree_skb(skb); @@ -888,7 +901,8 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) /* * Reflect first query at pim6sd */ - if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) { + err = ip6mr_cache_report(net, skb, mifi, MRT6MSG_NOCACHE); + if (err < 0) { /* If the report failed throw the cache entry out - Brad Parker */ @@ -899,7 +913,7 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) return err; } - atomic_inc(&init_net.ipv6.cache_resolve_queue_len); + atomic_inc(&net->ipv6.cache_resolve_queue_len); c->next = mfc_unres_queue; mfc_unres_queue = c; @@ -925,14 +939,14 @@ ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb) * MFC6 cache manipulation by user space */ -static int ip6mr_mfc_delete(struct mf6cctl *mfc) +static int ip6mr_mfc_delete(struct net *net, struct mf6cctl *mfc) { int line; struct mfc6_cache *c, **cp; line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); - for (cp = &init_net.ipv6.mfc6_cache_array[line]; + for (cp = &net->ipv6.mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) { @@ -951,19 +965,17 @@ static int ip6mr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; + struct net *net = dev_net(dev); struct mif_device *v; int ct; - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; - v = &init_net.ipv6.vif6_table[0]; - for (ct = 0; ct < init_net.ipv6.maxvif; ct++, v++) { + v = &net->ipv6.vif6_table[0]; + for (ct = 0; ct < net->ipv6.maxvif; ct++, v++) { if (v->dev == dev) - mif6_delete(ct); + mif6_delete(net, ct); } return NOTIFY_DONE; } @@ -1026,6 +1038,7 @@ static void __net_exit ip6mr_net_exit(struct net *net) proc_net_remove(net, "ip6_mr_cache"); proc_net_remove(net, "ip6_mr_vif"); #endif + mroute_clean_tables(net); kfree(net->ipv6.mfc6_cache_array); kfree(net->ipv6.vif6_table); } @@ -1071,7 +1084,7 @@ void ip6_mr_cleanup(void) kmem_cache_destroy(mrt_cachep); } -static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) +static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock) { int line; struct mfc6_cache *uc, *c, **cp; @@ -1087,7 +1100,7 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr); - for (cp = &init_net.ipv6.mfc6_cache_array[line]; + for (cp = &net->ipv6.mfc6_cache_array[line]; (c = *cp) != NULL; cp = &c->next) { if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) && ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) @@ -1107,7 +1120,7 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr)) return -EINVAL; - c = ip6mr_cache_alloc(&init_net); + c = ip6mr_cache_alloc(net); if (c == NULL) return -ENOMEM; @@ -1119,8 +1132,8 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) c->mfc_flags |= MFC_STATIC; write_lock_bh(&mrt_lock); - c->next = init_net.ipv6.mfc6_cache_array[line]; - init_net.ipv6.mfc6_cache_array[line] = c; + c->next = net->ipv6.mfc6_cache_array[line]; + net->ipv6.mfc6_cache_array[line] = c; write_unlock_bh(&mrt_lock); /* @@ -1130,11 +1143,11 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) spin_lock_bh(&mfc_unres_lock); for (cp = &mfc_unres_queue; (uc = *cp) != NULL; cp = &uc->next) { - if (net_eq(mfc6_net(uc), &init_net) && + if (net_eq(mfc6_net(uc), net) && ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) && ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) { *cp = uc->next; - atomic_dec(&init_net.ipv6.cache_resolve_queue_len); + atomic_dec(&net->ipv6.cache_resolve_queue_len); break; } } @@ -1153,16 +1166,16 @@ static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock) * Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct sock *sk) +static void mroute_clean_tables(struct net *net) { int i; /* * Shut down all active vif entries */ - for (i = 0; i < init_net.ipv6.maxvif; i++) { - if (!(init_net.ipv6.vif6_table[i].flags & VIFF_STATIC)) - mif6_delete(i); + for (i = 0; i < net->ipv6.maxvif; i++) { + if (!(net->ipv6.vif6_table[i].flags & VIFF_STATIC)) + mif6_delete(net, i); } /* @@ -1171,7 +1184,7 @@ static void mroute_clean_tables(struct sock *sk) for (i = 0; i < MFC6_LINES; i++) { struct mfc6_cache *c, **cp; - cp = &init_net.ipv6.mfc6_cache_array[i]; + cp = &net->ipv6.mfc6_cache_array[i]; while ((c = *cp) != NULL) { if (c->mfc_flags & MFC_STATIC) { cp = &c->next; @@ -1185,13 +1198,13 @@ static void mroute_clean_tables(struct sock *sk) } } - if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) != 0) { + if (atomic_read(&net->ipv6.cache_resolve_queue_len) != 0) { struct mfc6_cache *c, **cp; spin_lock_bh(&mfc_unres_lock); cp = &mfc_unres_queue; while ((c = *cp) != NULL) { - if (!net_eq(mfc6_net(c), &init_net)) { + if (!net_eq(mfc6_net(c), net)) { cp = &c->next; continue; } @@ -1205,11 +1218,12 @@ static void mroute_clean_tables(struct sock *sk) static int ip6mr_sk_init(struct sock *sk) { int err = 0; + struct net *net = sock_net(sk); rtnl_lock(); write_lock_bh(&mrt_lock); - if (likely(init_net.ipv6.mroute6_sk == NULL)) - init_net.ipv6.mroute6_sk = sk; + if (likely(net->ipv6.mroute6_sk == NULL)) + net->ipv6.mroute6_sk = sk; else err = -EADDRINUSE; write_unlock_bh(&mrt_lock); @@ -1222,14 +1236,15 @@ static int ip6mr_sk_init(struct sock *sk) int ip6mr_sk_done(struct sock *sk) { int err = 0; + struct net *net = sock_net(sk); rtnl_lock(); - if (sk == init_net.ipv6.mroute6_sk) { + if (sk == net->ipv6.mroute6_sk) { write_lock_bh(&mrt_lock); - init_net.ipv6.mroute6_sk = NULL; + net->ipv6.mroute6_sk = NULL; write_unlock_bh(&mrt_lock); - mroute_clean_tables(sk); + mroute_clean_tables(net); } else err = -EACCES; rtnl_unlock(); @@ -1250,9 +1265,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int struct mif6ctl vif; struct mf6cctl mfc; mifi_t mifi; + struct net *net = sock_net(sk); if (optname != MRT6_INIT) { - if (sk != init_net.ipv6.mroute6_sk && !capable(CAP_NET_ADMIN)) + if (sk != net->ipv6.mroute6_sk && !capable(CAP_NET_ADMIN)) return -EACCES; } @@ -1277,7 +1293,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int if (vif.mif6c_mifi >= MAXMIFS) return -ENFILE; rtnl_lock(); - ret = mif6_add(&vif, sk == init_net.ipv6.mroute6_sk); + ret = mif6_add(net, &vif, sk == net->ipv6.mroute6_sk); rtnl_unlock(); return ret; @@ -1287,7 +1303,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int if (copy_from_user(&mifi, optval, sizeof(mifi_t))) return -EFAULT; rtnl_lock(); - ret = mif6_delete(mifi); + ret = mif6_delete(net, mifi); rtnl_unlock(); return ret; @@ -1303,9 +1319,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int return -EFAULT; rtnl_lock(); if (optname == MRT6_DEL_MFC) - ret = ip6mr_mfc_delete(&mfc); + ret = ip6mr_mfc_delete(net, &mfc); else - ret = ip6mr_mfc_add(&mfc, sk == init_net.ipv6.mroute6_sk); + ret = ip6mr_mfc_add(net, &mfc, + sk == net->ipv6.mroute6_sk); rtnl_unlock(); return ret; @@ -1317,7 +1334,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int int v; if (get_user(v, (int __user *)optval)) return -EFAULT; - init_net.ipv6.mroute_do_assert = !!v; + net->ipv6.mroute_do_assert = !!v; return 0; } @@ -1330,10 +1347,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int v = !!v; rtnl_lock(); ret = 0; - if (v != init_net.ipv6.mroute_do_pim) { - init_net.ipv6.mroute_do_pim = v; - init_net.ipv6.mroute_do_assert = v; - if (init_net.ipv6.mroute_do_pim) + if (v != net->ipv6.mroute_do_pim) { + net->ipv6.mroute_do_pim = v; + net->ipv6.mroute_do_assert = v; + if (net->ipv6.mroute_do_pim) ret = inet6_add_protocol(&pim6_protocol, IPPROTO_PIM); else @@ -1365,6 +1382,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, { int olr; int val; + struct net *net = sock_net(sk); switch (optname) { case MRT6_VERSION: @@ -1372,11 +1390,11 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, break; #ifdef CONFIG_IPV6_PIMSM_V2 case MRT6_PIM: - val = init_net.ipv6.mroute_do_pim; + val = net->ipv6.mroute_do_pim; break; #endif case MRT6_ASSERT: - val = init_net.ipv6.mroute_do_assert; + val = net->ipv6.mroute_do_assert; break; default: return -ENOPROTOOPT; @@ -1406,16 +1424,17 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) struct sioc_mif_req6 vr; struct mif_device *vif; struct mfc6_cache *c; + struct net *net = sock_net(sk); switch (cmd) { case SIOCGETMIFCNT_IN6: if (copy_from_user(&vr, arg, sizeof(vr))) return -EFAULT; - if (vr.mifi >= init_net.ipv6.maxvif) + if (vr.mifi >= net->ipv6.maxvif) return -EINVAL; read_lock(&mrt_lock); - vif = &init_net.ipv6.vif6_table[vr.mifi]; - if (MIF_EXISTS(&init_net, vr.mifi)) { + vif = &net->ipv6.vif6_table[vr.mifi]; + if (MIF_EXISTS(net, vr.mifi)) { vr.icount = vif->pkt_in; vr.ocount = vif->pkt_out; vr.ibytes = vif->bytes_in; @@ -1433,7 +1452,7 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) return -EFAULT; read_lock(&mrt_lock); - c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr); + c = ip6mr_cache_find(net, &sr.src.sin6_addr, &sr.grp.sin6_addr); if (c) { sr.pktcnt = c->mfc_un.res.pkt; sr.bytecnt = c->mfc_un.res.bytes; @@ -1466,7 +1485,8 @@ static inline int ip6mr_forward2_finish(struct sk_buff *skb) static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi) { struct ipv6hdr *ipv6h; - struct mif_device *vif = &init_net.ipv6.vif6_table[vifi]; + struct net *net = mfc6_net(c); + struct mif_device *vif = &net->ipv6.vif6_table[vifi]; struct net_device *dev; struct dst_entry *dst; struct flowi fl; @@ -1480,7 +1500,7 @@ static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi) vif->bytes_out += skb->len; vif->dev->stats.tx_bytes += skb->len; vif->dev->stats.tx_packets++; - ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT); + ip6mr_cache_report(net, skb, vifi, MRT6MSG_WHOLEPKT); kfree_skb(skb); return 0; } @@ -1495,7 +1515,7 @@ static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi) } }; - dst = ip6_route_output(&init_net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl); if (!dst) goto out_free; @@ -1538,9 +1558,10 @@ out_free: static int ip6mr_find_vif(struct net_device *dev) { + struct net *net = dev_net(dev); int ct; - for (ct = init_net.ipv6.maxvif - 1; ct >= 0; ct--) { - if (init_net.ipv6.vif6_table[ct].dev == dev) + for (ct = net->ipv6.maxvif - 1; ct >= 0; ct--) { + if (net->ipv6.vif6_table[ct].dev == dev) break; } return ct; @@ -1550,6 +1571,7 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache) { int psend = -1; int vif, ct; + struct net *net = mfc6_net(cache); vif = cache->mf6c_parent; cache->mfc_un.res.pkt++; @@ -1558,30 +1580,30 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache) /* * Wrong interface: drop packet and (maybe) send PIM assert. */ - if (init_net.ipv6.vif6_table[vif].dev != skb->dev) { + if (net->ipv6.vif6_table[vif].dev != skb->dev) { int true_vifi; cache->mfc_un.res.wrong_if++; true_vifi = ip6mr_find_vif(skb->dev); - if (true_vifi >= 0 && init_net.ipv6.mroute_do_assert && + if (true_vifi >= 0 && net->ipv6.mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, so that we cannot check that packet arrived on an oif. It is bad, but otherwise we would need to move pretty large chunk of pimd to kernel. Ough... --ANK */ - (init_net.ipv6.mroute_do_pim || + (net->ipv6.mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) && time_after(jiffies, cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { cache->mfc_un.res.last_assert = jiffies; - ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF); + ip6mr_cache_report(net, skb, true_vifi, MRT6MSG_WRONGMIF); } goto dont_forward; } - init_net.ipv6.vif6_table[vif].pkt_in++; - init_net.ipv6.vif6_table[vif].bytes_in += skb->len; + net->ipv6.vif6_table[vif].pkt_in++; + net->ipv6.vif6_table[vif].bytes_in += skb->len; /* * Forward the frame @@ -1614,9 +1636,11 @@ dont_forward: int ip6_mr_input(struct sk_buff *skb) { struct mfc6_cache *cache; + struct net *net = dev_net(skb->dev); read_lock(&mrt_lock); - cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); + cache = ip6mr_cache_find(net, + &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); /* * No usable cache entry @@ -1626,7 +1650,7 @@ int ip6_mr_input(struct sk_buff *skb) vif = ip6mr_find_vif(skb->dev); if (vif >= 0) { - int err = ip6mr_cache_unresolved(vif, skb); + int err = ip6mr_cache_unresolved(net, vif, skb); read_unlock(&mrt_lock); return err; @@ -1649,7 +1673,8 @@ ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm) { int ct; struct rtnexthop *nhp; - struct net_device *dev = init_net.ipv6.vif6_table[c->mf6c_parent].dev; + struct net *net = mfc6_net(c); + struct net_device *dev = net->ipv6.vif6_table[c->mf6c_parent].dev; u8 *b = skb_tail_pointer(skb); struct rtattr *mp_head; @@ -1665,7 +1690,7 @@ ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm) nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); nhp->rtnh_flags = 0; nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; - nhp->rtnh_ifindex = init_net.ipv6.vif6_table[ct].dev->ifindex; + nhp->rtnh_ifindex = net->ipv6.vif6_table[ct].dev->ifindex; nhp->rtnh_len = sizeof(*nhp); } } @@ -1679,14 +1704,15 @@ rtattr_failure: return -EMSGSIZE; } -int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) +int ip6mr_get_route(struct net *net, + struct sk_buff *skb, struct rtmsg *rtm, int nowait) { int err; struct mfc6_cache *cache; struct rt6_info *rt = (struct rt6_info *)skb->dst; read_lock(&mrt_lock); - cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr); + cache = ip6mr_cache_find(net, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); if (!cache) { struct sk_buff *skb2; @@ -1729,7 +1755,7 @@ int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr); ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr); - err = ip6mr_cache_unresolved(vif, skb2); + err = ip6mr_cache_unresolved(net, vif, skb2); read_unlock(&mrt_lock); return err; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9da1ece466a2..18c486cf4987 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2194,7 +2194,7 @@ static int rt6_fill_node(struct net *net, if (iif) { #ifdef CONFIG_IPV6_MROUTE if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { - int err = ip6mr_get_route(skb, rtm, nowait); + int err = ip6mr_get_route(net, skb, rtm, nowait); if (err <= 0) { if (!nowait) { if (err == 0) -- cgit v1.2.3 From 6c34bc2976b30dc8b56392c020e25bae1f363cab Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 10 Dec 2008 09:26:17 -0800 Subject: Revert "radeonfb: accelerate imageblit and other improvements" This reverts commit b1ee26bab14886350ba12a5c10cbc0696ac679bf, along with the "fixes" for it that all just caused problems: - c4c6fa9891f3d1bcaae4f39fb751d5302965b566 "radeonfb: fix problem with color expansion & alignment" - f3179748a157c21d44d929fd3779421ebfbeaa93 "radeonfb: Disable new color expand acceleration unless explicitely enabled" because even when disabled, it breaks for people. See http://bugzilla.kernel.org/show_bug.cgi?id=12191 for the latest example. Acked-by: Benjamin Herrenschmidt Acked-by: David S. Miller Cc: Krzysztof Halasa Cc: James Cloos Cc: "Rafael J. Wysocki" Cc: Krzysztof Helt Cc: Jean-Luc Coulon Cc: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/aty/radeon_accel.c | 295 ++++++++++------------------------- drivers/video/aty/radeon_backlight.c | 2 +- drivers/video/aty/radeon_base.c | 46 ++---- drivers/video/aty/radeon_pm.c | 6 +- drivers/video/aty/radeonfb.h | 40 ++--- include/video/radeon.h | 18 +-- 6 files changed, 127 insertions(+), 280 deletions(-) (limited to 'include') diff --git a/drivers/video/aty/radeon_accel.c b/drivers/video/aty/radeon_accel.c index 8da5e5ab8547..a469a3d6edcb 100644 --- a/drivers/video/aty/radeon_accel.c +++ b/drivers/video/aty/radeon_accel.c @@ -5,61 +5,61 @@ * --dte */ -#define FLUSH_CACHE_WORKAROUND 1 - -void radeon_fifo_update_and_wait(struct radeonfb_info *rinfo, int entries) +static void radeon_fixup_offset(struct radeonfb_info *rinfo) { - int i; + u32 local_base; + + /* *** Ugly workaround *** */ + /* + * On some platforms, the video memory is mapped at 0 in radeon chip space + * (like PPCs) by the firmware. X will always move it up so that it's seen + * by the chip to be at the same address as the PCI BAR. + * That means that when switching back from X, there is a mismatch between + * the offsets programmed into the engine. This means that potentially, + * accel operations done before radeonfb has a chance to re-init the engine + * will have incorrect offsets, and potentially trash system memory ! + * + * The correct fix is for fbcon to never call any accel op before the engine + * has properly been re-initialized (by a call to set_var), but this is a + * complex fix. This workaround in the meantime, called before every accel + * operation, makes sure the offsets are in sync. + */ - for (i=0; i<2000000; i++) { - rinfo->fifo_free = INREG(RBBM_STATUS) & 0x7f; - if (rinfo->fifo_free >= entries) - return; - udelay(10); - } - printk(KERN_ERR "radeonfb: FIFO Timeout !\n"); - /* XXX Todo: attempt to reset the engine */ -} + radeon_fifo_wait (1); + local_base = INREG(MC_FB_LOCATION) << 16; + if (local_base == rinfo->fb_local_base) + return; -static inline void radeon_fifo_wait(struct radeonfb_info *rinfo, int entries) -{ - if (entries <= rinfo->fifo_free) - rinfo->fifo_free -= entries; - else - radeon_fifo_update_and_wait(rinfo, entries); -} + rinfo->fb_local_base = local_base; -static inline void radeonfb_set_creg(struct radeonfb_info *rinfo, u32 reg, - u32 *cache, u32 new_val) -{ - if (new_val == *cache) - return; - *cache = new_val; - radeon_fifo_wait(rinfo, 1); - OUTREG(reg, new_val); + radeon_fifo_wait (3); + OUTREG(DEFAULT_PITCH_OFFSET, (rinfo->pitch << 0x16) | + (rinfo->fb_local_base >> 10)); + OUTREG(DST_PITCH_OFFSET, (rinfo->pitch << 0x16) | (rinfo->fb_local_base >> 10)); + OUTREG(SRC_PITCH_OFFSET, (rinfo->pitch << 0x16) | (rinfo->fb_local_base >> 10)); } static void radeonfb_prim_fillrect(struct radeonfb_info *rinfo, const struct fb_fillrect *region) { - radeonfb_set_creg(rinfo, DP_GUI_MASTER_CNTL, &rinfo->dp_gui_mc_cache, - rinfo->dp_gui_mc_base | GMC_BRUSH_SOLID_COLOR | ROP3_P); - radeonfb_set_creg(rinfo, DP_CNTL, &rinfo->dp_cntl_cache, - DST_X_LEFT_TO_RIGHT | DST_Y_TOP_TO_BOTTOM); - radeonfb_set_creg(rinfo, DP_BRUSH_FRGD_CLR, &rinfo->dp_brush_fg_cache, - region->color); - - /* Ensure the dst cache is flushed and the engine idle before - * issuing the operation. - * - * This works around engine lockups on some cards - */ -#if FLUSH_CACHE_WORKAROUND - radeon_fifo_wait(rinfo, 2); + radeon_fifo_wait(4); + + OUTREG(DP_GUI_MASTER_CNTL, + rinfo->dp_gui_master_cntl /* contains, like GMC_DST_32BPP */ + | GMC_BRUSH_SOLID_COLOR + | ROP3_P); + if (radeon_get_dstbpp(rinfo->depth) != DST_8BPP) + OUTREG(DP_BRUSH_FRGD_CLR, rinfo->pseudo_palette[region->color]); + else + OUTREG(DP_BRUSH_FRGD_CLR, region->color); + OUTREG(DP_WRITE_MSK, 0xffffffff); + OUTREG(DP_CNTL, (DST_X_LEFT_TO_RIGHT | DST_Y_TOP_TO_BOTTOM)); + + radeon_fifo_wait(2); OUTREG(DSTCACHE_CTLSTAT, RB2D_DC_FLUSH_ALL); OUTREG(WAIT_UNTIL, (WAIT_2D_IDLECLEAN | WAIT_DMA_GUI_IDLE)); -#endif - radeon_fifo_wait(rinfo, 2); + + radeon_fifo_wait(2); OUTREG(DST_Y_X, (region->dy << 16) | region->dx); OUTREG(DST_WIDTH_HEIGHT, (region->width << 16) | region->height); } @@ -70,14 +70,15 @@ void radeonfb_fillrect(struct fb_info *info, const struct fb_fillrect *region) struct fb_fillrect modded; int vxres, vyres; - WARN_ON(rinfo->gfx_mode); - if (info->state != FBINFO_STATE_RUNNING || rinfo->gfx_mode) + if (info->state != FBINFO_STATE_RUNNING) return; if (info->flags & FBINFO_HWACCEL_DISABLED) { cfb_fillrect(info, region); return; } + radeon_fixup_offset(rinfo); + vxres = info->var.xres_virtual; vyres = info->var.yres_virtual; @@ -90,10 +91,6 @@ void radeonfb_fillrect(struct fb_info *info, const struct fb_fillrect *region) if(modded.dx + modded.width > vxres) modded.width = vxres - modded.dx; if(modded.dy + modded.height > vyres) modded.height = vyres - modded.dy; - if (info->fix.visual == FB_VISUAL_TRUECOLOR || - info->fix.visual == FB_VISUAL_DIRECTCOLOR ) - modded.color = ((u32 *) (info->pseudo_palette))[region->color]; - radeonfb_prim_fillrect(rinfo, &modded); } @@ -112,22 +109,22 @@ static void radeonfb_prim_copyarea(struct radeonfb_info *rinfo, if ( xdir < 0 ) { sx += w-1; dx += w-1; } if ( ydir < 0 ) { sy += h-1; dy += h-1; } - radeonfb_set_creg(rinfo, DP_GUI_MASTER_CNTL, &rinfo->dp_gui_mc_cache, - rinfo->dp_gui_mc_base | - GMC_BRUSH_NONE | - GMC_SRC_DATATYPE_COLOR | - ROP3_S | - DP_SRC_SOURCE_MEMORY); - radeonfb_set_creg(rinfo, DP_CNTL, &rinfo->dp_cntl_cache, - (xdir>=0 ? DST_X_LEFT_TO_RIGHT : 0) | - (ydir>=0 ? DST_Y_TOP_TO_BOTTOM : 0)); - -#if FLUSH_CACHE_WORKAROUND - radeon_fifo_wait(rinfo, 2); + radeon_fifo_wait(3); + OUTREG(DP_GUI_MASTER_CNTL, + rinfo->dp_gui_master_cntl /* i.e. GMC_DST_32BPP */ + | GMC_BRUSH_NONE + | GMC_SRC_DSTCOLOR + | ROP3_S + | DP_SRC_SOURCE_MEMORY ); + OUTREG(DP_WRITE_MSK, 0xffffffff); + OUTREG(DP_CNTL, (xdir>=0 ? DST_X_LEFT_TO_RIGHT : 0) + | (ydir>=0 ? DST_Y_TOP_TO_BOTTOM : 0)); + + radeon_fifo_wait(2); OUTREG(DSTCACHE_CTLSTAT, RB2D_DC_FLUSH_ALL); OUTREG(WAIT_UNTIL, (WAIT_2D_IDLECLEAN | WAIT_DMA_GUI_IDLE)); -#endif - radeon_fifo_wait(rinfo, 3); + + radeon_fifo_wait(3); OUTREG(SRC_Y_X, (sy << 16) | sx); OUTREG(DST_Y_X, (dy << 16) | dx); OUTREG(DST_HEIGHT_WIDTH, (h << 16) | w); @@ -146,14 +143,15 @@ void radeonfb_copyarea(struct fb_info *info, const struct fb_copyarea *area) modded.width = area->width; modded.height = area->height; - WARN_ON(rinfo->gfx_mode); - if (info->state != FBINFO_STATE_RUNNING || rinfo->gfx_mode) + if (info->state != FBINFO_STATE_RUNNING) return; if (info->flags & FBINFO_HWACCEL_DISABLED) { cfb_copyarea(info, area); return; } + radeon_fixup_offset(rinfo); + vxres = info->var.xres_virtual; vyres = info->var.yres_virtual; @@ -170,116 +168,13 @@ void radeonfb_copyarea(struct fb_info *info, const struct fb_copyarea *area) radeonfb_prim_copyarea(rinfo, &modded); } -static void radeonfb_prim_imageblit(struct radeonfb_info *rinfo, - const struct fb_image *image, - u32 fg, u32 bg) -{ - unsigned int dwords; - u32 *bits; - - radeonfb_set_creg(rinfo, DP_GUI_MASTER_CNTL, &rinfo->dp_gui_mc_cache, - rinfo->dp_gui_mc_base | - GMC_BRUSH_NONE | GMC_DST_CLIP_LEAVE | - GMC_SRC_DATATYPE_MONO_FG_BG | - ROP3_S | - GMC_BYTE_ORDER_MSB_TO_LSB | - DP_SRC_SOURCE_HOST_DATA); - radeonfb_set_creg(rinfo, DP_CNTL, &rinfo->dp_cntl_cache, - DST_X_LEFT_TO_RIGHT | DST_Y_TOP_TO_BOTTOM); - radeonfb_set_creg(rinfo, DP_SRC_FRGD_CLR, &rinfo->dp_src_fg_cache, fg); - radeonfb_set_creg(rinfo, DP_SRC_BKGD_CLR, &rinfo->dp_src_bg_cache, bg); - - /* Ensure the dst cache is flushed and the engine idle before - * issuing the operation. - * - * This works around engine lockups on some cards - */ -#if FLUSH_CACHE_WORKAROUND - radeon_fifo_wait(rinfo, 2); - OUTREG(DSTCACHE_CTLSTAT, RB2D_DC_FLUSH_ALL); - OUTREG(WAIT_UNTIL, (WAIT_2D_IDLECLEAN | WAIT_DMA_GUI_IDLE)); -#endif - - /* X here pads width to a multiple of 32 and uses the clipper to - * adjust the result. Is that really necessary ? Things seem to - * work ok for me without that and the doco doesn't seem to imply] - * there is such a restriction. - */ - radeon_fifo_wait(rinfo, 4); - OUTREG(SC_TOP_LEFT, (image->dy << 16) | image->dx); - OUTREG(SC_BOTTOM_RIGHT, ((image->dy + image->height) << 16) | - (image->dx + image->width)); - OUTREG(DST_Y_X, (image->dy << 16) | image->dx); - - OUTREG(DST_HEIGHT_WIDTH, (image->height << 16) | ((image->width + 31) & ~31)); - - dwords = (image->width + 31) >> 5; - dwords *= image->height; - bits = (u32*)(image->data); - - while(dwords >= 8) { - radeon_fifo_wait(rinfo, 8); -#if BITS_PER_LONG == 64 - __raw_writeq(*((u64 *)(bits)), rinfo->mmio_base + HOST_DATA0); - __raw_writeq(*((u64 *)(bits+2)), rinfo->mmio_base + HOST_DATA2); - __raw_writeq(*((u64 *)(bits+4)), rinfo->mmio_base + HOST_DATA4); - __raw_writeq(*((u64 *)(bits+6)), rinfo->mmio_base + HOST_DATA6); - bits += 8; -#else - __raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA0); - __raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA1); - __raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA2); - __raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA3); - __raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA4); - __raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA5); - __raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA6); - __raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA7); -#endif - dwords -= 8; - } - while(dwords--) { - radeon_fifo_wait(rinfo, 1); - __raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA0); - } -} - void radeonfb_imageblit(struct fb_info *info, const struct fb_image *image) { struct radeonfb_info *rinfo = info->par; - u32 fg, bg; - WARN_ON(rinfo->gfx_mode); - if (info->state != FBINFO_STATE_RUNNING || rinfo->gfx_mode) - return; - - if (!image->width || !image->height) + if (info->state != FBINFO_STATE_RUNNING) return; - - /* We only do 1 bpp color expansion for now */ - if (!accel_cexp || - (info->flags & FBINFO_HWACCEL_DISABLED) || image->depth != 1) - goto fallback; - - /* Fallback if running out of the screen. We may do clipping - * in the future */ - if ((image->dx + image->width) > info->var.xres_virtual || - (image->dy + image->height) > info->var.yres_virtual) - goto fallback; - - if (info->fix.visual == FB_VISUAL_TRUECOLOR || - info->fix.visual == FB_VISUAL_DIRECTCOLOR) { - fg = ((u32*)(info->pseudo_palette))[image->fg_color]; - bg = ((u32*)(info->pseudo_palette))[image->bg_color]; - } else { - fg = image->fg_color; - bg = image->bg_color; - } - - radeonfb_prim_imageblit(rinfo, image, fg, bg); - return; - - fallback: - radeon_engine_idle(rinfo); + radeon_engine_idle(); cfb_imageblit(info, image); } @@ -290,8 +185,7 @@ int radeonfb_sync(struct fb_info *info) if (info->state != FBINFO_STATE_RUNNING) return 0; - - radeon_engine_idle(rinfo); + radeon_engine_idle(); return 0; } @@ -367,10 +261,9 @@ void radeonfb_engine_init (struct radeonfb_info *rinfo) /* disable 3D engine */ OUTREG(RB3D_CNTL, 0); - rinfo->fifo_free = 0; radeonfb_engine_reset(rinfo); - radeon_fifo_wait(rinfo, 1); + radeon_fifo_wait (1); if (IS_R300_VARIANT(rinfo)) { OUTREG(RB2D_DSTCACHE_MODE, INREG(RB2D_DSTCACHE_MODE) | RB2D_DC_AUTOFLUSH_ENABLE | @@ -384,7 +277,7 @@ void radeonfb_engine_init (struct radeonfb_info *rinfo) OUTREG(RB2D_DSTCACHE_MODE, 0); } - radeon_fifo_wait(rinfo, 3); + radeon_fifo_wait (3); /* We re-read MC_FB_LOCATION from card as it can have been * modified by XFree drivers (ouch !) */ @@ -395,57 +288,41 @@ void radeonfb_engine_init (struct radeonfb_info *rinfo) OUTREG(DST_PITCH_OFFSET, (rinfo->pitch << 0x16) | (rinfo->fb_local_base >> 10)); OUTREG(SRC_PITCH_OFFSET, (rinfo->pitch << 0x16) | (rinfo->fb_local_base >> 10)); - radeon_fifo_wait(rinfo, 1); -#ifdef __BIG_ENDIAN + radeon_fifo_wait (1); +#if defined(__BIG_ENDIAN) OUTREGP(DP_DATATYPE, HOST_BIG_ENDIAN_EN, ~HOST_BIG_ENDIAN_EN); #else OUTREGP(DP_DATATYPE, 0, ~HOST_BIG_ENDIAN_EN); #endif - radeon_fifo_wait(rinfo, 2); + radeon_fifo_wait (2); OUTREG(DEFAULT_SC_TOP_LEFT, 0); OUTREG(DEFAULT_SC_BOTTOM_RIGHT, (DEFAULT_SC_RIGHT_MAX | DEFAULT_SC_BOTTOM_MAX)); - /* set default DP_GUI_MASTER_CNTL */ temp = radeon_get_dstbpp(rinfo->depth); - rinfo->dp_gui_mc_base = ((temp << 8) | GMC_CLR_CMP_CNTL_DIS); + rinfo->dp_gui_master_cntl = ((temp << 8) | GMC_CLR_CMP_CNTL_DIS); - rinfo->dp_gui_mc_cache = rinfo->dp_gui_mc_base | - GMC_BRUSH_SOLID_COLOR | - GMC_SRC_DATATYPE_COLOR; - radeon_fifo_wait(rinfo, 1); - OUTREG(DP_GUI_MASTER_CNTL, rinfo->dp_gui_mc_cache); + radeon_fifo_wait (1); + OUTREG(DP_GUI_MASTER_CNTL, (rinfo->dp_gui_master_cntl | + GMC_BRUSH_SOLID_COLOR | + GMC_SRC_DATATYPE_COLOR)); + radeon_fifo_wait (7); /* clear line drawing regs */ - radeon_fifo_wait(rinfo, 2); OUTREG(DST_LINE_START, 0); OUTREG(DST_LINE_END, 0); - /* set brush and source color regs */ - rinfo->dp_brush_fg_cache = 0xffffffff; - rinfo->dp_brush_bg_cache = 0x00000000; - rinfo->dp_src_fg_cache = 0xffffffff; - rinfo->dp_src_bg_cache = 0x00000000; - radeon_fifo_wait(rinfo, 4); - OUTREG(DP_BRUSH_FRGD_CLR, rinfo->dp_brush_fg_cache); - OUTREG(DP_BRUSH_BKGD_CLR, rinfo->dp_brush_bg_cache); - OUTREG(DP_SRC_FRGD_CLR, rinfo->dp_src_fg_cache); - OUTREG(DP_SRC_BKGD_CLR, rinfo->dp_src_bg_cache); - - /* Default direction */ - rinfo->dp_cntl_cache = DST_X_LEFT_TO_RIGHT | DST_Y_TOP_TO_BOTTOM; - radeon_fifo_wait(rinfo, 1); - OUTREG(DP_CNTL, rinfo->dp_cntl_cache); + /* set brush color regs */ + OUTREG(DP_BRUSH_FRGD_CLR, 0xffffffff); + OUTREG(DP_BRUSH_BKGD_CLR, 0x00000000); + + /* set source color regs */ + OUTREG(DP_SRC_FRGD_CLR, 0xffffffff); + OUTREG(DP_SRC_BKGD_CLR, 0x00000000); /* default write mask */ - radeon_fifo_wait(rinfo, 1); OUTREG(DP_WRITE_MSK, 0xffffffff); - /* Default to no swapping of host data */ - radeon_fifo_wait(rinfo, 1); - OUTREG(RBBM_GUICNTL, RBBM_GUICNTL_HOST_DATA_SWAP_NONE); - - /* Make sure it's settled */ - radeon_engine_idle(rinfo); + radeon_engine_idle (); } diff --git a/drivers/video/aty/radeon_backlight.c b/drivers/video/aty/radeon_backlight.c index f343ba83f0ae..1a056adb61c8 100644 --- a/drivers/video/aty/radeon_backlight.c +++ b/drivers/video/aty/radeon_backlight.c @@ -66,7 +66,7 @@ static int radeon_bl_update_status(struct backlight_device *bd) level = bd->props.brightness; del_timer_sync(&rinfo->lvds_timer); - radeon_engine_idle(rinfo); + radeon_engine_idle(); lvds_gen_cntl = INREG(LVDS_GEN_CNTL); if (level > 0) { diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c index d5b27f9d374d..d0f1a7fc2c9d 100644 --- a/drivers/video/aty/radeon_base.c +++ b/drivers/video/aty/radeon_base.c @@ -282,8 +282,6 @@ static int backlight = 1; static int backlight = 0; #endif -int accel_cexp = 0; - /* * prototypes */ @@ -854,6 +852,7 @@ static int radeonfb_pan_display (struct fb_var_screeninfo *var, if (rinfo->asleep) return 0; + radeon_fifo_wait(2); OUTREG(CRTC_OFFSET, ((var->yoffset * var->xres_virtual + var->xoffset) * var->bits_per_pixel / 8) & ~7); return 0; @@ -883,6 +882,7 @@ static int radeonfb_ioctl (struct fb_info *info, unsigned int cmd, if (rc) return rc; + radeon_fifo_wait(2); if (value & 0x01) { tmp = INREG(LVDS_GEN_CNTL); @@ -940,7 +940,7 @@ int radeon_screen_blank(struct radeonfb_info *rinfo, int blank, int mode_switch) if (rinfo->lock_blank) return 0; - radeon_engine_idle(rinfo); + radeon_engine_idle(); val = INREG(CRTC_EXT_CNTL); val &= ~(CRTC_DISPLAY_DIS | CRTC_HSYNC_DIS | @@ -1048,7 +1048,7 @@ static int radeonfb_blank (int blank, struct fb_info *info) if (rinfo->asleep) return 0; - + return radeon_screen_blank(rinfo, blank, 0); } @@ -1074,6 +1074,8 @@ static int radeon_setcolreg (unsigned regno, unsigned red, unsigned green, pindex = regno; if (!rinfo->asleep) { + radeon_fifo_wait(9); + if (rinfo->bpp == 16) { pindex = regno * 8; @@ -1242,6 +1244,8 @@ static void radeon_write_pll_regs(struct radeonfb_info *rinfo, struct radeon_reg { int i; + radeon_fifo_wait(20); + /* Workaround from XFree */ if (rinfo->is_mobility) { /* A temporal workaround for the occational blanking on certain laptop @@ -1337,7 +1341,7 @@ static void radeon_lvds_timer_func(unsigned long data) { struct radeonfb_info *rinfo = (struct radeonfb_info *)data; - radeon_engine_idle(rinfo); + radeon_engine_idle(); OUTREG(LVDS_GEN_CNTL, rinfo->pending_lvds_gen_cntl); } @@ -1355,11 +1359,10 @@ void radeon_write_mode (struct radeonfb_info *rinfo, struct radeon_regs *mode, if (nomodeset) return; - radeon_engine_idle(rinfo); - if (!regs_only) radeon_screen_blank(rinfo, FB_BLANK_NORMAL, 0); + radeon_fifo_wait(31); for (i=0; i<10; i++) OUTREG(common_regs[i].reg, common_regs[i].val); @@ -1387,6 +1390,7 @@ void radeon_write_mode (struct radeonfb_info *rinfo, struct radeon_regs *mode, radeon_write_pll_regs(rinfo, mode); if ((primary_mon == MT_DFP) || (primary_mon == MT_LCD)) { + radeon_fifo_wait(10); OUTREG(FP_CRTC_H_TOTAL_DISP, mode->fp_crtc_h_total_disp); OUTREG(FP_CRTC_V_TOTAL_DISP, mode->fp_crtc_v_total_disp); OUTREG(FP_H_SYNC_STRT_WID, mode->fp_h_sync_strt_wid); @@ -1401,6 +1405,7 @@ void radeon_write_mode (struct radeonfb_info *rinfo, struct radeon_regs *mode, if (!regs_only) radeon_screen_blank(rinfo, FB_BLANK_UNBLANK, 0); + radeon_fifo_wait(2); OUTPLL(VCLK_ECP_CNTL, mode->vclk_ecp_cntl); return; @@ -1551,7 +1556,7 @@ static int radeonfb_set_par(struct fb_info *info) /* We always want engine to be idle on a mode switch, even * if we won't actually change the mode */ - radeon_engine_idle(rinfo); + radeon_engine_idle(); hSyncStart = mode->xres + mode->right_margin; hSyncEnd = hSyncStart + mode->hsync_len; @@ -1846,6 +1851,7 @@ static int radeonfb_set_par(struct fb_info *info) return 0; } + static struct fb_ops radeonfb_ops = { .owner = THIS_MODULE, .fb_check_var = radeonfb_check_var, @@ -1869,7 +1875,6 @@ static int __devinit radeon_set_fbinfo (struct radeonfb_info *rinfo) info->par = rinfo; info->pseudo_palette = rinfo->pseudo_palette; info->flags = FBINFO_DEFAULT - | FBINFO_HWACCEL_IMAGEBLIT | FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_FILLRECT | FBINFO_HWACCEL_XPAN @@ -1877,7 +1882,6 @@ static int __devinit radeon_set_fbinfo (struct radeonfb_info *rinfo) info->fbops = &radeonfb_ops; info->screen_base = rinfo->fb_base; info->screen_size = rinfo->mapped_vram; - /* Fill fix common fields */ strlcpy(info->fix.id, rinfo->name, sizeof(info->fix.id)); info->fix.smem_start = rinfo->fb_base_phys; @@ -1892,25 +1896,8 @@ static int __devinit radeon_set_fbinfo (struct radeonfb_info *rinfo) info->fix.mmio_len = RADEON_REGSIZE; info->fix.accel = FB_ACCEL_ATI_RADEON; - /* Allocate colormap */ fb_alloc_cmap(&info->cmap, 256, 0); - /* Setup pixmap used for acceleration */ -#define PIXMAP_SIZE (2048 * 4) - - info->pixmap.addr = kmalloc(PIXMAP_SIZE, GFP_KERNEL); - if (!info->pixmap.addr) { - printk(KERN_ERR "radeonfb: Failed to allocate pixmap !\n"); - noaccel = 1; - goto bail; - } - info->pixmap.size = PIXMAP_SIZE; - info->pixmap.flags = FB_PIXMAP_SYSTEM; - info->pixmap.scan_align = 4; - info->pixmap.buf_align = 4; - info->pixmap.access_align = 32; - -bail: if (noaccel) info->flags |= FBINFO_HWACCEL_DISABLED; @@ -2019,6 +2006,7 @@ static void radeon_identify_vram(struct radeonfb_info *rinfo) u32 tom = INREG(NB_TOM); tmp = ((((tom >> 16) - (tom & 0xffff) + 1) << 6) * 1024); + radeon_fifo_wait(6); OUTREG(MC_FB_LOCATION, tom); OUTREG(DISPLAY_BASE_ADDR, (tom & 0xffff) << 16); OUTREG(CRTC2_DISPLAY_BASE_ADDR, (tom & 0xffff) << 16); @@ -2522,8 +2510,6 @@ static int __init radeonfb_setup (char *options) } else if (!strncmp(this_opt, "ignore_devlist", 14)) { ignore_devlist = 1; #endif - } else if (!strncmp(this_opt, "accel_cexp", 12)) { - accel_cexp = 1; } else mode_option = this_opt; } @@ -2571,8 +2557,6 @@ module_param(monitor_layout, charp, 0); MODULE_PARM_DESC(monitor_layout, "Specify monitor mapping (like XFree86)"); module_param(force_measure_pll, bool, 0); MODULE_PARM_DESC(force_measure_pll, "Force measurement of PLL (debug)"); -module_param(accel_cexp, bool, 0); -MODULE_PARM_DESC(accel_cexp, "Use acceleration engine for color expansion"); #ifdef CONFIG_MTRR module_param(nomtrr, bool, 0); MODULE_PARM_DESC(nomtrr, "bool: disable use of MTRR registers"); diff --git a/drivers/video/aty/radeon_pm.c b/drivers/video/aty/radeon_pm.c index 3df5015f1d13..675abdafc2d8 100644 --- a/drivers/video/aty/radeon_pm.c +++ b/drivers/video/aty/radeon_pm.c @@ -2653,9 +2653,9 @@ int radeonfb_pci_suspend(struct pci_dev *pdev, pm_message_t mesg) if (!(info->flags & FBINFO_HWACCEL_DISABLED)) { /* Make sure engine is reset */ - radeon_engine_idle(rinfo); + radeon_engine_idle(); radeonfb_engine_reset(rinfo); - radeon_engine_idle(rinfo); + radeon_engine_idle(); } /* Blank display and LCD */ @@ -2767,7 +2767,7 @@ int radeonfb_pci_resume(struct pci_dev *pdev) rinfo->asleep = 0; } else - radeon_engine_idle(rinfo); + radeon_engine_idle(); /* Restore display & engine */ radeon_write_mode (rinfo, &rinfo->state, 1); diff --git a/drivers/video/aty/radeonfb.h b/drivers/video/aty/radeonfb.h index 974ca6d86540..3ea1b00fdd22 100644 --- a/drivers/video/aty/radeonfb.h +++ b/drivers/video/aty/radeonfb.h @@ -336,15 +336,7 @@ struct radeonfb_info { int mon2_type; u8 *mon2_EDID; - /* accel bits */ - u32 dp_gui_mc_base; - u32 dp_gui_mc_cache; - u32 dp_cntl_cache; - u32 dp_brush_fg_cache; - u32 dp_brush_bg_cache; - u32 dp_src_fg_cache; - u32 dp_src_bg_cache; - u32 fifo_free; + u32 dp_gui_master_cntl; struct pll_info pll; @@ -356,7 +348,6 @@ struct radeonfb_info { int lock_blank; int dynclk; int no_schedule; - int gfx_mode; enum radeon_pm_mode pm_mode; reinit_function_ptr reinit_func; @@ -401,14 +392,8 @@ static inline void _radeon_msleep(struct radeonfb_info *rinfo, unsigned long ms) #define OUTREG8(addr,val) writeb(val, (rinfo->mmio_base)+addr) #define INREG16(addr) readw((rinfo->mmio_base)+addr) #define OUTREG16(addr,val) writew(val, (rinfo->mmio_base)+addr) - -#ifdef CONFIG_PPC -#define INREG(addr) ({ eieio(); ld_le32(rinfo->mmio_base+(addr)); }) -#define OUTREG(addr,val) do { eieio(); st_le32(rinfo->mmio_base+(addr),(val)); } while(0) -#else #define INREG(addr) readl((rinfo->mmio_base)+addr) #define OUTREG(addr,val) writel(val, (rinfo->mmio_base)+addr) -#endif static inline void _OUTREGP(struct radeonfb_info *rinfo, u32 addr, u32 val, u32 mask) @@ -550,7 +535,17 @@ static inline u32 radeon_get_dstbpp(u16 depth) * 2D Engine helper routines */ -extern void radeon_fifo_update_and_wait(struct radeonfb_info *rinfo, int entries); +static inline void _radeon_fifo_wait(struct radeonfb_info *rinfo, int entries) +{ + int i; + + for (i=0; i<2000000; i++) { + if ((INREG(RBBM_STATUS) & 0x7f) >= entries) + return; + udelay(1); + } + printk(KERN_ERR "radeonfb: FIFO Timeout !\n"); +} static inline void radeon_engine_flush (struct radeonfb_info *rinfo) { @@ -563,7 +558,7 @@ static inline void radeon_engine_flush (struct radeonfb_info *rinfo) /* Ensure FIFO is empty, ie, make sure the flush commands * has reached the cache */ - radeon_fifo_update_and_wait(rinfo, 64); + _radeon_fifo_wait (rinfo, 64); /* Wait for the flush to complete */ for (i=0; i < 2000000; i++) { @@ -575,12 +570,12 @@ static inline void radeon_engine_flush (struct radeonfb_info *rinfo) } -static inline void radeon_engine_idle(struct radeonfb_info *rinfo) +static inline void _radeon_engine_idle(struct radeonfb_info *rinfo) { int i; /* ensure FIFO is empty before waiting for idle */ - radeon_fifo_update_and_wait (rinfo, 64); + _radeon_fifo_wait (rinfo, 64); for (i=0; i<2000000; i++) { if (((INREG(RBBM_STATUS) & GUI_ACTIVE)) == 0) { @@ -593,6 +588,8 @@ static inline void radeon_engine_idle(struct radeonfb_info *rinfo) } +#define radeon_engine_idle() _radeon_engine_idle(rinfo) +#define radeon_fifo_wait(entries) _radeon_fifo_wait(rinfo,entries) #define radeon_msleep(ms) _radeon_msleep(rinfo,ms) @@ -622,7 +619,6 @@ extern void radeonfb_imageblit(struct fb_info *p, const struct fb_image *image); extern int radeonfb_sync(struct fb_info *info); extern void radeonfb_engine_init (struct radeonfb_info *rinfo); extern void radeonfb_engine_reset(struct radeonfb_info *rinfo); -extern void radeon_fixup_mem_offset(struct radeonfb_info *rinfo); /* Other functions */ extern int radeon_screen_blank(struct radeonfb_info *rinfo, int blank, int mode_switch); @@ -638,6 +634,4 @@ static inline void radeonfb_bl_init(struct radeonfb_info *rinfo) {} static inline void radeonfb_bl_exit(struct radeonfb_info *rinfo) {} #endif -extern int accel_cexp; - #endif /* __RADEONFB_H__ */ diff --git a/include/video/radeon.h b/include/video/radeon.h index d5dcaf154ba4..1cd09cc5b169 100644 --- a/include/video/radeon.h +++ b/include/video/radeon.h @@ -525,9 +525,6 @@ #define CRTC_DISPLAY_DIS (1 << 10) #define CRTC_CRT_ON (1 << 15) -/* DSTCACHE_MODE bits constants */ -#define RB2D_DC_AUTOFLUSH_ENABLE (1 << 8) -#define RB2D_DC_DC_DISABLE_IGNORE_PE (1 << 17) /* DSTCACHE_CTLSTAT bit constants */ #define RB2D_DC_FLUSH_2D (1 << 0) @@ -869,10 +866,15 @@ #define GMC_DST_16BPP_YVYU422 0x00000c00 #define GMC_DST_32BPP_AYUV444 0x00000e00 #define GMC_DST_16BPP_ARGB4444 0x00000f00 +#define GMC_SRC_MONO 0x00000000 +#define GMC_SRC_MONO_LBKGD 0x00001000 +#define GMC_SRC_DSTCOLOR 0x00003000 #define GMC_BYTE_ORDER_MSB_TO_LSB 0x00000000 #define GMC_BYTE_ORDER_LSB_TO_MSB 0x00004000 #define GMC_DP_CONVERSION_TEMP_9300 0x00008000 #define GMC_DP_CONVERSION_TEMP_6500 0x00000000 +#define GMC_DP_SRC_RECT 0x02000000 +#define GMC_DP_SRC_HOST 0x03000000 #define GMC_DP_SRC_HOST_BYTEALIGN 0x04000000 #define GMC_3D_FCN_EN_CLR 0x00000000 #define GMC_3D_FCN_EN_SET 0x08000000 @@ -883,9 +885,6 @@ #define GMC_WRITE_MASK_LEAVE 0x00000000 #define GMC_WRITE_MASK_SET 0x40000000 #define GMC_CLR_CMP_CNTL_DIS (1 << 28) -#define GMC_SRC_DATATYPE_MASK (3 << 12) -#define GMC_SRC_DATATYPE_MONO_FG_BG (0 << 12) -#define GMC_SRC_DATATYPE_MONO_FG_LA (1 << 12) #define GMC_SRC_DATATYPE_COLOR (3 << 12) #define ROP3_S 0x00cc0000 #define ROP3_SRCCOPY 0x00cc0000 @@ -894,7 +893,6 @@ #define DP_SRC_SOURCE_MASK (7 << 24) #define GMC_BRUSH_NONE (15 << 4) #define DP_SRC_SOURCE_MEMORY (2 << 24) -#define DP_SRC_SOURCE_HOST_DATA (3 << 24) #define GMC_BRUSH_SOLIDCOLOR 0x000000d0 /* DP_MIX bit constants */ @@ -980,12 +978,6 @@ #define DISP_PWR_MAN_TV_ENABLE_RST (1 << 25) #define DISP_PWR_MAN_AUTO_PWRUP_EN (1 << 26) -/* RBBM_GUICNTL constants */ -#define RBBM_GUICNTL_HOST_DATA_SWAP_NONE (0 << 0) -#define RBBM_GUICNTL_HOST_DATA_SWAP_16BIT (1 << 0) -#define RBBM_GUICNTL_HOST_DATA_SWAP_32BIT (2 << 0) -#define RBBM_GUICNTL_HOST_DATA_SWAP_HDW (3 << 0) - /* masks */ #define CONFIG_MEMSIZE_MASK 0x1f000000 -- cgit v1.2.3 From 4d4be482a4d78ca906f45e99fd9fdb91e907f5ad Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 9 Dec 2008 04:47:33 -0500 Subject: [XFS] add a FMODE flag to make XFS invisible I/O less hacky XFS has a mode called invisble I/O that doesn't update any of the timestamps. It's used for HSM-style applications and exposed through the nasty open by handle ioctl. Instead of doing directly assignment of file operations that set an internal flag for it add a new FMODE_NOCMTIME flag that we can check in the normal file operations. (addition of the generic VFS flag has been ACKed by Al as an interims solution) Signed-off-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_file.c | 145 ++++++----------------------------------- fs/xfs/linux-2.6/xfs_ioctl.c | 29 ++++++--- fs/xfs/linux-2.6/xfs_ioctl.h | 8 +-- fs/xfs/linux-2.6/xfs_ioctl32.c | 56 ++++++---------- fs/xfs/linux-2.6/xfs_iops.h | 1 - fs/xfs/xfs_vnodeops.h | 2 - include/linux/fs.h | 8 +++ 7 files changed, 71 insertions(+), 178 deletions(-) (limited to 'include') diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index f999d20a429c..a0c45cc8a6b8 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -45,80 +45,44 @@ static struct vm_operations_struct xfs_file_vm_ops; -STATIC_INLINE ssize_t -__xfs_file_read( +STATIC ssize_t +xfs_file_aio_read( struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, - int ioflags, loff_t pos) { struct file *file = iocb->ki_filp; + int ioflags = IO_ISAIO; BUG_ON(iocb->ki_pos != pos); if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; + if (file->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; return xfs_read(XFS_I(file->f_path.dentry->d_inode), iocb, iov, nr_segs, &iocb->ki_pos, ioflags); } STATIC ssize_t -xfs_file_aio_read( - struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos) -{ - return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO, pos); -} - -STATIC ssize_t -xfs_file_aio_read_invis( - struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos) -{ - return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos); -} - -STATIC_INLINE ssize_t -__xfs_file_write( +xfs_file_aio_write( struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, - int ioflags, loff_t pos) { - struct file *file = iocb->ki_filp; + struct file *file = iocb->ki_filp; + int ioflags = IO_ISAIO; BUG_ON(iocb->ki_pos != pos); if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; + if (file->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; return xfs_write(XFS_I(file->f_mapping->host), iocb, iov, nr_segs, &iocb->ki_pos, ioflags); } -STATIC ssize_t -xfs_file_aio_write( - struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos) -{ - return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO, pos); -} - -STATIC ssize_t -xfs_file_aio_write_invis( - struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos) -{ - return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos); -} - STATIC ssize_t xfs_file_splice_read( struct file *infilp, @@ -127,20 +91,13 @@ xfs_file_splice_read( size_t len, unsigned int flags) { - return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode), - infilp, ppos, pipe, len, flags, 0); -} + int ioflags = 0; + + if (infilp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; -STATIC ssize_t -xfs_file_splice_read_invis( - struct file *infilp, - loff_t *ppos, - struct pipe_inode_info *pipe, - size_t len, - unsigned int flags) -{ return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode), - infilp, ppos, pipe, len, flags, IO_INVIS); + infilp, ppos, pipe, len, flags, ioflags); } STATIC ssize_t @@ -151,20 +108,13 @@ xfs_file_splice_write( size_t len, unsigned int flags) { - return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode), - pipe, outfilp, ppos, len, flags, 0); -} + int ioflags = 0; + + if (outfilp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; -STATIC ssize_t -xfs_file_splice_write_invis( - struct pipe_inode_info *pipe, - struct file *outfilp, - loff_t *ppos, - size_t len, - unsigned int flags) -{ return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode), - pipe, outfilp, ppos, len, flags, IO_INVIS); + pipe, outfilp, ppos, len, flags, ioflags); } STATIC int @@ -275,42 +225,6 @@ xfs_file_mmap( return 0; } -STATIC long -xfs_file_ioctl( - struct file *filp, - unsigned int cmd, - unsigned long p) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - - - /* NOTE: some of the ioctl's return positive #'s as a - * byte count indicating success, such as - * readlink_by_handle. So we don't "sign flip" - * like most other routines. This means true - * errors need to be returned as a negative value. - */ - return xfs_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p); -} - -STATIC long -xfs_file_ioctl_invis( - struct file *filp, - unsigned int cmd, - unsigned long p) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - - - /* NOTE: some of the ioctl's return positive #'s as a - * byte count indicating success, such as - * readlink_by_handle. So we don't "sign flip" - * like most other routines. This means true - * errors need to be returned as a negative value. - */ - return xfs_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, (void __user *)p); -} - /* * mmap()d file has taken write protection fault and is being made * writable. We can set the page state up correctly for a writable @@ -346,25 +260,6 @@ const struct file_operations xfs_file_operations = { #endif }; -const struct file_operations xfs_invis_file_operations = { - .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = xfs_file_aio_read_invis, - .aio_write = xfs_file_aio_write_invis, - .splice_read = xfs_file_splice_read_invis, - .splice_write = xfs_file_splice_write_invis, - .unlocked_ioctl = xfs_file_ioctl_invis, -#ifdef CONFIG_COMPAT - .compat_ioctl = xfs_file_compat_invis_ioctl, -#endif - .mmap = xfs_file_mmap, - .open = xfs_file_open, - .release = xfs_file_release, - .fsync = xfs_file_fsync, -}; - - const struct file_operations xfs_dir_file_operations = { .open = xfs_dir_open, .read = generic_read_dir, diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index c8f1e632ba94..0264c8719ffd 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -319,10 +319,11 @@ xfs_open_by_handle( put_unused_fd(new_fd); return -XFS_ERROR(-PTR_ERR(filp)); } + if (inode->i_mode & S_IFREG) { /* invisible operation should not change atime */ filp->f_flags |= O_NOATIME; - filp->f_op = &xfs_invis_file_operations; + filp->f_mode |= FMODE_NOCMTIME; } fd_install(new_fd, filp); @@ -1328,21 +1329,31 @@ xfs_ioc_getbmapx( return 0; } -int -xfs_ioctl( - xfs_inode_t *ip, +/* + * Note: some of the ioctl's return positive numbers as a + * byte count indicating success, such as readlink_by_handle. + * So we don't "sign flip" like most other routines. This means + * true errors need to be returned as a negative value. + */ +long +xfs_file_ioctl( struct file *filp, - int ioflags, unsigned int cmd, - void __user *arg) + unsigned long p) { struct inode *inode = filp->f_path.dentry->d_inode; - xfs_mount_t *mp = ip->i_mount; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + void __user *arg = (void __user *)p; + int ioflags = 0; int error; - xfs_itrace_entry(XFS_I(inode)); - switch (cmd) { + if (filp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; + xfs_itrace_entry(ip); + + switch (cmd) { case XFS_IOC_ALLOCSP: case XFS_IOC_FREESP: case XFS_IOC_RESVSP: diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h index d92131c827bc..8c16bf2d7e03 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.h +++ b/fs/xfs/linux-2.6/xfs_ioctl.h @@ -68,13 +68,13 @@ xfs_attrmulti_attr_remove( __uint32_t flags); extern long -xfs_file_compat_ioctl( - struct file *file, +xfs_file_ioctl( + struct file *filp, unsigned int cmd, - unsigned long arg); + unsigned long p); extern long -xfs_file_compat_invis_ioctl( +xfs_file_compat_ioctl( struct file *file, unsigned int cmd, unsigned long arg); diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index b34b3d8892a2..0504cece9f66 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -599,19 +599,24 @@ out: return error; } -STATIC long -xfs_compat_ioctl( - xfs_inode_t *ip, - struct file *filp, - int ioflags, - unsigned cmd, - void __user *arg) +long +xfs_file_compat_ioctl( + struct file *filp, + unsigned cmd, + unsigned long p) { - struct inode *inode = filp->f_path.dentry->d_inode; - xfs_mount_t *mp = ip->i_mount; - int error; + struct inode *inode = filp->f_path.dentry->d_inode; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + void __user *arg = (void __user *)p; + int ioflags = 0; + int error; + + if (filp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; + + xfs_itrace_entry(ip); - xfs_itrace_entry(XFS_I(inode)); switch (cmd) { /* No size or alignment issues on any arch */ case XFS_IOC_DIOINFO: @@ -632,7 +637,7 @@ xfs_compat_ioctl( case XFS_IOC_GOINGDOWN: case XFS_IOC_ERROR_INJECTION: case XFS_IOC_ERROR_CLEARALL: - return xfs_ioctl(ip, filp, ioflags, cmd, arg); + return xfs_file_ioctl(filp, cmd, p); #ifndef BROKEN_X86_ALIGNMENT /* These are handled fine if no alignment issues */ case XFS_IOC_ALLOCSP: @@ -646,7 +651,7 @@ xfs_compat_ioctl( case XFS_IOC_FSGEOMETRY_V1: case XFS_IOC_FSGROWFSDATA: case XFS_IOC_FSGROWFSRT: - return xfs_ioctl(ip, filp, ioflags, cmd, arg); + return xfs_file_ioctl(filp, cmd, p); #else case XFS_IOC_ALLOCSP_32: case XFS_IOC_FREESP_32: @@ -687,7 +692,7 @@ xfs_compat_ioctl( case XFS_IOC_SETXFLAGS_32: case XFS_IOC_GETVERSION_32: cmd = _NATIVE_IOC(cmd, long); - return xfs_ioctl(ip, filp, ioflags, cmd, arg); + return xfs_file_ioctl(filp, cmd, p); case XFS_IOC_SWAPEXT: { struct xfs_swapext sxp; struct compat_xfs_swapext __user *sxu = arg; @@ -738,26 +743,3 @@ xfs_compat_ioctl( return -XFS_ERROR(ENOIOCTLCMD); } } - -long -xfs_file_compat_ioctl( - struct file *filp, - unsigned int cmd, - unsigned long p) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - - return xfs_compat_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p); -} - -long -xfs_file_compat_invis_ioctl( - struct file *filp, - unsigned int cmd, - unsigned long p) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - - return xfs_compat_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, - (void __user *)p); -} diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h index 8b1a1e31dc21..ef41c92ce66e 100644 --- a/fs/xfs/linux-2.6/xfs_iops.h +++ b/fs/xfs/linux-2.6/xfs_iops.h @@ -22,7 +22,6 @@ struct xfs_inode; extern const struct file_operations xfs_file_operations; extern const struct file_operations xfs_dir_file_operations; -extern const struct file_operations xfs_invis_file_operations; extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index 2a45b00ad32e..55d955ec4ece 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -53,8 +53,6 @@ int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value, int xfs_attr_remove(struct xfs_inode *dp, const char *name, int flags); int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize, int flags, struct attrlist_cursor_kern *cursor); -int xfs_ioctl(struct xfs_inode *ip, struct file *filp, - int ioflags, unsigned int cmd, void __user *arg); ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb, const struct iovec *iovp, unsigned int segs, loff_t *offset, int ioflags); diff --git a/include/linux/fs.h b/include/linux/fs.h index 51bd9370d437..965b9ba3865d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -81,6 +81,14 @@ extern int dir_notify_enable; #define FMODE_WRITE_IOCTL ((__force fmode_t)128) #define FMODE_NDELAY_NOW ((__force fmode_t)256) +/* + * Don't update ctime and mtime. + * + * Currently a special hack for the XFS open_by_handle ioctl, but we'll + * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. + */ +#define FMODE_NOCMTIME ((__force fmode_t)2048) + #define RW_MASK 1 #define RWA_MASK 2 #define READ 0 -- cgit v1.2.3 From c2724775ce57c98b8af9694857b941dc61056516 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Thu, 11 Dec 2008 13:49:59 +0100 Subject: x86, bts: provide in-kernel branch-trace interface Impact: cleanup Move the BTS bits from ptrace.c into ds.c. Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ds.h | 241 ++++++----- arch/x86/include/asm/processor.h | 13 + arch/x86/include/asm/ptrace.h | 36 -- arch/x86/include/asm/thread_info.h | 5 +- arch/x86/kernel/cpu/intel.c | 4 - arch/x86/kernel/ds.c | 857 +++++++++++++++++++++++-------------- arch/x86/kernel/process_32.c | 59 +-- arch/x86/kernel/process_64.c | 50 +-- arch/x86/kernel/ptrace.c | 416 +++++------------- include/linux/sched.h | 1 + 10 files changed, 811 insertions(+), 871 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index 99b6c39774a4..ee0ea3a96c11 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h @@ -6,13 +6,13 @@ * precise-event based sampling (PEBS). * * It manages: - * - per-thread and per-cpu allocation of BTS and PEBS + * - DS and BTS hardware configuration * - buffer overflow handling (to be done) * - buffer access * - * It assumes: - * - get_task_struct on all traced tasks - * - current is allowed to trace tasks + * It does not do: + * - security checking (is the caller allowed to trace the task) + * - buffer allocation (memory accounting) * * * Copyright (C) 2007-2008 Intel Corporation. @@ -31,6 +31,7 @@ #ifdef CONFIG_X86_DS struct task_struct; +struct ds_context; struct ds_tracer; struct bts_tracer; struct pebs_tracer; @@ -38,6 +39,38 @@ struct pebs_tracer; typedef void (*bts_ovfl_callback_t)(struct bts_tracer *); typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *); + +/* + * A list of features plus corresponding macros to talk about them in + * the ds_request function's flags parameter. + * + * We use the enum to index an array of corresponding control bits; + * we use the macro to index a flags bit-vector. + */ +enum ds_feature { + dsf_bts = 0, + dsf_bts_kernel, +#define BTS_KERNEL (1 << dsf_bts_kernel) + /* trace kernel-mode branches */ + + dsf_bts_user, +#define BTS_USER (1 << dsf_bts_user) + /* trace user-mode branches */ + + dsf_bts_overflow, + dsf_bts_max, + dsf_pebs = dsf_bts_max, + + dsf_pebs_max, + dsf_ctl_max = dsf_pebs_max, + dsf_bts_timestamps = dsf_ctl_max, +#define BTS_TIMESTAMPS (1 << dsf_bts_timestamps) + /* add timestamps into BTS trace */ + +#define BTS_USER_FLAGS (BTS_KERNEL | BTS_USER | BTS_TIMESTAMPS) +}; + + /* * Request BTS or PEBS * @@ -58,92 +91,135 @@ typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *); * NULL if cyclic buffer requested * th: the interrupt threshold in records from the end of the buffer; * -1 if no interrupt threshold is requested. + * flags: a bit-mask of the above flags */ extern struct bts_tracer *ds_request_bts(struct task_struct *task, void *base, size_t size, - bts_ovfl_callback_t ovfl, size_t th); + bts_ovfl_callback_t ovfl, + size_t th, unsigned int flags); extern struct pebs_tracer *ds_request_pebs(struct task_struct *task, void *base, size_t size, pebs_ovfl_callback_t ovfl, - size_t th); + size_t th, unsigned int flags); /* * Release BTS or PEBS resources - * - * Returns 0 on success; -Eerrno otherwise + * Suspend and resume BTS or PEBS tracing * * tracer: the tracer handle returned from ds_request_~() */ -extern int ds_release_bts(struct bts_tracer *tracer); -extern int ds_release_pebs(struct pebs_tracer *tracer); +extern void ds_release_bts(struct bts_tracer *tracer); +extern void ds_suspend_bts(struct bts_tracer *tracer); +extern void ds_resume_bts(struct bts_tracer *tracer); +extern void ds_release_pebs(struct pebs_tracer *tracer); +extern void ds_suspend_pebs(struct pebs_tracer *tracer); +extern void ds_resume_pebs(struct pebs_tracer *tracer); + /* - * Get the (array) index of the write pointer. - * (assuming an array of BTS/PEBS records) - * - * Returns 0 on success; -Eerrno on error + * The raw DS buffer state as it is used for BTS and PEBS recording. * - * tracer: the tracer handle returned from ds_request_~() - * pos (out): will hold the result + * This is the low-level, arch-dependent interface for working + * directly on the raw trace data. */ -extern int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos); -extern int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos); +struct ds_trace { + /* the number of bts/pebs records */ + size_t n; + /* the size of a bts/pebs record in bytes */ + size_t size; + /* pointers into the raw buffer: + - to the first entry */ + void *begin; + /* - one beyond the last entry */ + void *end; + /* - one beyond the newest entry */ + void *top; + /* - the interrupt threshold */ + void *ith; + /* flags given on ds_request() */ + unsigned int flags; +}; /* - * Get the (array) index one record beyond the end of the array. - * (assuming an array of BTS/PEBS records) - * - * Returns 0 on success; -Eerrno on error - * - * tracer: the tracer handle returned from ds_request_~() - * pos (out): will hold the result + * An arch-independent view on branch trace data. */ -extern int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos); -extern int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos); +enum bts_qualifier { + bts_invalid, +#define BTS_INVALID bts_invalid + + bts_branch, +#define BTS_BRANCH bts_branch + + bts_task_arrives, +#define BTS_TASK_ARRIVES bts_task_arrives + + bts_task_departs, +#define BTS_TASK_DEPARTS bts_task_departs + + bts_qual_bit_size = 4, + bts_qual_max = (1 << bts_qual_bit_size), +}; + +struct bts_struct { + __u64 qualifier; + union { + /* BTS_BRANCH */ + struct { + __u64 from; + __u64 to; + } lbr; + /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */ + struct { + __u64 jiffies; + pid_t pid; + } timestamp; + } variant; +}; + /* - * Provide a pointer to the BTS/PEBS record at parameter index. - * (assuming an array of BTS/PEBS records) - * - * The pointer points directly into the buffer. The user is - * responsible for copying the record. - * - * Returns the size of a single record on success; -Eerrno on error + * The BTS state. * - * tracer: the tracer handle returned from ds_request_~() - * index: the index of the requested record - * record (out): pointer to the requested record + * This gives access to the raw DS state and adds functions to provide + * an arch-independent view of the BTS data. */ -extern int ds_access_bts(struct bts_tracer *tracer, - size_t index, const void **record); -extern int ds_access_pebs(struct pebs_tracer *tracer, - size_t index, const void **record); +struct bts_trace { + struct ds_trace ds; + + int (*read)(struct bts_tracer *tracer, const void *at, + struct bts_struct *out); + int (*write)(struct bts_tracer *tracer, const struct bts_struct *in); +}; + /* - * Write one or more BTS/PEBS records at the write pointer index and - * advance the write pointer. + * The PEBS state. * - * If size is not a multiple of the record size, trailing bytes are - * zeroed out. - * - * May result in one or more overflow notifications. - * - * If called during overflow handling, that is, with index >= - * interrupt threshold, the write will wrap around. + * This gives access to the raw DS state and the PEBS-specific counter + * reset value. + */ +struct pebs_trace { + struct ds_trace ds; + + /* the PEBS reset value */ + unsigned long long reset_value; +}; + + +/* + * Read the BTS or PEBS trace. * - * An overflow notification is given if and when the interrupt - * threshold is reached during or after the write. + * Returns a view on the trace collected for the parameter tracer. * - * Returns the number of bytes written or -Eerrno. + * The view remains valid as long as the traced task is not running or + * the tracer is suspended. + * Writes into the trace buffer are not reflected. * * tracer: the tracer handle returned from ds_request_~() - * buffer: the buffer to write - * size: the size of the buffer */ -extern int ds_write_bts(struct bts_tracer *tracer, - const void *buffer, size_t size); -extern int ds_write_pebs(struct pebs_tracer *tracer, - const void *buffer, size_t size); +extern const struct bts_trace *ds_read_bts(struct bts_tracer *tracer); +extern const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer); + /* * Reset the write pointer of the BTS/PEBS buffer. @@ -155,27 +231,6 @@ extern int ds_write_pebs(struct pebs_tracer *tracer, extern int ds_reset_bts(struct bts_tracer *tracer); extern int ds_reset_pebs(struct pebs_tracer *tracer); -/* - * Clear the BTS/PEBS buffer and reset the write pointer. - * The entire buffer will be zeroed out. - * - * Returns 0 on success; -Eerrno on error - * - * tracer: the tracer handle returned from ds_request_~() - */ -extern int ds_clear_bts(struct bts_tracer *tracer); -extern int ds_clear_pebs(struct pebs_tracer *tracer); - -/* - * Provide the PEBS counter reset value. - * - * Returns 0 on success; -Eerrno on error - * - * tracer: the tracer handle returned from ds_request_pebs() - * value (out): the counter reset value - */ -extern int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value); - /* * Set the PEBS counter reset value. * @@ -192,35 +247,17 @@ extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value); struct cpuinfo_x86; extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); - - /* - * The DS context - part of struct thread_struct. + * Context switch work */ -#define MAX_SIZEOF_DS (12 * 8) - -struct ds_context { - /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ - unsigned char ds[MAX_SIZEOF_DS]; - /* the owner of the BTS and PEBS configuration, respectively */ - struct ds_tracer *owner[2]; - /* use count */ - unsigned long count; - /* a pointer to the context location inside the thread_struct - * or the per_cpu context array */ - struct ds_context **this; - /* a pointer to the task owning this context, or NULL, if the - * context is owned by a cpu */ - struct task_struct *task; -}; - -/* called by exit_thread() to free leftover contexts */ -extern void ds_free(struct ds_context *context); +extern void ds_switch_to(struct task_struct *prev, struct task_struct *next); #else /* CONFIG_X86_DS */ struct cpuinfo_x86; static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} +static inline void ds_switch_to(struct task_struct *prev, + struct task_struct *next) {} #endif /* CONFIG_X86_DS */ #endif /* _ASM_X86_DS_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 5ca01e383269..aa5914f8e501 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -752,6 +752,19 @@ extern void switch_to_new_gdt(void); extern void cpu_init(void); extern void init_gdt(int cpu); +static inline unsigned long get_debugctlmsr(void) +{ + unsigned long debugctlmsr = 0; + +#ifndef CONFIG_X86_DEBUGCTLMSR + if (boot_cpu_data.x86 < 6) + return 0; +#endif + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); + + return debugctlmsr; +} + static inline void update_debugctlmsr(unsigned long debugctlmsr) { #ifndef CONFIG_X86_DEBUGCTLMSR diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index eefb0594b058..fbf744215911 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -6,7 +6,6 @@ #include #ifdef __KERNEL__ -#include /* the DS BTS struct is used for ptrace too */ #include #endif @@ -128,34 +127,6 @@ struct pt_regs { #endif /* !__i386__ */ -#ifdef CONFIG_X86_PTRACE_BTS -/* a branch trace record entry - * - * In order to unify the interface between various processor versions, - * we use the below data structure for all processors. - */ -enum bts_qualifier { - BTS_INVALID = 0, - BTS_BRANCH, - BTS_TASK_ARRIVES, - BTS_TASK_DEPARTS -}; - -struct bts_struct { - __u64 qualifier; - union { - /* BTS_BRANCH */ - struct { - __u64 from_ip; - __u64 to_ip; - } lbr; - /* BTS_TASK_ARRIVES or - BTS_TASK_DEPARTS */ - __u64 jiffies; - } variant; -}; -#endif /* CONFIG_X86_PTRACE_BTS */ - #ifdef __KERNEL__ #include @@ -163,13 +134,6 @@ struct bts_struct { struct cpuinfo_x86; struct task_struct; -#ifdef CONFIG_X86_PTRACE_BTS -extern void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *); -extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier); -#else -#define ptrace_bts_init_intel(config) do {} while (0) -#endif /* CONFIG_X86_PTRACE_BTS */ - extern unsigned long profile_pc(struct pt_regs *regs); extern unsigned long diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 0921b4018c11..bf8113d16a33 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -93,7 +93,6 @@ struct thread_info { #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ -#define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -115,7 +114,6 @@ struct thread_info { #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) -#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) /* work to do in syscall_trace_enter() */ #define _TIF_WORK_SYSCALL_ENTRY \ @@ -141,8 +139,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS| \ - _TIF_NOTSC) + (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC) #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 816f27f289b1..cd413d9a0218 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include @@ -309,9 +308,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_P3); #endif - if (cpu_has_bts) - ptrace_bts_init_intel(c); - detect_extended_topology(c); if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { /* diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 095306988667..f0583005b75e 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -6,13 +6,13 @@ * precise-event based sampling (PEBS). * * It manages: - * - per-thread and per-cpu allocation of BTS and PEBS + * - DS and BTS hardware configuration * - buffer overflow handling (to be done) * - buffer access * - * It assumes: - * - get_task_struct on all traced tasks - * - current is allowed to trace tasks + * It does not do: + * - security checking (is the caller allowed to trace the task) + * - buffer allocation (memory accounting) * * * Copyright (C) 2007-2008 Intel Corporation. @@ -34,15 +34,30 @@ * The configuration for a particular DS hardware implementation. */ struct ds_configuration { - /* the size of the DS structure in bytes */ - unsigned char sizeof_ds; - /* the size of one pointer-typed field in the DS structure in bytes; - this covers the first 8 fields related to buffer management. */ + /* the name of the configuration */ + const char *name; + /* the size of one pointer-typed field in the DS structure and + in the BTS and PEBS buffers in bytes; + this covers the first 8 DS fields related to buffer management. */ unsigned char sizeof_field; /* the size of a BTS/PEBS record in bytes */ unsigned char sizeof_rec[2]; + /* a series of bit-masks to control various features indexed + * by enum ds_feature */ + unsigned long ctl[dsf_ctl_max]; }; -static struct ds_configuration ds_cfg; +static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); + +#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id()) + +#define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */ +#define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */ +#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */ + +#define BTS_CONTROL \ + (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\ + ds_cfg.ctl[dsf_bts_overflow]) + /* * A BTS or PEBS tracer. @@ -61,6 +76,8 @@ struct ds_tracer { struct bts_tracer { /* the common DS part */ struct ds_tracer ds; + /* the trace including the DS configuration */ + struct bts_trace trace; /* buffer overflow notification function */ bts_ovfl_callback_t ovfl; }; @@ -68,6 +85,8 @@ struct bts_tracer { struct pebs_tracer { /* the common DS part */ struct ds_tracer ds; + /* the trace including the DS configuration */ + struct pebs_trace trace; /* buffer overflow notification function */ pebs_ovfl_callback_t ovfl; }; @@ -134,13 +153,11 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual, (*(unsigned long *)base) = value; } -#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */ - /* * Locking is done only for allocating BTS or PEBS resources. */ -static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); +static DEFINE_SPINLOCK(ds_lock); /* @@ -156,27 +173,32 @@ static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); * >0 number of per-thread tracers * <0 number of per-cpu tracers * - * The below functions to get and put tracers and to check the - * allocation type require the ds_lock to be held by the caller. - * * Tracers essentially gives the number of ds contexts for a certain * type of allocation. */ -static long tracers; +static atomic_t tracers = ATOMIC_INIT(0); static inline void get_tracer(struct task_struct *task) { - tracers += (task ? 1 : -1); + if (task) + atomic_inc(&tracers); + else + atomic_dec(&tracers); } static inline void put_tracer(struct task_struct *task) { - tracers -= (task ? 1 : -1); + if (task) + atomic_dec(&tracers); + else + atomic_inc(&tracers); } static inline int check_tracer(struct task_struct *task) { - return (task ? (tracers >= 0) : (tracers <= 0)); + return task ? + (atomic_read(&tracers) >= 0) : + (atomic_read(&tracers) <= 0); } @@ -190,14 +212,30 @@ static inline int check_tracer(struct task_struct *task) * Contexts are use-counted. They are allocated on first access and * deallocated when the last user puts the context. */ -static DEFINE_PER_CPU(struct ds_context *, system_context); +struct ds_context { + /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ + unsigned char ds[MAX_SIZEOF_DS]; + /* the owner of the BTS and PEBS configuration, respectively */ + struct bts_tracer *bts_master; + struct pebs_tracer *pebs_master; + /* use count */ + unsigned long count; + /* a pointer to the context location inside the thread_struct + * or the per_cpu context array */ + struct ds_context **this; + /* a pointer to the task owning this context, or NULL, if the + * context is owned by a cpu */ + struct task_struct *task; +}; + +static DEFINE_PER_CPU(struct ds_context *, system_context_array); -#define this_system_context per_cpu(system_context, smp_processor_id()) +#define system_context per_cpu(system_context_array, smp_processor_id()) static inline struct ds_context *ds_get_context(struct task_struct *task) { struct ds_context **p_context = - (task ? &task->thread.ds_ctx : &this_system_context); + (task ? &task->thread.ds_ctx : &system_context); struct ds_context *context = *p_context; unsigned long irq; @@ -225,10 +263,22 @@ static inline struct ds_context *ds_get_context(struct task_struct *task) wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds); } + + context->count++; + + spin_unlock_irqrestore(&ds_lock, irq); + } else { + spin_lock_irqsave(&ds_lock, irq); + + context = *p_context; + if (context) + context->count++; + spin_unlock_irqrestore(&ds_lock, irq); - } - context->count++; + if (!context) + context = ds_get_context(task); + } return context; } @@ -242,8 +292,10 @@ static inline void ds_put_context(struct ds_context *context) spin_lock_irqsave(&ds_lock, irq); - if (--context->count) - goto out; + if (--context->count) { + spin_unlock_irqrestore(&ds_lock, irq); + return; + } *(context->this) = NULL; @@ -253,14 +305,14 @@ static inline void ds_put_context(struct ds_context *context) if (!context->task || (context->task == current)) wrmsrl(MSR_IA32_DS_AREA, 0); - kfree(context); - out: spin_unlock_irqrestore(&ds_lock, irq); + + kfree(context); } /* - * Handle a buffer overflow + * Call the tracer's callback on a buffer overflow. * * context: the ds context * qual: the buffer type @@ -268,30 +320,244 @@ static inline void ds_put_context(struct ds_context *context) static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) { switch (qual) { - case ds_bts: { - struct bts_tracer *tracer = - container_of(context->owner[qual], - struct bts_tracer, ds); - if (tracer->ovfl) - tracer->ovfl(tracer); - } + case ds_bts: + if (context->bts_master && + context->bts_master->ovfl) + context->bts_master->ovfl(context->bts_master); + break; + case ds_pebs: + if (context->pebs_master && + context->pebs_master->ovfl) + context->pebs_master->ovfl(context->pebs_master); break; - case ds_pebs: { - struct pebs_tracer *tracer = - container_of(context->owner[qual], - struct pebs_tracer, ds); - if (tracer->ovfl) - tracer->ovfl(tracer); } +} + + +/* + * Write raw data into the BTS or PEBS buffer. + * + * The remainder of any partially written record is zeroed out. + * + * context: the DS context + * qual: the buffer type + * record: the data to write + * size: the size of the data + */ +static int ds_write(struct ds_context *context, enum ds_qualifier qual, + const void *record, size_t size) +{ + int bytes_written = 0; + + if (!record) + return -EINVAL; + + while (size) { + unsigned long base, index, end, write_end, int_th; + unsigned long write_size, adj_write_size; + + /* + * write as much as possible without producing an + * overflow interrupt. + * + * interrupt_threshold must either be + * - bigger than absolute_maximum or + * - point to a record between buffer_base and absolute_maximum + * + * index points to a valid record. + */ + base = ds_get(context->ds, qual, ds_buffer_base); + index = ds_get(context->ds, qual, ds_index); + end = ds_get(context->ds, qual, ds_absolute_maximum); + int_th = ds_get(context->ds, qual, ds_interrupt_threshold); + + write_end = min(end, int_th); + + /* if we are already beyond the interrupt threshold, + * we fill the entire buffer */ + if (write_end <= index) + write_end = end; + + if (write_end <= index) + break; + + write_size = min((unsigned long) size, write_end - index); + memcpy((void *)index, record, write_size); + + record = (const char *)record + write_size; + size -= write_size; + bytes_written += write_size; + + adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; + adj_write_size *= ds_cfg.sizeof_rec[qual]; + + /* zero out trailing bytes */ + memset((char *)index + write_size, 0, + adj_write_size - write_size); + index += adj_write_size; + + if (index >= end) + index = base; + ds_set(context->ds, qual, ds_index, index); + + if (index >= int_th) + ds_overflow(context, qual); + } + + return bytes_written; +} + + +/* + * Branch Trace Store (BTS) uses the following format. Different + * architectures vary in the size of those fields. + * - source linear address + * - destination linear address + * - flags + * + * Later architectures use 64bit pointers throughout, whereas earlier + * architectures use 32bit pointers in 32bit mode. + * + * We compute the base address for the first 8 fields based on: + * - the field size stored in the DS configuration + * - the relative field position + * + * In order to store additional information in the BTS buffer, we use + * a special source address to indicate that the record requires + * special interpretation. + * + * Netburst indicated via a bit in the flags field whether the branch + * was predicted; this is ignored. + * + * We use two levels of abstraction: + * - the raw data level defined here + * - an arch-independent level defined in ds.h + */ + +enum bts_field { + bts_from, + bts_to, + bts_flags, + + bts_qual = bts_from, + bts_jiffies = bts_to, + bts_pid = bts_flags, + + bts_qual_mask = (bts_qual_max - 1), + bts_escape = ((unsigned long)-1 & ~bts_qual_mask) +}; + +static inline unsigned long bts_get(const char *base, enum bts_field field) +{ + base += (ds_cfg.sizeof_field * field); + return *(unsigned long *)base; +} + +static inline void bts_set(char *base, enum bts_field field, unsigned long val) +{ + base += (ds_cfg.sizeof_field * field);; + (*(unsigned long *)base) = val; +} + + +/* + * The raw BTS data is architecture dependent. + * + * For higher-level users, we give an arch-independent view. + * - ds.h defines struct bts_struct + * - bts_read translates one raw bts record into a bts_struct + * - bts_write translates one bts_struct into the raw format and + * writes it into the top of the parameter tracer's buffer. + * + * return: bytes read/written on success; -Eerrno, otherwise + */ +static int bts_read(struct bts_tracer *tracer, const void *at, + struct bts_struct *out) +{ + if (!tracer) + return -EINVAL; + + if (at < tracer->trace.ds.begin) + return -EINVAL; + + if (tracer->trace.ds.end < (at + tracer->trace.ds.size)) + return -EINVAL; + + memset(out, 0, sizeof(*out)); + if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { + out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); + out->variant.timestamp.jiffies = bts_get(at, bts_jiffies); + out->variant.timestamp.pid = bts_get(at, bts_pid); + } else { + out->qualifier = bts_branch; + out->variant.lbr.from = bts_get(at, bts_from); + out->variant.lbr.to = bts_get(at, bts_to); + } + + return ds_cfg.sizeof_rec[ds_bts]; +} + +static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in) +{ + unsigned char raw[MAX_SIZEOF_BTS]; + + if (!tracer) + return -EINVAL; + + if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts]) + return -EOVERFLOW; + + switch (in->qualifier) { + case bts_invalid: + bts_set(raw, bts_from, 0); + bts_set(raw, bts_to, 0); + bts_set(raw, bts_flags, 0); + break; + case bts_branch: + bts_set(raw, bts_from, in->variant.lbr.from); + bts_set(raw, bts_to, in->variant.lbr.to); + bts_set(raw, bts_flags, 0); + break; + case bts_task_arrives: + case bts_task_departs: + bts_set(raw, bts_qual, (bts_escape | in->qualifier)); + bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies); + bts_set(raw, bts_pid, in->variant.timestamp.pid); break; + default: + return -EINVAL; } + + return ds_write(tracer->ds.context, ds_bts, raw, + ds_cfg.sizeof_rec[ds_bts]); } -static void ds_install_ds_config(struct ds_context *context, - enum ds_qualifier qual, - void *base, size_t size, size_t ith) +static void ds_write_config(struct ds_context *context, + struct ds_trace *cfg, enum ds_qualifier qual) +{ + unsigned char *ds = context->ds; + + ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin); + ds_set(ds, qual, ds_index, (unsigned long)cfg->top); + ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end); + ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith); +} + +static void ds_read_config(struct ds_context *context, + struct ds_trace *cfg, enum ds_qualifier qual) { + unsigned char *ds = context->ds; + + cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base); + cfg->top = (void *)ds_get(ds, qual, ds_index); + cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum); + cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold); +} + +static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, + void *base, size_t size, size_t ith, + unsigned int flags) { unsigned long buffer, adj; /* adjust the buffer address and size to meet alignment @@ -308,32 +574,30 @@ static void ds_install_ds_config(struct ds_context *context, buffer += adj; size -= adj; - size /= ds_cfg.sizeof_rec[qual]; - size *= ds_cfg.sizeof_rec[qual]; + trace->n = size / ds_cfg.sizeof_rec[qual]; + trace->size = ds_cfg.sizeof_rec[qual]; - ds_set(context->ds, qual, ds_buffer_base, buffer); - ds_set(context->ds, qual, ds_index, buffer); - ds_set(context->ds, qual, ds_absolute_maximum, buffer + size); + size = (trace->n * trace->size); + trace->begin = (void *)buffer; + trace->top = trace->begin; + trace->end = (void *)(buffer + size); /* The value for 'no threshold' is -1, which will set the * threshold outside of the buffer, just like we want it. */ - ds_set(context->ds, qual, - ds_interrupt_threshold, buffer + size - ith); + trace->ith = (void *)(buffer + size - ith); + + trace->flags = flags; } -static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual, - struct task_struct *task, - void *base, size_t size, size_t th) + +static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, + enum ds_qualifier qual, struct task_struct *task, + void *base, size_t size, size_t th, unsigned int flags) { struct ds_context *context; - unsigned long irq; int error; - error = -EOPNOTSUPP; - if (!ds_cfg.sizeof_ds) - goto out; - error = -EINVAL; if (!base) goto out; @@ -360,43 +624,26 @@ static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual, goto out; tracer->context = context; + ds_init_ds_trace(trace, qual, base, size, th, flags); - spin_lock_irqsave(&ds_lock, irq); - - error = -EPERM; - if (!check_tracer(task)) - goto out_unlock; - get_tracer(task); - - error = -EPERM; - if (context->owner[qual]) - goto out_put_tracer; - context->owner[qual] = tracer; - - spin_unlock_irqrestore(&ds_lock, irq); - - - ds_install_ds_config(context, qual, base, size, th); - - return 0; - - out_put_tracer: - put_tracer(task); - out_unlock: - spin_unlock_irqrestore(&ds_lock, irq); - ds_put_context(context); - tracer->context = NULL; + error = 0; out: return error; } struct bts_tracer *ds_request_bts(struct task_struct *task, void *base, size_t size, - bts_ovfl_callback_t ovfl, size_t th) + bts_ovfl_callback_t ovfl, size_t th, + unsigned int flags) { struct bts_tracer *tracer; + unsigned long irq; int error; + error = -EOPNOTSUPP; + if (!ds_cfg.ctl[dsf_bts]) + goto out; + /* buffer overflow notification is not yet implemented */ error = -EOPNOTSUPP; if (ovfl) @@ -408,12 +655,40 @@ struct bts_tracer *ds_request_bts(struct task_struct *task, goto out; tracer->ovfl = ovfl; - error = ds_request(&tracer->ds, ds_bts, task, base, size, th); + error = ds_request(&tracer->ds, &tracer->trace.ds, + ds_bts, task, base, size, th, flags); if (error < 0) goto out_tracer; + + spin_lock_irqsave(&ds_lock, irq); + + error = -EPERM; + if (!check_tracer(task)) + goto out_unlock; + get_tracer(task); + + error = -EPERM; + if (tracer->ds.context->bts_master) + goto out_put_tracer; + tracer->ds.context->bts_master = tracer; + + spin_unlock_irqrestore(&ds_lock, irq); + + + tracer->trace.read = bts_read; + tracer->trace.write = bts_write; + + ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); + ds_resume_bts(tracer); + return tracer; + out_put_tracer: + put_tracer(task); + out_unlock: + spin_unlock_irqrestore(&ds_lock, irq); + ds_put_context(tracer->ds.context); out_tracer: kfree(tracer); out: @@ -422,9 +697,11 @@ struct bts_tracer *ds_request_bts(struct task_struct *task, struct pebs_tracer *ds_request_pebs(struct task_struct *task, void *base, size_t size, - pebs_ovfl_callback_t ovfl, size_t th) + pebs_ovfl_callback_t ovfl, size_t th, + unsigned int flags) { struct pebs_tracer *tracer; + unsigned long irq; int error; /* buffer overflow notification is not yet implemented */ @@ -438,300 +715,171 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task, goto out; tracer->ovfl = ovfl; - error = ds_request(&tracer->ds, ds_pebs, task, base, size, th); + error = ds_request(&tracer->ds, &tracer->trace.ds, + ds_pebs, task, base, size, th, flags); if (error < 0) goto out_tracer; + spin_lock_irqsave(&ds_lock, irq); + + error = -EPERM; + if (!check_tracer(task)) + goto out_unlock; + get_tracer(task); + + error = -EPERM; + if (tracer->ds.context->pebs_master) + goto out_put_tracer; + tracer->ds.context->pebs_master = tracer; + + spin_unlock_irqrestore(&ds_lock, irq); + + ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); + ds_resume_pebs(tracer); + return tracer; + out_put_tracer: + put_tracer(task); + out_unlock: + spin_unlock_irqrestore(&ds_lock, irq); + ds_put_context(tracer->ds.context); out_tracer: kfree(tracer); out: return ERR_PTR(error); } -static void ds_release(struct ds_tracer *tracer, enum ds_qualifier qual) -{ - WARN_ON_ONCE(tracer->context->owner[qual] != tracer); - tracer->context->owner[qual] = NULL; - - put_tracer(tracer->context->task); - ds_put_context(tracer->context); -} - -int ds_release_bts(struct bts_tracer *tracer) +void ds_release_bts(struct bts_tracer *tracer) { if (!tracer) - return -EINVAL; + return; - ds_release(&tracer->ds, ds_bts); - kfree(tracer); + ds_suspend_bts(tracer); - return 0; -} + WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); + tracer->ds.context->bts_master = NULL; -int ds_release_pebs(struct pebs_tracer *tracer) -{ - if (!tracer) - return -EINVAL; + put_tracer(tracer->ds.context->task); + ds_put_context(tracer->ds.context); - ds_release(&tracer->ds, ds_pebs); kfree(tracer); - - return 0; -} - -static size_t ds_get_index(struct ds_context *context, enum ds_qualifier qual) -{ - unsigned long base, index; - - base = ds_get(context->ds, qual, ds_buffer_base); - index = ds_get(context->ds, qual, ds_index); - - return (index - base) / ds_cfg.sizeof_rec[qual]; } -int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos) +void ds_suspend_bts(struct bts_tracer *tracer) { - if (!tracer) - return -EINVAL; + struct task_struct *task; - if (!pos) - return -EINVAL; - - *pos = ds_get_index(tracer->ds.context, ds_bts); - - return 0; -} - -int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos) -{ if (!tracer) - return -EINVAL; + return; - if (!pos) - return -EINVAL; + task = tracer->ds.context->task; - *pos = ds_get_index(tracer->ds.context, ds_pebs); + if (!task || (task == current)) + update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL); - return 0; -} + if (task) { + task->thread.debugctlmsr &= ~BTS_CONTROL; -static size_t ds_get_end(struct ds_context *context, enum ds_qualifier qual) -{ - unsigned long base, max; - - base = ds_get(context->ds, qual, ds_buffer_base); - max = ds_get(context->ds, qual, ds_absolute_maximum); - - return (max - base) / ds_cfg.sizeof_rec[qual]; + if (!task->thread.debugctlmsr) + clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR); + } } -int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos) +void ds_resume_bts(struct bts_tracer *tracer) { - if (!tracer) - return -EINVAL; - - if (!pos) - return -EINVAL; - - *pos = ds_get_end(tracer->ds.context, ds_bts); - - return 0; -} + struct task_struct *task; + unsigned long control; -int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos) -{ if (!tracer) - return -EINVAL; - - if (!pos) - return -EINVAL; - - *pos = ds_get_end(tracer->ds.context, ds_pebs); - - return 0; -} - -static int ds_access(struct ds_context *context, enum ds_qualifier qual, - size_t index, const void **record) -{ - unsigned long base, idx; - - if (!record) - return -EINVAL; - - base = ds_get(context->ds, qual, ds_buffer_base); - idx = base + (index * ds_cfg.sizeof_rec[qual]); - - if (idx > ds_get(context->ds, qual, ds_absolute_maximum)) - return -EINVAL; + return; - *record = (const void *)idx; + task = tracer->ds.context->task; - return ds_cfg.sizeof_rec[qual]; -} + control = ds_cfg.ctl[dsf_bts]; + if (!(tracer->trace.ds.flags & BTS_KERNEL)) + control |= ds_cfg.ctl[dsf_bts_kernel]; + if (!(tracer->trace.ds.flags & BTS_USER)) + control |= ds_cfg.ctl[dsf_bts_user]; -int ds_access_bts(struct bts_tracer *tracer, size_t index, - const void **record) -{ - if (!tracer) - return -EINVAL; + if (task) { + task->thread.debugctlmsr |= control; + set_tsk_thread_flag(task, TIF_DEBUGCTLMSR); + } - return ds_access(tracer->ds.context, ds_bts, index, record); + if (!task || (task == current)) + update_debugctlmsr(get_debugctlmsr() | control); } -int ds_access_pebs(struct pebs_tracer *tracer, size_t index, - const void **record) +void ds_release_pebs(struct pebs_tracer *tracer) { if (!tracer) - return -EINVAL; - - return ds_access(tracer->ds.context, ds_pebs, index, record); -} - -static int ds_write(struct ds_context *context, enum ds_qualifier qual, - const void *record, size_t size) -{ - int bytes_written = 0; - - if (!record) - return -EINVAL; - - while (size) { - unsigned long base, index, end, write_end, int_th; - unsigned long write_size, adj_write_size; - - /* - * write as much as possible without producing an - * overflow interrupt. - * - * interrupt_threshold must either be - * - bigger than absolute_maximum or - * - point to a record between buffer_base and absolute_maximum - * - * index points to a valid record. - */ - base = ds_get(context->ds, qual, ds_buffer_base); - index = ds_get(context->ds, qual, ds_index); - end = ds_get(context->ds, qual, ds_absolute_maximum); - int_th = ds_get(context->ds, qual, ds_interrupt_threshold); - - write_end = min(end, int_th); - - /* if we are already beyond the interrupt threshold, - * we fill the entire buffer */ - if (write_end <= index) - write_end = end; - - if (write_end <= index) - break; - - write_size = min((unsigned long) size, write_end - index); - memcpy((void *)index, record, write_size); - - record = (const char *)record + write_size; - size -= write_size; - bytes_written += write_size; - - adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; - adj_write_size *= ds_cfg.sizeof_rec[qual]; - - /* zero out trailing bytes */ - memset((char *)index + write_size, 0, - adj_write_size - write_size); - index += adj_write_size; + return; - if (index >= end) - index = base; - ds_set(context->ds, qual, ds_index, index); + ds_suspend_pebs(tracer); - if (index >= int_th) - ds_overflow(context, qual); - } + WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); + tracer->ds.context->pebs_master = NULL; - return bytes_written; -} + put_tracer(tracer->ds.context->task); + ds_put_context(tracer->ds.context); -int ds_write_bts(struct bts_tracer *tracer, const void *record, size_t size) -{ - if (!tracer) - return -EINVAL; - - return ds_write(tracer->ds.context, ds_bts, record, size); + kfree(tracer); } -int ds_write_pebs(struct pebs_tracer *tracer, const void *record, size_t size) +void ds_suspend_pebs(struct pebs_tracer *tracer) { - if (!tracer) - return -EINVAL; - return ds_write(tracer->ds.context, ds_pebs, record, size); } -static void ds_reset_or_clear(struct ds_context *context, - enum ds_qualifier qual, int clear) +void ds_resume_pebs(struct pebs_tracer *tracer) { - unsigned long base, end; - - base = ds_get(context->ds, qual, ds_buffer_base); - end = ds_get(context->ds, qual, ds_absolute_maximum); - - if (clear) - memset((void *)base, 0, end - base); - ds_set(context->ds, qual, ds_index, base); } -int ds_reset_bts(struct bts_tracer *tracer) +const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) { if (!tracer) - return -EINVAL; - - ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 0); + return NULL; - return 0; + ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts); + return &tracer->trace; } -int ds_reset_pebs(struct pebs_tracer *tracer) +const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer) { if (!tracer) - return -EINVAL; + return NULL; - ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 0); + ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); + tracer->trace.reset_value = + *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)); - return 0; + return &tracer->trace; } -int ds_clear_bts(struct bts_tracer *tracer) +int ds_reset_bts(struct bts_tracer *tracer) { if (!tracer) return -EINVAL; - ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 1); - - return 0; -} - -int ds_clear_pebs(struct pebs_tracer *tracer) -{ - if (!tracer) - return -EINVAL; + tracer->trace.ds.top = tracer->trace.ds.begin; - ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 1); + ds_set(tracer->ds.context->ds, ds_bts, ds_index, + (unsigned long)tracer->trace.ds.top); return 0; } -int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value) +int ds_reset_pebs(struct pebs_tracer *tracer) { if (!tracer) return -EINVAL; - if (!value) - return -EINVAL; + tracer->trace.ds.top = tracer->trace.ds.begin; - *value = *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)); + ds_set(tracer->ds.context->ds, ds_bts, ds_index, + (unsigned long)tracer->trace.ds.top); return 0; } @@ -746,35 +894,59 @@ int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) return 0; } -static const struct ds_configuration ds_cfg_var = { - .sizeof_ds = sizeof(long) * 12, - .sizeof_field = sizeof(long), - .sizeof_rec[ds_bts] = sizeof(long) * 3, +static const struct ds_configuration ds_cfg_netburst = { + .name = "netburst", + .ctl[dsf_bts] = (1 << 2) | (1 << 3), + .ctl[dsf_bts_kernel] = (1 << 5), + .ctl[dsf_bts_user] = (1 << 6), + + .sizeof_field = sizeof(long), + .sizeof_rec[ds_bts] = sizeof(long) * 3, #ifdef __i386__ - .sizeof_rec[ds_pebs] = sizeof(long) * 10 + .sizeof_rec[ds_pebs] = sizeof(long) * 10, #else - .sizeof_rec[ds_pebs] = sizeof(long) * 18 + .sizeof_rec[ds_pebs] = sizeof(long) * 18, #endif }; -static const struct ds_configuration ds_cfg_64 = { - .sizeof_ds = 8 * 12, - .sizeof_field = 8, - .sizeof_rec[ds_bts] = 8 * 3, +static const struct ds_configuration ds_cfg_pentium_m = { + .name = "pentium m", + .ctl[dsf_bts] = (1 << 6) | (1 << 7), + + .sizeof_field = sizeof(long), + .sizeof_rec[ds_bts] = sizeof(long) * 3, #ifdef __i386__ - .sizeof_rec[ds_pebs] = 8 * 10 + .sizeof_rec[ds_pebs] = sizeof(long) * 10, #else - .sizeof_rec[ds_pebs] = 8 * 18 + .sizeof_rec[ds_pebs] = sizeof(long) * 18, #endif }; +static const struct ds_configuration ds_cfg_core2 = { + .name = "core 2", + .ctl[dsf_bts] = (1 << 6) | (1 << 7), + .ctl[dsf_bts_kernel] = (1 << 9), + .ctl[dsf_bts_user] = (1 << 10), + + .sizeof_field = 8, + .sizeof_rec[ds_bts] = 8 * 3, + .sizeof_rec[ds_pebs] = 8 * 18, +}; -static inline void +static void ds_configure(const struct ds_configuration *cfg) { + memset(&ds_cfg, 0, sizeof(ds_cfg)); ds_cfg = *cfg; - printk(KERN_INFO "DS available\n"); + printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name); + + if (!cpu_has_bts) { + ds_cfg.ctl[dsf_bts] = 0; + printk(KERN_INFO "[ds] bts not available\n"); + } + if (!cpu_has_pebs) + printk(KERN_INFO "[ds] pebs not available\n"); - WARN_ON_ONCE(MAX_SIZEOF_DS < ds_cfg.sizeof_ds); + WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field)); } void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) @@ -787,10 +959,10 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) break; case 0xD: case 0xE: /* Pentium M */ - ds_configure(&ds_cfg_var); + ds_configure(&ds_cfg_pentium_m); break; default: /* Core2, Atom, ... */ - ds_configure(&ds_cfg_64); + ds_configure(&ds_cfg_core2); break; } break; @@ -799,7 +971,7 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) case 0x0: case 0x1: case 0x2: /* Netburst */ - ds_configure(&ds_cfg_var); + ds_configure(&ds_cfg_netburst); break; default: /* sorry, don't know about them */ @@ -812,14 +984,41 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) } } -void ds_free(struct ds_context *context) +/* + * Change the DS configuration from tracing prev to tracing next. + */ +void ds_switch_to(struct task_struct *prev, struct task_struct *next) { - /* This is called when the task owning the parameter context - * is dying. There should not be any user of that context left - * to disturb us, anymore. */ - unsigned long leftovers = context->count; - while (leftovers--) { - put_tracer(context->task); - ds_put_context(context); + struct ds_context *prev_ctx = prev->thread.ds_ctx; + struct ds_context *next_ctx = next->thread.ds_ctx; + + if (prev_ctx) { + update_debugctlmsr(0); + + if (prev_ctx->bts_master && + (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { + struct bts_struct ts = { + .qualifier = bts_task_departs, + .variant.timestamp.jiffies = jiffies_64, + .variant.timestamp.pid = prev->pid + }; + bts_write(prev_ctx->bts_master, &ts); + } + } + + if (next_ctx) { + if (next_ctx->bts_master && + (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { + struct bts_struct ts = { + .qualifier = bts_task_arrives, + .variant.timestamp.jiffies = jiffies_64, + .variant.timestamp.pid = next->pid + }; + bts_write(next_ctx->bts_master, &ts); + } + + wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); } + + update_debugctlmsr(next->thread.debugctlmsr); } diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 24c2276aa453..605eff9a8ac0 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -252,11 +252,14 @@ void exit_thread(void) put_cpu(); } #ifdef CONFIG_X86_DS - /* Free any DS contexts that have not been properly released. */ - if (unlikely(current->thread.ds_ctx)) { - /* we clear debugctl to make sure DS is not used. */ - update_debugctlmsr(0); - ds_free(current->thread.ds_ctx); + /* Free any BTS tracers that have not been properly released. */ + if (unlikely(current->bts)) { + ds_release_bts(current->bts); + current->bts = NULL; + + kfree(current->bts_buffer); + current->bts_buffer = NULL; + current->bts_size = 0; } #endif /* CONFIG_X86_DS */ } @@ -420,48 +423,19 @@ int set_tsc_mode(unsigned int val) return 0; } -#ifdef CONFIG_X86_DS -static int update_debugctl(struct thread_struct *prev, - struct thread_struct *next, unsigned long debugctl) -{ - unsigned long ds_prev = 0; - unsigned long ds_next = 0; - - if (prev->ds_ctx) - ds_prev = (unsigned long)prev->ds_ctx->ds; - if (next->ds_ctx) - ds_next = (unsigned long)next->ds_ctx->ds; - - if (ds_next != ds_prev) { - /* we clear debugctl to make sure DS - * is not in use when we change it */ - debugctl = 0; - update_debugctlmsr(0); - wrmsr(MSR_IA32_DS_AREA, ds_next, 0); - } - return debugctl; -} -#else -static int update_debugctl(struct thread_struct *prev, - struct thread_struct *next, unsigned long debugctl) -{ - return debugctl; -} -#endif /* CONFIG_X86_DS */ - static noinline void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss) { struct thread_struct *prev, *next; - unsigned long debugctl; prev = &prev_p->thread; next = &next_p->thread; - debugctl = update_debugctl(prev, next, prev->debugctlmsr); - - if (next->debugctlmsr != debugctl) + if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || + test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) + ds_switch_to(prev_p, next_p); + else if (next->debugctlmsr != prev->debugctlmsr) update_debugctlmsr(next->debugctlmsr); if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { @@ -483,15 +457,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, hard_enable_TSC(); } -#ifdef CONFIG_X86_PTRACE_BTS - if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) - ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); - - if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) - ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); -#endif /* CONFIG_X86_PTRACE_BTS */ - - if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { /* * Disable the bitmap via an invalid offset. We still cache diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index fbb321d53d34..1cfd2a4bf853 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -237,11 +237,14 @@ void exit_thread(void) put_cpu(); } #ifdef CONFIG_X86_DS - /* Free any DS contexts that have not been properly released. */ - if (unlikely(t->ds_ctx)) { - /* we clear debugctl to make sure DS is not used. */ - update_debugctlmsr(0); - ds_free(t->ds_ctx); + /* Free any BTS tracers that have not been properly released. */ + if (unlikely(current->bts)) { + ds_release_bts(current->bts); + current->bts = NULL; + + kfree(current->bts_buffer); + current->bts_buffer = NULL; + current->bts_size = 0; } #endif /* CONFIG_X86_DS */ } @@ -471,35 +474,14 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, struct tss_struct *tss) { struct thread_struct *prev, *next; - unsigned long debugctl; prev = &prev_p->thread, next = &next_p->thread; - debugctl = prev->debugctlmsr; - -#ifdef CONFIG_X86_DS - { - unsigned long ds_prev = 0, ds_next = 0; - - if (prev->ds_ctx) - ds_prev = (unsigned long)prev->ds_ctx->ds; - if (next->ds_ctx) - ds_next = (unsigned long)next->ds_ctx->ds; - - if (ds_next != ds_prev) { - /* - * We clear debugctl to make sure DS - * is not in use when we change it: - */ - debugctl = 0; - update_debugctlmsr(0); - wrmsrl(MSR_IA32_DS_AREA, ds_next); - } - } -#endif /* CONFIG_X86_DS */ - - if (next->debugctlmsr != debugctl) + if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || + test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) + ds_switch_to(prev_p, next_p); + else if (next->debugctlmsr != prev->debugctlmsr) update_debugctlmsr(next->debugctlmsr); if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { @@ -534,14 +516,6 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, */ memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); } - -#ifdef CONFIG_X86_PTRACE_BTS - if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) - ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); - - if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) - ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); -#endif /* CONFIG_X86_PTRACE_BTS */ } /* diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index b2998fe1166b..45e9855da2d2 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -581,153 +581,73 @@ static int ioperm_get(struct task_struct *target, } #ifdef CONFIG_X86_PTRACE_BTS -/* - * The configuration for a particular BTS hardware implementation. - */ -struct bts_configuration { - /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */ - unsigned char sizeof_bts; - /* the size of a field in the BTS record in bytes */ - unsigned char sizeof_field; - /* a bitmask to enable/disable BTS in DEBUGCTL MSR */ - unsigned long debugctl_mask; -}; -static struct bts_configuration bts_cfg; - -#define BTS_MAX_RECORD_SIZE (8 * 3) - - -/* - * Branch Trace Store (BTS) uses the following format. Different - * architectures vary in the size of those fields. - * - source linear address - * - destination linear address - * - flags - * - * Later architectures use 64bit pointers throughout, whereas earlier - * architectures use 32bit pointers in 32bit mode. - * - * We compute the base address for the first 8 fields based on: - * - the field size stored in the DS configuration - * - the relative field position - * - * In order to store additional information in the BTS buffer, we use - * a special source address to indicate that the record requires - * special interpretation. - * - * Netburst indicated via a bit in the flags field whether the branch - * was predicted; this is ignored. - */ - -enum bts_field { - bts_from = 0, - bts_to, - bts_flags, - - bts_escape = (unsigned long)-1, - bts_qual = bts_to, - bts_jiffies = bts_flags -}; - -static inline unsigned long bts_get(const char *base, enum bts_field field) -{ - base += (bts_cfg.sizeof_field * field); - return *(unsigned long *)base; -} - -static inline void bts_set(char *base, enum bts_field field, unsigned long val) -{ - base += (bts_cfg.sizeof_field * field);; - (*(unsigned long *)base) = val; -} - -/* - * Translate a BTS record from the raw format into the bts_struct format - * - * out (out): bts_struct interpretation - * raw: raw BTS record - */ -static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw) -{ - memset(out, 0, sizeof(*out)); - if (bts_get(raw, bts_from) == bts_escape) { - out->qualifier = bts_get(raw, bts_qual); - out->variant.jiffies = bts_get(raw, bts_jiffies); - } else { - out->qualifier = BTS_BRANCH; - out->variant.lbr.from_ip = bts_get(raw, bts_from); - out->variant.lbr.to_ip = bts_get(raw, bts_to); - } -} - static int ptrace_bts_read_record(struct task_struct *child, size_t index, struct bts_struct __user *out) { - struct bts_struct ret; - const void *bts_record; - size_t bts_index, bts_end; + const struct bts_trace *trace; + struct bts_struct bts; + const unsigned char *at; int error; - error = ds_get_bts_end(child->bts, &bts_end); - if (error < 0) - return error; - - if (bts_end <= index) - return -EINVAL; + trace = ds_read_bts(child->bts); + if (!trace) + return -EPERM; - error = ds_get_bts_index(child->bts, &bts_index); - if (error < 0) - return error; + at = trace->ds.top - ((index + 1) * trace->ds.size); + if ((void *)at < trace->ds.begin) + at += (trace->ds.n * trace->ds.size); - /* translate the ptrace bts index into the ds bts index */ - bts_index += bts_end - (index + 1); - if (bts_end <= bts_index) - bts_index -= bts_end; + if (!trace->read) + return -EOPNOTSUPP; - error = ds_access_bts(child->bts, bts_index, &bts_record); + error = trace->read(child->bts, at, &bts); if (error < 0) return error; - ptrace_bts_translate_record(&ret, bts_record); - - if (copy_to_user(out, &ret, sizeof(ret))) + if (copy_to_user(out, &bts, sizeof(bts))) return -EFAULT; - return sizeof(ret); + return sizeof(bts); } static int ptrace_bts_drain(struct task_struct *child, long size, struct bts_struct __user *out) { - struct bts_struct ret; - const unsigned char *raw; - size_t end, i; - int error; + const struct bts_trace *trace; + const unsigned char *at; + int error, drained = 0; - error = ds_get_bts_index(child->bts, &end); - if (error < 0) - return error; + trace = ds_read_bts(child->bts); + if (!trace) + return -EPERM; - if (size < (end * sizeof(struct bts_struct))) + if (!trace->read) + return -EOPNOTSUPP; + + if (size < (trace->ds.top - trace->ds.begin)) return -EIO; - error = ds_access_bts(child->bts, 0, (const void **)&raw); - if (error < 0) - return error; + for (at = trace->ds.begin; (void *)at < trace->ds.top; + out++, drained++, at += trace->ds.size) { + struct bts_struct bts; + int error; - for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) { - ptrace_bts_translate_record(&ret, raw); + error = trace->read(child->bts, at, &bts); + if (error < 0) + return error; - if (copy_to_user(out, &ret, sizeof(ret))) + if (copy_to_user(out, &bts, sizeof(bts))) return -EFAULT; } - error = ds_clear_bts(child->bts); + memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); + + error = ds_reset_bts(child->bts); if (error < 0) return error; - return end; + return drained; } static int ptrace_bts_config(struct task_struct *child, @@ -735,136 +655,89 @@ static int ptrace_bts_config(struct task_struct *child, const struct ptrace_bts_config __user *ucfg) { struct ptrace_bts_config cfg; - int error = 0; - - error = -EOPNOTSUPP; - if (!bts_cfg.sizeof_bts) - goto errout; + unsigned int flags = 0; - error = -EIO; if (cfg_size < sizeof(cfg)) - goto errout; + return -EIO; - error = -EFAULT; if (copy_from_user(&cfg, ucfg, sizeof(cfg))) - goto errout; - - error = -EINVAL; - if ((cfg.flags & PTRACE_BTS_O_SIGNAL) && - !(cfg.flags & PTRACE_BTS_O_ALLOC)) - goto errout; - - if (cfg.flags & PTRACE_BTS_O_ALLOC) { - bts_ovfl_callback_t ovfl = NULL; - unsigned int sig = 0; - - error = -EINVAL; - if (cfg.size < (10 * bts_cfg.sizeof_bts)) - goto errout; + return -EFAULT; - if (cfg.flags & PTRACE_BTS_O_SIGNAL) { - if (!cfg.signal) - goto errout; + if (child->bts) { + ds_release_bts(child->bts); + child->bts = NULL; + } - error = -EOPNOTSUPP; - goto errout; + if (cfg.flags & PTRACE_BTS_O_SIGNAL) { + if (!cfg.signal) + return -EINVAL; - sig = cfg.signal; - } + return -EOPNOTSUPP; - if (child->bts) { - (void)ds_release_bts(child->bts); - kfree(child->bts_buffer); + child->thread.bts_ovfl_signal = cfg.signal; + } - child->bts = NULL; - child->bts_buffer = NULL; - } + if ((cfg.flags & PTRACE_BTS_O_ALLOC) && + (cfg.size != child->bts_size)) { + kfree(child->bts_buffer); - error = -ENOMEM; + child->bts_size = cfg.size; child->bts_buffer = kzalloc(cfg.size, GFP_KERNEL); - if (!child->bts_buffer) - goto errout; - - child->bts = ds_request_bts(child, child->bts_buffer, cfg.size, - ovfl, /* th = */ (size_t)-1); - if (IS_ERR(child->bts)) { - error = PTR_ERR(child->bts); - kfree(child->bts_buffer); - child->bts = NULL; - child->bts_buffer = NULL; - goto errout; + if (!child->bts_buffer) { + child->bts_size = 0; + return -ENOMEM; } - - child->thread.bts_ovfl_signal = sig; } - error = -EINVAL; - if (!child->thread.ds_ctx && cfg.flags) - goto errout; - if (cfg.flags & PTRACE_BTS_O_TRACE) - child->thread.debugctlmsr |= bts_cfg.debugctl_mask; - else - child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; + flags |= BTS_USER; if (cfg.flags & PTRACE_BTS_O_SCHED) - set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); - else - clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); + flags |= BTS_TIMESTAMPS; - error = sizeof(cfg); + child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size, + /* ovfl = */ NULL, /* th = */ (size_t)-1, + flags); + if (IS_ERR(child->bts)) { + int error = PTR_ERR(child->bts); -out: - if (child->thread.debugctlmsr) - set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); - else - clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); + kfree(child->bts_buffer); + child->bts = NULL; + child->bts_buffer = NULL; + child->bts_size = 0; - return error; + return error; + } -errout: - child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; - clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); - goto out; + return sizeof(cfg); } static int ptrace_bts_status(struct task_struct *child, long cfg_size, struct ptrace_bts_config __user *ucfg) { + const struct bts_trace *trace; struct ptrace_bts_config cfg; - size_t end; - const void *base, *max; - int error; if (cfg_size < sizeof(cfg)) return -EIO; - error = ds_get_bts_end(child->bts, &end); - if (error < 0) - return error; - - error = ds_access_bts(child->bts, /* index = */ 0, &base); - if (error < 0) - return error; - - error = ds_access_bts(child->bts, /* index = */ end, &max); - if (error < 0) - return error; + trace = ds_read_bts(child->bts); + if (!trace) + return -EPERM; memset(&cfg, 0, sizeof(cfg)); - cfg.size = (max - base); + cfg.size = trace->ds.end - trace->ds.begin; cfg.signal = child->thread.bts_ovfl_signal; cfg.bts_size = sizeof(struct bts_struct); if (cfg.signal) cfg.flags |= PTRACE_BTS_O_SIGNAL; - if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && - child->thread.debugctlmsr & bts_cfg.debugctl_mask) + if (trace->ds.flags & BTS_USER) cfg.flags |= PTRACE_BTS_O_TRACE; - if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) + if (trace->ds.flags & BTS_TIMESTAMPS) cfg.flags |= PTRACE_BTS_O_SCHED; if (copy_to_user(ucfg, &cfg, sizeof(cfg))) @@ -873,105 +746,28 @@ static int ptrace_bts_status(struct task_struct *child, return sizeof(cfg); } -static int ptrace_bts_write_record(struct task_struct *child, - const struct bts_struct *in) +static int ptrace_bts_clear(struct task_struct *child) { - unsigned char bts_record[BTS_MAX_RECORD_SIZE]; + const struct bts_trace *trace; - if (BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts) - return -EOVERFLOW; + trace = ds_read_bts(child->bts); + if (!trace) + return -EPERM; - memset(bts_record, 0, bts_cfg.sizeof_bts); - switch (in->qualifier) { - case BTS_INVALID: - break; + memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); - case BTS_BRANCH: - bts_set(bts_record, bts_from, in->variant.lbr.from_ip); - bts_set(bts_record, bts_to, in->variant.lbr.to_ip); - break; - - case BTS_TASK_ARRIVES: - case BTS_TASK_DEPARTS: - bts_set(bts_record, bts_from, bts_escape); - bts_set(bts_record, bts_qual, in->qualifier); - bts_set(bts_record, bts_jiffies, in->variant.jiffies); - break; - - default: - return -EINVAL; - } - - return ds_write_bts(child->bts, bts_record, bts_cfg.sizeof_bts); + return ds_reset_bts(child->bts); } -void ptrace_bts_take_timestamp(struct task_struct *tsk, - enum bts_qualifier qualifier) +static int ptrace_bts_size(struct task_struct *child) { - struct bts_struct rec = { - .qualifier = qualifier, - .variant.jiffies = jiffies_64 - }; - - ptrace_bts_write_record(tsk, &rec); -} - -static const struct bts_configuration bts_cfg_netburst = { - .sizeof_bts = sizeof(long) * 3, - .sizeof_field = sizeof(long), - .debugctl_mask = (1<<2)|(1<<3)|(1<<5) -}; + const struct bts_trace *trace; -static const struct bts_configuration bts_cfg_pentium_m = { - .sizeof_bts = sizeof(long) * 3, - .sizeof_field = sizeof(long), - .debugctl_mask = (1<<6)|(1<<7) -}; + trace = ds_read_bts(child->bts); + if (!trace) + return -EPERM; -static const struct bts_configuration bts_cfg_core2 = { - .sizeof_bts = 8 * 3, - .sizeof_field = 8, - .debugctl_mask = (1<<6)|(1<<7)|(1<<9) -}; - -static inline void bts_configure(const struct bts_configuration *cfg) -{ - bts_cfg = *cfg; -} - -void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c) -{ - switch (c->x86) { - case 0x6: - switch (c->x86_model) { - case 0 ... 0xC: - /* sorry, don't know about them */ - break; - case 0xD: - case 0xE: /* Pentium M */ - bts_configure(&bts_cfg_pentium_m); - break; - default: /* Core2, Atom, ... */ - bts_configure(&bts_cfg_core2); - break; - } - break; - case 0xF: - switch (c->x86_model) { - case 0x0: - case 0x1: - case 0x2: /* Netburst */ - bts_configure(&bts_cfg_netburst); - break; - default: - /* sorry, don't know about them */ - break; - } - break; - default: - /* sorry, don't know about them */ - break; - } + return (trace->ds.top - trace->ds.begin) / trace->ds.size; } #endif /* CONFIG_X86_PTRACE_BTS */ @@ -988,15 +784,12 @@ void ptrace_disable(struct task_struct *child) #endif #ifdef CONFIG_X86_PTRACE_BTS if (child->bts) { - (void)ds_release_bts(child->bts); + ds_release_bts(child->bts); + child->bts = NULL; + kfree(child->bts_buffer); child->bts_buffer = NULL; - - child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; - if (!child->thread.debugctlmsr) - clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); - - clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); + child->bts_size = 0; } #endif /* CONFIG_X86_PTRACE_BTS */ } @@ -1129,16 +922,9 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) (child, data, (struct ptrace_bts_config __user *)addr); break; - case PTRACE_BTS_SIZE: { - size_t size; - - ret = ds_get_bts_index(child->bts, &size); - if (ret == 0) { - WARN_ON_ONCE(size != (int) size); - ret = (int) size; - } + case PTRACE_BTS_SIZE: + ret = ptrace_bts_size(child); break; - } case PTRACE_BTS_GET: ret = ptrace_bts_read_record @@ -1146,7 +932,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; case PTRACE_BTS_CLEAR: - ret = ds_clear_bts(child->bts); + ret = ptrace_bts_clear(child); break; case PTRACE_BTS_DRAIN: @@ -1409,6 +1195,14 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, case PTRACE_GET_THREAD_AREA: case PTRACE_SET_THREAD_AREA: +#ifdef CONFIG_X86_PTRACE_BTS + case PTRACE_BTS_CONFIG: + case PTRACE_BTS_STATUS: + case PTRACE_BTS_SIZE: + case PTRACE_BTS_GET: + case PTRACE_BTS_CLEAR: + case PTRACE_BTS_DRAIN: +#endif /* CONFIG_X86_PTRACE_BTS */ return arch_ptrace(child, request, addr, data); default: diff --git a/include/linux/sched.h b/include/linux/sched.h index 4b81fc5f7731..dc5ea65dc716 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1176,6 +1176,7 @@ struct task_struct { * The buffer to hold the BTS data. */ void *bts_buffer; + size_t bts_size; #endif /* CONFIG_X86_PTRACE_BTS */ /* PID/PID hash table linkage. */ -- cgit v1.2.3 From 8001530d5af707eb9a158839c8f651eb6c1cb3c2 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 11 Dec 2008 16:10:08 +0100 Subject: tracing/fastboot: fix len of func buffer Impact: fix possible stack overrun This is a port of a patch included in the mainline (KSYM_SYMBOL_LEN fixes). The current func len is not large enough to contain the max symbol len, the right size must be KSYM_SYMBOL_LEN. Signed-off-by: Stephen Rothwell Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- include/trace/boot.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/boot.h b/include/trace/boot.h index 6b54537eab02..3ec58b4b7512 100644 --- a/include/trace/boot.h +++ b/include/trace/boot.h @@ -9,7 +9,7 @@ */ struct boot_trace_call { pid_t caller; - char func[KSYM_NAME_LEN]; + char func[KSYM_SYMBOL_LEN]; }; /* @@ -17,7 +17,7 @@ struct boot_trace_call { * while it returns. */ struct boot_trace_ret { - char func[KSYM_NAME_LEN]; + char func[KSYM_SYMBOL_LEN]; int result; unsigned long long duration; /* nsecs */ }; -- cgit v1.2.3 From da485e0cb16726797e99a595a399b9fc721b91bc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 11 Dec 2008 16:14:23 +0100 Subject: tracing/fastboot: include missing headers For now include/trace/boot.h doesn't need to include necessary headers for its functions and structures because the files that include it already do it. But boot.h could be needed as well for further uses on other files. So, this patch adds the necessary headers for future purposes... Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- include/trace/boot.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/trace/boot.h b/include/trace/boot.h index 3ec58b4b7512..088ea089e31d 100644 --- a/include/trace/boot.h +++ b/include/trace/boot.h @@ -1,6 +1,10 @@ #ifndef _LINUX_TRACE_BOOT_H #define _LINUX_TRACE_BOOT_H +#include +#include +#include + /* * Structure which defines the trace of an initcall * while it is called. -- cgit v1.2.3 From a0343e823184070f55364d8359f832dcb33c57c7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 9 Dec 2008 23:53:16 +0100 Subject: tracing/function-graph-tracer: add a new .irqentry.text section Impact: let the function-graph-tracer be aware of the irq entrypoints Add a new .irqentry.text section to store the irq entrypoints functions inside the same section. This way, the tracer will be able to signal an interrupts triggering on output by recognizing these entrypoints. Also, make this section recordable for dynamic tracing. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmlinux_64.lds.S | 1 + include/asm-generic/vmlinux.lds.h | 10 ++++++++++ scripts/recordmcount.pl | 1 + 3 files changed, 12 insertions(+) (limited to 'include') diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 46e05447405b..1a614c0e6bef 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -35,6 +35,7 @@ SECTIONS SCHED_TEXT LOCK_TEXT KPROBES_TEXT + IRQENTRY_TEXT *(.fixup) *(.gnu.warning) _etext = .; /* End of text section */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index eba835a2c2cd..c61fab1dd2f8 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -288,6 +288,16 @@ *(.kprobes.text) \ VMLINUX_SYMBOL(__kprobes_text_end) = .; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +#define IRQENTRY_TEXT \ + ALIGN_FUNCTION(); \ + VMLINUX_SYMBOL(__irqentry_text_start) = .; \ + *(.irqentry.text) \ + VMLINUX_SYMBOL(__irqentry_text_end) = .; +#else +#define IRQENTRY_TEXT +#endif + /* Section used for early init (in .S files) */ #define HEAD_TEXT *(.head.text) diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 0b1dc9f9bb06..fe831412bea9 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -114,6 +114,7 @@ my %text_sections = ( ".text" => 1, ".sched.text" => 1, ".spinlock.text" => 1, + ".irqentry.text" => 1, ); $objdump = "objdump" if ((length $objdump) == 0); -- cgit v1.2.3 From bcbc4f20b52c2c40c43a4d2337707dcdfe81bc3a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 9 Dec 2008 23:54:20 +0100 Subject: tracing/function-graph-tracer: annotate do_IRQ and smp_apic_timer_interrupt Impact: move most important x86 irq entry-points to a separate subsection Annotate do_IRQ and smp_apic_timer_interrupt to put them into the .irqentry.text subsection. These function will so be recognized as hardirq entrypoints for the function-graph-tracer. We could also annotate other irq entries but the others are far less important but they can be added on request. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 3 ++- arch/x86/kernel/irq_64.c | 3 ++- include/linux/ftrace.h | 11 +++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 16f94879b525..b946ac19753b 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -800,7 +801,7 @@ static void local_apic_timer_interrupt(void) * [ if a single-CPU system runs an SMP kernel then we call the local * interrupt as well. Thus we cannot inline the local irq ... ] */ -void smp_apic_timer_interrupt(struct pt_regs *regs) +void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 60eb84eb77a0..11c65e811ffe 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -47,7 +48,7 @@ static inline void stack_overflow_check(struct pt_regs *regs) * SMP cross-CPU interrupts have their own specific * handlers). */ -asmlinkage unsigned int do_IRQ(struct pt_regs *regs) +asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); struct irq_desc *desc; diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 11cac81eed08..44020f31bd81 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -377,6 +377,16 @@ struct ftrace_graph_ret { */ #define __notrace_funcgraph notrace +/* + * We want to which function is an entrypoint of a hardirq. + * That will help us to put a signal on output. + */ +#define __irq_entry __attribute__((__section__(".irqentry.text"))) + +/* Limits of hardirq entrypoints */ +extern char __irqentry_text_start[]; +extern char __irqentry_text_end[]; + #define FTRACE_RETFUNC_DEPTH 50 #define FTRACE_RETSTACK_ALLOC_SIZE 32 /* Type of the callback handlers for tracing function graph*/ @@ -414,6 +424,7 @@ static inline void unpause_graph_tracing(void) #else #define __notrace_funcgraph +#define __irq_entry static inline void ftrace_graph_init_task(struct task_struct *t) { } static inline void ftrace_graph_exit_task(struct task_struct *t) { } -- cgit v1.2.3 From cbc34ed1ac36690f75fd272e19e7b4fc29aae5a2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Dec 2008 08:08:22 +0100 Subject: sched: fix tracepoints in scheduler The trace point only caught one of many places where a task changes cpu, put it in the right place to we get all of them. Change the signature while we're at it. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/trace/sched.h | 4 ++-- kernel/sched.c | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/sched.h b/include/trace/sched.h index 9b2854abf7e2..f4549d506b16 100644 --- a/include/trace/sched.h +++ b/include/trace/sched.h @@ -30,8 +30,8 @@ DECLARE_TRACE(sched_switch, TPARGS(rq, prev, next)); DECLARE_TRACE(sched_migrate_task, - TPPROTO(struct rq *rq, struct task_struct *p, int dest_cpu), - TPARGS(rq, p, dest_cpu)); + TPPROTO(struct task_struct *p, int orig_cpu, int dest_cpu), + TPARGS(p, orig_cpu, dest_cpu)); DECLARE_TRACE(sched_process_free, TPPROTO(struct task_struct *p), diff --git a/kernel/sched.c b/kernel/sched.c index 7729c4bbc8ba..d377097572f9 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1851,6 +1851,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) clock_offset = old_rq->clock - new_rq->clock; + trace_sched_migrate_task(p, task_cpu(p), new_cpu); + #ifdef CONFIG_SCHEDSTATS if (p->se.wait_start) p->se.wait_start -= clock_offset; @@ -2868,7 +2870,6 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu) || unlikely(!cpu_active(dest_cpu))) goto out; - trace_sched_migrate_task(rq, p, dest_cpu); /* force the process onto the specified CPU */ if (migrate_task(p, dest_cpu, &req)) { /* Need to wait for migration thread (might exit: take ref). */ -- cgit v1.2.3 From 0ebb26e7a4e2c5337502e98b2221e037fda911b9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 12 Dec 2008 11:26:39 +0100 Subject: sparse irqs: handle !GENIRQ platforms Impact: build fix fix: In file included from /home/mingo/tip/arch/m68k/amiga/amiints.c:39: /home/mingo/tip/include/linux/interrupt.h:21: error: expected identifier or '(' /home/mingo/tip/arch/m68k/amiga/amiints.c: In function 'amiga_init_IRQ': Signed-off-by: Ingo Molnar --- include/linux/interrupt.h | 4 ++-- include/linux/irqnr.h | 11 ++++++++++- include/linux/random.h | 2 +- 3 files changed, 13 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 79e915e7e8a5..777f89e00b4a 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -14,12 +14,12 @@ #include #include #include +#include + #include #include #include -extern int nr_irqs; - /* * These correspond to the IORESOURCE_IRQ_* defines in * linux/ioport.h to select the interrupt line behaviour. When diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 13754f813589..95d2b74641f5 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -1,6 +1,11 @@ #ifndef _LINUX_IRQNR_H #define _LINUX_IRQNR_H +/* + * Generic irq_desc iterators: + */ +#ifdef __KERNEL__ + #ifndef CONFIG_GENERIC_HARDIRQS #include # define nr_irqs NR_IRQS @@ -11,10 +16,12 @@ # define for_each_irq_desc_reverse(irq, desc) \ for (irq = nr_irqs - 1; irq >= 0; irq--) #else + +extern int nr_irqs; + #ifndef CONFIG_SPARSE_IRQ struct irq_desc; -extern int nr_irqs; # define for_each_irq_desc(irq, desc) \ for (irq = 0, desc = irq_desc; irq < nr_irqs; irq++, desc++) # define for_each_irq_desc_reverse(irq, desc) \ @@ -26,4 +33,6 @@ extern int nr_irqs; #define for_each_irq_nr(irq) \ for (irq = 0; irq < nr_irqs; irq++) +#endif /* __KERNEL__ */ + #endif diff --git a/include/linux/random.h b/include/linux/random.h index ad9daa2374d5..adbf3bd3c6b3 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -8,6 +8,7 @@ #define _LINUX_RANDOM_H #include +#include /* ioctl()'s for the random number generator */ @@ -49,7 +50,6 @@ struct timer_rand_state; extern struct timer_rand_state *irq_timer_state[]; -extern int nr_irqs; static inline struct timer_rand_state *get_timer_rand_state(unsigned int irq) { if (irq >= nr_irqs) -- cgit v1.2.3 From 30cb367ea2be76bf71dbd275f38d0fd3b6f4142b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 12 Dec 2008 12:19:57 +0100 Subject: sparse irqs: add irqnr.h to the user headers list Impact: fix build error /home/mingo/tip/usr/include/linux/random.h:11: included file 'linux/irqnr.h' is not exported Signed-off-by: Ingo Molnar --- include/linux/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index e531783e5d78..95ac82340c3b 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -313,6 +313,7 @@ unifdef-y += ptrace.h unifdef-y += qnx4_fs.h unifdef-y += quota.h unifdef-y += random.h +unifdef-y += irqnr.h unifdef-y += reboot.h unifdef-y += reiserfs_fs.h unifdef-y += reiserfs_xattr.h -- cgit v1.2.3 From ee79d1bdb6a10499e53f80b1e8d14110215178ba Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 9 Dec 2008 18:49:50 +0100 Subject: sched: let arch_update_cpu_topology indicate if topology changed Change arch_update_cpu_topology so it returns 1 if the cpu topology changed and 0 if it didn't change. This will be useful for the next patch which adds a call to this function in partition_sched_domains. Signed-off-by: Heiko Carstens Signed-off-by: Ingo Molnar --- arch/s390/kernel/topology.c | 5 +++-- include/linux/topology.h | 2 +- kernel/sched.c | 8 +++++++- 3 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index a947899dcba1..bf96f1b5c6ec 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -212,7 +212,7 @@ static void update_cpu_core_map(void) cpu_core_map[cpu] = cpu_coregroup_map(cpu); } -void arch_update_cpu_topology(void) +int arch_update_cpu_topology(void) { struct tl_info *info = tl_info; struct sys_device *sysdev; @@ -221,7 +221,7 @@ void arch_update_cpu_topology(void) if (!machine_has_topology) { update_cpu_core_map(); topology_update_polarization_simple(); - return; + return 0; } stsi(info, 15, 1, 2); tl_to_cores(info); @@ -230,6 +230,7 @@ void arch_update_cpu_topology(void) sysdev = get_cpu_sysdev(cpu); kobject_uevent(&sysdev->kobj, KOBJ_CHANGE); } + return 1; } static void topology_work_fn(struct work_struct *work) diff --git a/include/linux/topology.h b/include/linux/topology.h index 117f1b7405cf..0c5b5ac36d8e 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -49,7 +49,7 @@ for_each_online_node(node) \ if (nr_cpus_node(node)) -void arch_update_cpu_topology(void); +int arch_update_cpu_topology(void); /* Conform to ACPI 2.0 SLIT distance definitions */ #define LOCAL_DISTANCE 10 diff --git a/kernel/sched.c b/kernel/sched.c index ef212da928e8..fcfbbd9dbd60 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7675,8 +7675,14 @@ static struct sched_domain_attr *dattr_cur; */ static cpumask_t fallback_doms; -void __attribute__((weak)) arch_update_cpu_topology(void) +/* + * arch_update_cpu_topology lets virtualized architectures update the + * cpu core maps. It is supposed to return 1 if the topology changed + * or 0 if it stayed the same. + */ +int __attribute__((weak)) arch_update_cpu_topology(void) { + return 0; } /* -- cgit v1.2.3 From 27af4245b6ce99e08c6a7c38825383bf51119cc9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 1 Dec 2008 14:18:13 -0800 Subject: posix-timers: use "struct pid*" instead of "struct task_struct*" Impact: restructure, clean up code k_itimer holds the ref to the ->it_process until sys_timer_delete(). This allows to pin up to RLIMIT_SIGPENDING dead task_struct's. Change the code to use "struct pid *" instead. The patch doesn't kill ->it_process, it places ->it_pid into the union. ->it_process is still used by do_cpu_nanosleep() as before. It would be trivial to change the nanosleep code as well, but since it uses it_process in a special way I think it is better to keep this field for grep. The patch bloats the kernel by 104 bytes and it also adds the new pointer, ->it_signal, to k_itimer. It is used by lock_timer() to verify that the found timer was not created by another process. It is not clear why do we use the global database (and thus the global idr_lock) for posix timers. We still need the signal_struct->posix_timers which contains all useable timers, perhaps it is better to use some form of per-process array instead. Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/posix-timers.h | 6 +++++- kernel/posix-timers.c | 43 +++++++++++++++++++++++-------------------- 2 files changed, 28 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index a7c721355549..4f71bf4e628c 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -45,7 +45,11 @@ struct k_itimer { int it_requeue_pending; /* waiting to requeue this timer */ #define REQUEUE_PENDING 1 int it_sigev_notify; /* notify word of sigevent struct */ - struct task_struct *it_process; /* process to send signal to */ + struct signal_struct *it_signal; + union { + struct pid *it_pid; /* pid of process to send signal to */ + struct task_struct *it_process; /* for clock_nanosleep */ + }; struct sigqueue *sigq; /* signal queue entry. */ union { struct { diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 5e79c662294b..42a39afd694a 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -116,7 +116,7 @@ static DEFINE_SPINLOCK(idr_lock); * must supply functions here, even if the function just returns * ENOSYS. The standard POSIX timer management code assumes the * following: 1.) The k_itimer struct (sched.h) is used for the - * timer. 2.) The list, it_lock, it_clock, it_id and it_process + * timer. 2.) The list, it_lock, it_clock, it_id and it_pid * fields are not modified by timer code. * * At this time all functions EXCEPT clock_nanosleep can be @@ -313,7 +313,8 @@ void do_schedule_next_timer(struct siginfo *info) int posix_timer_event(struct k_itimer *timr, int si_private) { - int shared, ret; + struct task_struct *task; + int shared, ret = -1; /* * FIXME: if ->sigq is queued we can race with * dequeue_signal()->do_schedule_next_timer(). @@ -327,8 +328,13 @@ int posix_timer_event(struct k_itimer *timr, int si_private) */ timr->sigq->info.si_sys_private = si_private; - shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID); - ret = send_sigqueue(timr->sigq, timr->it_process, shared); + rcu_read_lock(); + task = pid_task(timr->it_pid, PIDTYPE_PID); + if (task) { + shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID); + ret = send_sigqueue(timr->sigq, task, shared); + } + rcu_read_unlock(); /* If we failed to send the signal the timer stops. */ return ret > 0; } @@ -405,7 +411,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) return ret; } -static struct task_struct * good_sigevent(sigevent_t * event) +static struct pid *good_sigevent(sigevent_t * event) { struct task_struct *rtn = current->group_leader; @@ -419,7 +425,7 @@ static struct task_struct * good_sigevent(sigevent_t * event) ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX))) return NULL; - return rtn; + return task_pid(rtn); } void register_posix_clock(const clockid_t clock_id, struct k_clock *new_clock) @@ -471,7 +477,7 @@ sys_timer_create(const clockid_t which_clock, { struct k_itimer *new_timer; int error, new_timer_id; - struct task_struct *process; + struct pid *it_pid; sigevent_t event; int it_id_set = IT_ID_NOT_SET; @@ -525,11 +531,9 @@ sys_timer_create(const clockid_t which_clock, goto out; } rcu_read_lock(); - process = good_sigevent(&event); - if (process) - get_task_struct(process); + it_pid = get_pid(good_sigevent(&event)); rcu_read_unlock(); - if (!process) { + if (!it_pid) { error = -EINVAL; goto out; } @@ -537,8 +541,7 @@ sys_timer_create(const clockid_t which_clock, event.sigev_notify = SIGEV_SIGNAL; event.sigev_signo = SIGALRM; event.sigev_value.sival_int = new_timer->it_id; - process = current->group_leader; - get_task_struct(process); + it_pid = get_pid(task_tgid(current)); } new_timer->it_sigev_notify = event.sigev_notify; @@ -548,7 +551,8 @@ sys_timer_create(const clockid_t which_clock, new_timer->sigq->info.si_code = SI_TIMER; spin_lock_irq(¤t->sighand->siglock); - new_timer->it_process = process; + new_timer->it_pid = it_pid; + new_timer->it_signal = current->signal; list_add(&new_timer->list, ¤t->signal->posix_timers); spin_unlock_irq(¤t->sighand->siglock); @@ -583,8 +587,7 @@ static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags) timr = idr_find(&posix_timers_id, (int)timer_id); if (timr) { spin_lock(&timr->it_lock); - if (timr->it_process && - same_thread_group(timr->it_process, current)) { + if (timr->it_pid && timr->it_signal == current->signal) { spin_unlock(&idr_lock); return timr; } @@ -831,8 +834,8 @@ retry_delete: * This keeps any tasks waiting on the spin lock from thinking * they got something (see the lock code above). */ - put_task_struct(timer->it_process); - timer->it_process = NULL; + put_pid(timer->it_pid); + timer->it_pid = NULL; unlock_timer(timer, flags); release_posix_timer(timer, IT_ID_SET); @@ -858,8 +861,8 @@ retry_delete: * This keeps any tasks waiting on the spin lock from thinking * they got something (see the lock code above). */ - put_task_struct(timer->it_process); - timer->it_process = NULL; + put_pid(timer->it_pid); + timer->it_pid = NULL; unlock_timer(timer, flags); release_posix_timer(timer, IT_ID_SET); -- cgit v1.2.3 From c29541b24fb2c6301021637229ae5347c877330a Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 1 Dec 2008 14:18:11 -0800 Subject: linux/timex.h: cleanup for userspace Impact: fix user-space exported use Move all the kernel-specific defines and includes into the __KERNEL__ section so that they don't get leaked into userspace. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/timex.h | 73 ++++++++++++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/linux/timex.h b/include/linux/timex.h index 9007313b5b71..998a55d80acf 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -53,46 +53,10 @@ #ifndef _LINUX_TIMEX_H #define _LINUX_TIMEX_H -#include #include -#include - #define NTP_API 4 /* NTP API version */ -/* - * SHIFT_KG and SHIFT_KF establish the damping of the PLL and are chosen - * for a slightly underdamped convergence characteristic. SHIFT_KH - * establishes the damping of the FLL and is chosen by wisdom and black - * art. - * - * MAXTC establishes the maximum time constant of the PLL. With the - * SHIFT_KG and SHIFT_KF values given and a time constant range from - * zero to MAXTC, the PLL will converge in 15 minutes to 16 hours, - * respectively. - */ -#define SHIFT_PLL 4 /* PLL frequency factor (shift) */ -#define SHIFT_FLL 2 /* FLL frequency factor (shift) */ -#define MAXTC 10 /* maximum time constant (shift) */ - -/* - * SHIFT_USEC defines the scaling (shift) of the time_freq and - * time_tolerance variables, which represent the current frequency - * offset and maximum frequency tolerance. - */ -#define SHIFT_USEC 16 /* frequency offset scale (shift) */ -#define PPM_SCALE (NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC)) -#define PPM_SCALE_INV_SHIFT 19 -#define PPM_SCALE_INV ((1ll << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \ - PPM_SCALE + 1) - -#define MAXPHASE 500000000l /* max phase error (ns) */ -#define MAXFREQ 500000 /* max frequency error (ns/s) */ -#define MAXFREQ_SCALED ((s64)MAXFREQ << NTP_SCALE_SHIFT) -#define MINSEC 256 /* min interval between updates (s) */ -#define MAXSEC 2048 /* max interval between updates (s) */ -#define NTP_PHASE_LIMIT ((MAXPHASE / NSEC_PER_USEC) << 5) /* beyond max. dispersion */ - /* * syscall interface - used (mainly by NTP daemon) * to discipline kernel clock oscillator @@ -199,8 +163,45 @@ struct timex { #define TIME_BAD TIME_ERROR /* bw compat */ #ifdef __KERNEL__ +#include +#include +#include + #include +/* + * SHIFT_KG and SHIFT_KF establish the damping of the PLL and are chosen + * for a slightly underdamped convergence characteristic. SHIFT_KH + * establishes the damping of the FLL and is chosen by wisdom and black + * art. + * + * MAXTC establishes the maximum time constant of the PLL. With the + * SHIFT_KG and SHIFT_KF values given and a time constant range from + * zero to MAXTC, the PLL will converge in 15 minutes to 16 hours, + * respectively. + */ +#define SHIFT_PLL 4 /* PLL frequency factor (shift) */ +#define SHIFT_FLL 2 /* FLL frequency factor (shift) */ +#define MAXTC 10 /* maximum time constant (shift) */ + +/* + * SHIFT_USEC defines the scaling (shift) of the time_freq and + * time_tolerance variables, which represent the current frequency + * offset and maximum frequency tolerance. + */ +#define SHIFT_USEC 16 /* frequency offset scale (shift) */ +#define PPM_SCALE (NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC)) +#define PPM_SCALE_INV_SHIFT 19 +#define PPM_SCALE_INV ((1ll << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \ + PPM_SCALE + 1) + +#define MAXPHASE 500000000l /* max phase error (ns) */ +#define MAXFREQ 500000 /* max frequency error (ns/s) */ +#define MAXFREQ_SCALED ((s64)MAXFREQ << NTP_SCALE_SHIFT) +#define MINSEC 256 /* min interval between updates (s) */ +#define MAXSEC 2048 /* max interval between updates (s) */ +#define NTP_PHASE_LIMIT ((MAXPHASE / NSEC_PER_USEC) << 5) /* beyond max. dispersion */ + /* * kernel variables * Note: maximum error = NTP synch distance = dispersion + delay / 2; -- cgit v1.2.3 From bb608e9db7d29616fb6e0d856c23434610d4a1bd Mon Sep 17 00:00:00 2001 From: Senthil Balasubramanian Date: Thu, 4 Dec 2008 20:38:13 +0530 Subject: wireless: Incorrect LEAP authentication algorithm identifier. This patch fixes a regression introduced by "wireless: avoid some net/ieee80211.h vs. linux/ieee80211.h conflicts" LEAP authentication algorithm identifier should be 128. Signed-off-by: Senthil Balasubramanian Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a6ec928186ad..c4e6ca1a6306 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -836,7 +836,7 @@ struct ieee80211_ht_info { /* Authentication algorithms */ #define WLAN_AUTH_OPEN 0 #define WLAN_AUTH_SHARED_KEY 1 -#define WLAN_AUTH_LEAP 2 +#define WLAN_AUTH_LEAP 128 #define WLAN_AUTH_CHALLENGE_LEN 128 -- cgit v1.2.3 From 7ba1c04ed727a70df2dc63464232c0ec906ad67d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 8 Dec 2008 11:18:32 +0100 Subject: mac80211: improve sta_notify documentation Mention more possible STA entries and document the atomic requirement. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e84c922a1b16..346f373fb676 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1262,8 +1262,8 @@ enum ieee80211_ampdu_mlme_action { * the device does fragmentation by itself; if this method is assigned then * the stack will not do fragmentation. * - * @sta_notify: Notifies low level driver about addition or removal - * of associated station or AP. + * @sta_notify: Notifies low level driver about addition or removal of an + * associated station, AP, IBSS/WDS/mesh peer etc. Must be atomic. * * @sta_ps_notify: Notifies low level driver about the power state transition * of a associated station. Must be atomic. -- cgit v1.2.3 From 0f202aa2e1e1db1d20da9bcc3f5ad43c5a22d2d5 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Mon, 8 Dec 2008 14:51:41 -0500 Subject: ieee80211_security: correct warning about width of auth_mode Also remove auth_algo which is unused. Signed-off-by: John W. Linville --- include/net/ieee80211.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h index 7ab3ed2bbccb..adb7cf31f781 100644 --- a/include/net/ieee80211.h +++ b/include/net/ieee80211.h @@ -385,9 +385,8 @@ struct ieee80211_device; #define SCM_TEMPORAL_KEY_LENGTH 16 struct ieee80211_security { - u16 active_key:2, - enabled:1, - auth_mode:2, auth_algo:4, unicast_uses_group:1, encrypt:1; + u16 active_key:2, enabled:1, unicast_uses_group:1, encrypt:1; + u8 auth_mode; u8 encode_alg[WEP_KEYS]; u8 key_sizes[WEP_KEYS]; u8 keys[WEP_KEYS][SCM_KEY_LEN]; -- cgit v1.2.3 From f546638c3f809fdacddc03fe765669c3042e0d9d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 9 Dec 2008 12:30:03 +0100 Subject: mac80211: remove fragmentation offload functionality There's no driver that actually does fragmentation on the device, and the callback is buggy (when it returns an error, mac80211's fragmentation status is changed so reading the frag threshold from userspace reads the new value despite the error). Let's just remove it, if we really find some hardware supporting it we can add it back later. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 5 ----- net/mac80211/tx.c | 1 - net/mac80211/wext.c | 8 -------- 3 files changed, 14 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 346f373fb676..5ecc686c1f1a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1258,10 +1258,6 @@ enum ieee80211_ampdu_mlme_action { * * @set_rts_threshold: Configuration of RTS threshold (if device needs it) * - * @set_frag_threshold: Configuration of fragmentation threshold. Assign this if - * the device does fragmentation by itself; if this method is assigned then - * the stack will not do fragmentation. - * * @sta_notify: Notifies low level driver about addition or removal of an * associated station, AP, IBSS/WDS/mesh peer etc. Must be atomic. * @@ -1331,7 +1327,6 @@ struct ieee80211_ops { void (*get_tkip_seq)(struct ieee80211_hw *hw, u8 hw_key_idx, u32 *iv32, u16 *iv16); int (*set_rts_threshold)(struct ieee80211_hw *hw, u32 value); - int (*set_frag_threshold)(struct ieee80211_hw *hw, u32 value); void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum sta_notify_cmd, struct ieee80211_sta *sta); void (*sta_notify_ps)(struct ieee80211_hw *hw, diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d7761e95e4cf..b098c58d216f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1001,7 +1001,6 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, if (tx->flags & IEEE80211_TX_FRAGMENTED) { if ((tx->flags & IEEE80211_TX_UNICAST) && skb->len + FCS_LEN > local->fragmentation_threshold && - !local->ops->set_frag_threshold && !(info->flags & IEEE80211_TX_CTL_AMPDU)) tx->flags |= IEEE80211_TX_FRAGMENTED; else diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 4e1fdcfacb0c..fbeb927c116b 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -639,14 +639,6 @@ static int ieee80211_ioctl_siwfrag(struct net_device *dev, local->fragmentation_threshold = frag->value & ~0x1; } - /* If the wlan card performs fragmentation in hardware/firmware, - * configure it here */ - - if (local->ops->set_frag_threshold) - return local->ops->set_frag_threshold( - local_to_hw(local), - local->fragmentation_threshold); - return 0; } -- cgit v1.2.3 From 89fad578a61810b7fdf8edd294890f3c0cde4390 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Tue, 9 Dec 2008 16:28:06 +0100 Subject: mac80211: integrate sta_notify_ps cmds into sta_notify This patch replaces the newly introduced sta_notify_ps function, which can be used to notify the driver about every power state transition for all associated stations, by integrating its functionality back into the original sta_notify callback. Signed-off-by: Christian Lamparter Acked-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/mac80211_hwsim.c | 4 ++++ drivers/net/wireless/p54/p54common.c | 18 ++++-------------- include/net/mac80211.h | 27 +++++++-------------------- net/mac80211/rx.c | 12 ++++++------ 4 files changed, 21 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 530648b39935..fd5a537ac51d 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -522,6 +522,10 @@ static void mac80211_hwsim_sta_notify(struct ieee80211_hw *hw, case STA_NOTIFY_REMOVE: hwsim_clear_sta_magic(sta); break; + case STA_NOTIFY_SLEEP: + case STA_NOTIFY_AWAKE: + /* TODO: make good use of these flags */ + break; } } diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c index 89968a5bff84..409ae930d766 100644 --- a/drivers/net/wireless/p54/p54common.c +++ b/drivers/net/wireless/p54/p54common.c @@ -1051,19 +1051,6 @@ static int p54_sta_unlock(struct ieee80211_hw *dev, u8 *addr) return 0; } -static void p54_sta_notify_ps(struct ieee80211_hw *dev, - enum sta_notify_ps_cmd notify_cmd, - struct ieee80211_sta *sta) -{ - switch (notify_cmd) { - case STA_NOTIFY_AWAKE: - p54_sta_unlock(dev, sta->addr); - break; - default: - break; - } -} - static void p54_sta_notify(struct ieee80211_hw *dev, struct ieee80211_vif *vif, enum sta_notify_cmd notify_cmd, struct ieee80211_sta *sta) @@ -1076,6 +1063,10 @@ static void p54_sta_notify(struct ieee80211_hw *dev, struct ieee80211_vif *vif, * need to buffer frames for this station anymore. */ + p54_sta_unlock(dev, sta->addr); + break; + case STA_NOTIFY_AWAKE: + /* update the firmware's filter table */ p54_sta_unlock(dev, sta->addr); break; default: @@ -2027,7 +2018,6 @@ static const struct ieee80211_ops p54_ops = { .add_interface = p54_add_interface, .remove_interface = p54_remove_interface, .set_tim = p54_set_tim, - .sta_notify_ps = p54_sta_notify_ps, .sta_notify = p54_sta_notify, .set_key = p54_set_key, .config = p54_config, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5ecc686c1f1a..046ce692a906 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -773,25 +773,16 @@ struct ieee80211_sta { * enum sta_notify_cmd - sta notify command * * Used with the sta_notify() callback in &struct ieee80211_ops, this - * indicates addition and removal of a station to station table. + * indicates addition and removal of a station to station table, + * or if a associated station made a power state transition. * * @STA_NOTIFY_ADD: a station was added to the station table * @STA_NOTIFY_REMOVE: a station being removed from the station table - */ -enum sta_notify_cmd { - STA_NOTIFY_ADD, STA_NOTIFY_REMOVE -}; - -/** - * enum sta_notify_ps_cmd - sta power save notify command - * - * Used with the sta_notify_ps() callback in &struct ieee80211_ops to - * notify the driver if a station made a power state transition. - * * @STA_NOTIFY_SLEEP: a station is now sleeping * @STA_NOTIFY_AWAKE: a sleeping station woke up */ -enum sta_notify_ps_cmd { +enum sta_notify_cmd { + STA_NOTIFY_ADD, STA_NOTIFY_REMOVE, STA_NOTIFY_SLEEP, STA_NOTIFY_AWAKE, }; @@ -1258,11 +1249,9 @@ enum ieee80211_ampdu_mlme_action { * * @set_rts_threshold: Configuration of RTS threshold (if device needs it) * - * @sta_notify: Notifies low level driver about addition or removal of an - * associated station, AP, IBSS/WDS/mesh peer etc. Must be atomic. - * - * @sta_ps_notify: Notifies low level driver about the power state transition - * of a associated station. Must be atomic. + * @sta_notify: Notifies low level driver about addition, removal or power + * state transition of an associated station, AP, IBSS/WDS/mesh peer etc. + * Must be atomic. * * @conf_tx: Configure TX queue parameters (EDCF (aifs, cw_min, cw_max), * bursting) for a hardware TX queue. @@ -1329,8 +1318,6 @@ struct ieee80211_ops { int (*set_rts_threshold)(struct ieee80211_hw *hw, u32 value); void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum sta_notify_cmd, struct ieee80211_sta *sta); - void (*sta_notify_ps)(struct ieee80211_hw *hw, - enum sta_notify_ps_cmd, struct ieee80211_sta *sta); int (*conf_tx)(struct ieee80211_hw *hw, u16 queue, const struct ieee80211_tx_queue_params *params); int (*get_tx_stats)(struct ieee80211_hw *hw, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 14be095b8528..23443de7ee4e 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -658,9 +658,9 @@ static void ap_sta_ps_start(struct sta_info *sta) atomic_inc(&sdata->bss->num_sta_ps); set_and_clear_sta_flags(sta, WLAN_STA_PS, WLAN_STA_PSPOLL); - if (local->ops->sta_notify_ps) - local->ops->sta_notify_ps(local_to_hw(local), STA_NOTIFY_SLEEP, - &sta->sta); + if (local->ops->sta_notify) + local->ops->sta_notify(local_to_hw(local), &sdata->vif, + STA_NOTIFY_SLEEP, &sta->sta); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %pM aid %d enters power save mode\n", sdata->dev->name, sta->sta.addr, sta->sta.aid); @@ -677,9 +677,9 @@ static int ap_sta_ps_end(struct sta_info *sta) atomic_dec(&sdata->bss->num_sta_ps); clear_sta_flags(sta, WLAN_STA_PS | WLAN_STA_PSPOLL); - if (local->ops->sta_notify_ps) - local->ops->sta_notify_ps(local_to_hw(local), STA_NOTIFY_AWAKE, - &sta->sta); + if (local->ops->sta_notify) + local->ops->sta_notify(local_to_hw(local), &sdata->vif, + STA_NOTIFY_AWAKE, &sta->sta); if (!skb_queue_empty(&sta->ps_tx_buf)) sta_info_clear_tim_bit(sta); -- cgit v1.2.3 From 4dec9b807be757780ca3611a959ac22c28d292a7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 10 Dec 2008 17:48:48 +0100 Subject: rfkill: strip pointless notifier chain No users, so no reason to have it. Signed-off-by: Johannes Berg Acked-by: Ivo van Doorn Signed-off-by: John W. Linville --- include/linux/rfkill.h | 7 ----- net/rfkill/rfkill.c | 83 +++----------------------------------------------- 2 files changed, 5 insertions(+), 85 deletions(-) (limited to 'include') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index f376a93927f7..164332cbb77c 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -149,11 +149,4 @@ static inline char *rfkill_get_led_name(struct rfkill *rfkill) #endif } -/* rfkill notification chain */ -#define RFKILL_STATE_CHANGED 0x0001 /* state of a normal rfkill - switch has changed */ - -int register_rfkill_notifier(struct notifier_block *nb); -int unregister_rfkill_notifier(struct notifier_block *nb); - #endif /* RFKILL_H */ diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index 051d2c9ea66b..3c94f76d5525 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -53,51 +53,6 @@ static struct rfkill_gsw_state rfkill_global_states[RFKILL_TYPE_MAX]; static unsigned long rfkill_states_lockdflt[BITS_TO_LONGS(RFKILL_TYPE_MAX)]; static bool rfkill_epo_lock_active; -static BLOCKING_NOTIFIER_HEAD(rfkill_notifier_list); - - -/** - * register_rfkill_notifier - Add notifier to rfkill notifier chain - * @nb: pointer to the new entry to add to the chain - * - * See blocking_notifier_chain_register() for return value and further - * observations. - * - * Adds a notifier to the rfkill notifier chain. The chain will be - * called with a pointer to the relevant rfkill structure as a parameter, - * refer to include/linux/rfkill.h for the possible events. - * - * Notifiers added to this chain are to always return NOTIFY_DONE. This - * chain is a blocking notifier chain: notifiers can sleep. - * - * Calls to this chain may have been done through a workqueue. One must - * assume unordered asynchronous behaviour, there is no way to know if - * actions related to the event that generated the notification have been - * carried out already. - */ -int register_rfkill_notifier(struct notifier_block *nb) -{ - BUG_ON(!nb); - return blocking_notifier_chain_register(&rfkill_notifier_list, nb); -} -EXPORT_SYMBOL_GPL(register_rfkill_notifier); - -/** - * unregister_rfkill_notifier - remove notifier from rfkill notifier chain - * @nb: pointer to the entry to remove from the chain - * - * See blocking_notifier_chain_unregister() for return value and further - * observations. - * - * Removes a notifier from the rfkill notifier chain. - */ -int unregister_rfkill_notifier(struct notifier_block *nb) -{ - BUG_ON(!nb); - return blocking_notifier_chain_unregister(&rfkill_notifier_list, nb); -} -EXPORT_SYMBOL_GPL(unregister_rfkill_notifier); - static void rfkill_led_trigger(struct rfkill *rfkill, enum rfkill_state state) @@ -124,12 +79,9 @@ static void rfkill_led_trigger_activate(struct led_classdev *led) } #endif /* CONFIG_RFKILL_LEDS */ -static void notify_rfkill_state_change(struct rfkill *rfkill) +static void rfkill_uevent(struct rfkill *rfkill) { - rfkill_led_trigger(rfkill, rfkill->state); - blocking_notifier_call_chain(&rfkill_notifier_list, - RFKILL_STATE_CHANGED, - rfkill); + kobject_uevent(&rfkill->dev.kobj, KOBJ_CHANGE); } static void update_rfkill_state(struct rfkill *rfkill) @@ -142,7 +94,7 @@ static void update_rfkill_state(struct rfkill *rfkill) oldstate = rfkill->state; rfkill->state = newstate; if (oldstate != newstate) - notify_rfkill_state_change(rfkill); + rfkill_uevent(rfkill); } mutex_unlock(&rfkill->mutex); } @@ -220,7 +172,7 @@ static int rfkill_toggle_radio(struct rfkill *rfkill, } if (force || rfkill->state != oldstate) - notify_rfkill_state_change(rfkill); + rfkill_uevent(rfkill); return retval; } @@ -405,7 +357,7 @@ int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state) rfkill->state = state; if (state != oldstate) - notify_rfkill_state_change(rfkill); + rfkill_uevent(rfkill); mutex_unlock(&rfkill->mutex); @@ -618,28 +570,6 @@ static int rfkill_resume(struct device *dev) #define rfkill_resume NULL #endif -static int rfkill_blocking_uevent_notifier(struct notifier_block *nb, - unsigned long eventid, - void *data) -{ - struct rfkill *rfkill = (struct rfkill *)data; - - switch (eventid) { - case RFKILL_STATE_CHANGED: - kobject_uevent(&rfkill->dev.kobj, KOBJ_CHANGE); - break; - default: - break; - } - - return NOTIFY_DONE; -} - -static struct notifier_block rfkill_blocking_uevent_nb = { - .notifier_call = rfkill_blocking_uevent_notifier, - .priority = 0, -}; - static int rfkill_dev_uevent(struct device *dev, struct kobj_uevent_env *env) { struct rfkill *rfkill = to_rfkill(dev); @@ -942,14 +872,11 @@ static int __init rfkill_init(void) return error; } - register_rfkill_notifier(&rfkill_blocking_uevent_nb); - return 0; } static void __exit rfkill_exit(void) { - unregister_rfkill_notifier(&rfkill_blocking_uevent_nb); class_unregister(&rfkill_class); } -- cgit v1.2.3 From b690ace50be7d10d77cb7a6d5ef1bd9de649852f Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 21 Oct 2008 14:07:03 +0100 Subject: [ARM] S3C6400: serial support for S3C6400 and S3C6410 SoCs Add support to the Samsung serial driver for the S3C6400 and S3C6410 serial ports. Signed-off-by: Ben Dooks --- arch/arm/mach-s3c2410/include/mach/map.h | 2 + arch/arm/mach-s3c24a0/include/mach/map.h | 2 + arch/arm/plat-s3c/include/plat/regs-serial.h | 6 ++ drivers/serial/Kconfig | 10 +- drivers/serial/Makefile | 1 + drivers/serial/s3c6400.c | 151 +++++++++++++++++++++++++++ drivers/serial/samsung.c | 8 +- include/linux/serial_core.h | 2 + 8 files changed, 178 insertions(+), 4 deletions(-) create mode 100644 drivers/serial/s3c6400.c (limited to 'include') diff --git a/arch/arm/mach-s3c2410/include/mach/map.h b/arch/arm/mach-s3c2410/include/mach/map.h index 6b30361a0805..918e3463297f 100644 --- a/arch/arm/mach-s3c2410/include/mach/map.h +++ b/arch/arm/mach-s3c2410/include/mach/map.h @@ -101,4 +101,6 @@ #define S3C24XX_PA_SDI S3C2410_PA_SDI #define S3C24XX_PA_NAND S3C2410_PA_NAND +#define S3C_PA_UART S3C24XX_PA_UART + #endif /* __ASM_ARCH_MAP_H */ diff --git a/arch/arm/mach-s3c24a0/include/mach/map.h b/arch/arm/mach-s3c24a0/include/mach/map.h index 2ce1839de4ee..6667355a47a1 100644 --- a/arch/arm/mach-s3c24a0/include/mach/map.h +++ b/arch/arm/mach-s3c24a0/include/mach/map.h @@ -80,4 +80,6 @@ #define S3C24XX_PA_SDI S3C24A0_PA_SDI #define S3C24XX_PA_NAND S3C24A0_PA_NAND +#define S3C_PA_UART S3C24A0_PA_UART + #endif /* __ASM_ARCH_24A0_MAP_H */ diff --git a/arch/arm/plat-s3c/include/plat/regs-serial.h b/arch/arm/plat-s3c/include/plat/regs-serial.h index 18ba31c7174c..3ca28585cf80 100644 --- a/arch/arm/plat-s3c/include/plat/regs-serial.h +++ b/arch/arm/plat-s3c/include/plat/regs-serial.h @@ -77,6 +77,12 @@ #define S3C2440_UCON_FCLK (3<<10) #define S3C2443_UCON_EPLL (3<<10) +#define S3C6400_UCON_CLKMASK (3<<10) +#define S3C6400_UCON_PCLK (0<<10) +#define S3C6400_UCON_PCLK2 (2<<10) +#define S3C6400_UCON_UCLK0 (1<<10) +#define S3C6400_UCON_UCLK1 (3<<10) + #define S3C2440_UCON2_FCLK_EN (1<<15) #define S3C2440_UCON0_DIVMASK (15 << 12) #define S3C2440_UCON1_DIVMASK (15 << 12) diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index f71a2e8a5f60..e4ae499e587e 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -447,7 +447,7 @@ config SERIAL_CLPS711X_CONSOLE config SERIAL_SAMSUNG tristate "Samsung SoC serial support" - depends on ARM && PLAT_S3C24XX + depends on ARM && PLAT_S3C select SERIAL_CORE help Support for the on-chip UARTs on the Samsung S3C24XX series CPUs, @@ -515,6 +515,14 @@ config SERIAL_S3C24A0 help Serial port support for the Samsung S3C24A0 SoC +config SERIAL_S3C6400 + tristate "Samsung S3C6400/S3C6410 Serial port support" + depends on SERIAL_SAMSUNG && (CPU_S3C600 || CPU_S3C6410) + default y + help + Serial port support for the Samsung S3C6400 and S3C6410 + SoCs + config SERIAL_DZ bool "DECstation DZ serial driver" depends on MACH_DECSTATION && 32BIT diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile index 7769aece54ca..dfe775ac45b2 100644 --- a/drivers/serial/Makefile +++ b/drivers/serial/Makefile @@ -42,6 +42,7 @@ obj-$(CONFIG_SERIAL_S3C2410) += s3c2410.o obj-$(CONFIG_SERIAL_S3C2412) += s3c2412.o obj-$(CONFIG_SERIAL_S3C2440) += s3c2440.o obj-$(CONFIG_SERIAL_S3C24A0) += s3c24a0.o +obj-$(CONFIG_SERIAL_S3C6400) += s3c6400.o obj-$(CONFIG_SERIAL_IP22_ZILOG) += ip22zilog.o obj-$(CONFIG_SERIAL_MUX) += mux.o obj-$(CONFIG_SERIAL_68328) += 68328serial.o diff --git a/drivers/serial/s3c6400.c b/drivers/serial/s3c6400.c new file mode 100644 index 000000000000..06936d13393f --- /dev/null +++ b/drivers/serial/s3c6400.c @@ -0,0 +1,151 @@ +/* linux/drivers/serial/s3c6400.c + * + * Driver for Samsung S3C6400 and S3C6410 SoC onboard UARTs. + * + * Copyright 2008 Openmoko, Inc. + * Copyright 2008 Simtec Electronics + * Ben Dooks + * http://armlinux.simtec.co.uk/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. +*/ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include "samsung.h" + +static int s3c6400_serial_setsource(struct uart_port *port, + struct s3c24xx_uart_clksrc *clk) +{ + unsigned long ucon = rd_regl(port, S3C2410_UCON); + + if (strcmp(clk->name, "uclk0") == 0) { + ucon &= ~S3C6400_UCON_CLKMASK; + ucon |= S3C6400_UCON_UCLK0; + } else if (strcmp(clk->name, "uclk1") == 0) + ucon |= S3C6400_UCON_UCLK1; + else if (strcmp(clk->name, "pclk") == 0) { + /* See notes about transitioning from UCLK to PCLK */ + ucon &= ~S3C6400_UCON_UCLK0; + } else { + printk(KERN_ERR "unknown clock source %s\n", clk->name); + return -EINVAL; + } + + wr_regl(port, S3C2410_UCON, ucon); + return 0; +} + + +static int s3c6400_serial_getsource(struct uart_port *port, + struct s3c24xx_uart_clksrc *clk) +{ + u32 ucon = rd_regl(port, S3C2410_UCON); + + clk->divisor = 1; + + switch (ucon & S3C6400_UCON_CLKMASK) { + case S3C6400_UCON_UCLK0: + clk->name = "uclk0"; + break; + + case S3C6400_UCON_UCLK1: + clk->name = "uclk1"; + break; + + case S3C6400_UCON_PCLK: + case S3C6400_UCON_PCLK2: + clk->name = "pclk"; + break; + } + + return 0; +} + +static int s3c6400_serial_resetport(struct uart_port *port, + struct s3c2410_uartcfg *cfg) +{ + unsigned long ucon = rd_regl(port, S3C2410_UCON); + + dbg("s3c6400_serial_resetport: port=%p (%08lx), cfg=%p\n", + port, port->mapbase, cfg); + + /* ensure we don't change the clock settings... */ + + ucon &= S3C6400_UCON_CLKMASK; + + wr_regl(port, S3C2410_UCON, ucon | cfg->ucon); + wr_regl(port, S3C2410_ULCON, cfg->ulcon); + + /* reset both fifos */ + + wr_regl(port, S3C2410_UFCON, cfg->ufcon | S3C2410_UFCON_RESETBOTH); + wr_regl(port, S3C2410_UFCON, cfg->ufcon); + + return 0; +} + +static struct s3c24xx_uart_info s3c6400_uart_inf = { + .name = "Samsung S3C6400 UART", + .type = PORT_S3C6400, + .fifosize = 64, + .rx_fifomask = S3C2440_UFSTAT_RXMASK, + .rx_fifoshift = S3C2440_UFSTAT_RXSHIFT, + .rx_fifofull = S3C2440_UFSTAT_RXFULL, + .tx_fifofull = S3C2440_UFSTAT_TXFULL, + .tx_fifomask = S3C2440_UFSTAT_TXMASK, + .tx_fifoshift = S3C2440_UFSTAT_TXSHIFT, + .get_clksrc = s3c6400_serial_getsource, + .set_clksrc = s3c6400_serial_setsource, + .reset_port = s3c6400_serial_resetport, +}; + +/* device management */ + +static int s3c6400_serial_probe(struct platform_device *dev) +{ + dbg("s3c6400_serial_probe: dev=%p\n", dev); + return s3c24xx_serial_probe(dev, &s3c6400_uart_inf); +} + +static struct platform_driver s3c6400_serial_drv = { + .probe = s3c6400_serial_probe, + .remove = s3c24xx_serial_remove, + .driver = { + .name = "s3c6400-uart", + .owner = THIS_MODULE, + }, +}; + +s3c24xx_console_init(&s3c6400_serial_drv, &s3c6400_uart_inf); + +static int __init s3c6400_serial_init(void) +{ + return s3c24xx_serial_init(&s3c6400_serial_drv, &s3c6400_uart_inf); +} + +static void __exit s3c6400_serial_exit(void) +{ + platform_driver_unregister(&s3c6400_serial_drv); +} + +module_init(s3c6400_serial_init); +module_exit(s3c6400_serial_exit); + +MODULE_DESCRIPTION("Samsung S3C6400,S3C6410 SoC Serial port driver"); +MODULE_AUTHOR("Ben Dooks "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:s3c6400-uart"); diff --git a/drivers/serial/samsung.c b/drivers/serial/samsung.c index bb8b57aae3af..44fc38afa228 100644 --- a/drivers/serial/samsung.c +++ b/drivers/serial/samsung.c @@ -47,9 +47,9 @@ #include #include +#include #include -#include #include "samsung.h" @@ -756,6 +756,8 @@ static const char *s3c24xx_serial_type(struct uart_port *port) return "S3C2440"; case PORT_S3C2412: return "S3C2412"; + case PORT_S3C6400: + return "S3C6400/10"; default: return NULL; } @@ -1034,8 +1036,8 @@ static int s3c24xx_serial_init_port(struct s3c24xx_uart_port *ourport, dbg("resource %p (%lx..%lx)\n", res, res->start, res->end); - port->mapbase = res->start; - port->membase = S3C24XX_VA_UART + (res->start - S3C24XX_PA_UART); + port->mapbase = res->start; + port->membase = S3C_VA_UART + res->start - (S3C_PA_UART & 0xfff00000); ret = platform_get_irq(platdev, 0); if (ret < 0) port->irq = 0; diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 4e4f1277f3bf..feb3b939ec4b 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -158,6 +158,8 @@ /* SH-SCI */ #define PORT_SCIFA 83 +#define PORT_S3C6400 84 + #ifdef __KERNEL__ #include -- cgit v1.2.3 From d2ff911882b6bc693d86ca9566daac70aacbb2b3 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 15 Dec 2008 19:04:35 +1030 Subject: Define smp_call_function_many for UP Otherwise those using it in transition patches (eg. kvm) can't compile with CONFIG_SMP=n: arch/x86/kvm/../../../virt/kvm/kvm_main.c: In function 'make_all_cpus_request': arch/x86/kvm/../../../virt/kvm/kvm_main.c:380: error: implicit declaration of function 'smp_call_function_many' Signed-off-by: Rusty Russell Signed-off-by: Linus Torvalds --- include/linux/smp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/smp.h b/include/linux/smp.h index 3f9a60043a97..6e7ba16ff454 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -146,6 +146,8 @@ static inline void smp_send_reschedule(int cpu) { } }) #define smp_call_function_mask(mask, func, info, wait) \ (up_smp_call_function(func, info)) +#define smp_call_function_many(mask, func, info, wait) \ + (up_smp_call_function(func, info)) static inline void init_call_single_data(void) { } -- cgit v1.2.3 From b53c7583e26746ef6f66c866841e10450150ed8e Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Thu, 4 Dec 2008 10:01:52 -0800 Subject: rapidio: struct device - replace bus_id with dev_name(), dev_set_name() Cc: Matt Porter Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman Signed-off-by: Paul Mackerras --- drivers/rapidio/rio-scan.c | 8 ++++---- include/linux/rio_drv.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index 643a6b98462b..5c13f61bfb1b 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -365,15 +365,15 @@ static struct rio_dev *rio_setup_device(struct rio_net *net, rdid++) rswitch->route_table[rdid] = RIO_INVALID_ROUTE; rdev->rswitch = rswitch; - sprintf(rio_name(rdev), "%02x:s:%04x", rdev->net->id, - rdev->rswitch->switchid); + dev_set_name(&rdev->dev, "%02x:s:%04x", rdev->net->id, + rdev->rswitch->switchid); rio_route_set_ops(rdev); list_add_tail(&rswitch->node, &rio_switches); } else - sprintf(rio_name(rdev), "%02x:e:%04x", rdev->net->id, - rdev->destid); + dev_set_name(&rdev->dev, "%02x:e:%04x", rdev->net->id, + rdev->destid); rdev->dev.bus = &rio_bus_type; diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h index 90987b7bcc1b..32c0547ffafc 100644 --- a/include/linux/rio_drv.h +++ b/include/linux/rio_drv.h @@ -427,9 +427,9 @@ void rio_dev_put(struct rio_dev *); * Get the unique RIO device identifier. Returns the device * identifier string. */ -static inline char *rio_name(struct rio_dev *rdev) +static inline const char *rio_name(struct rio_dev *rdev) { - return rdev->dev.bus_id; + return dev_name(&rdev->dev); } /** -- cgit v1.2.3 From 1a881f27c50b4fbd6858a8696a189263621136b0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:27:47 -0800 Subject: net: Add frag_list support to GSO This patch allows GSO to handle frag_list in a limited way for the purposes of allowing packets merged by GRO to be refragmented on output. Most hardware won't (and aren't expected to) support handling GRO frag_list packets directly. Therefore we will perform GSO in software for those cases. However, for drivers that can support it (such as virtual NICs) we may not have to segment the packets at all. Whether the added overhead of GRO/GSO is worthwhile for bridges and routers when weighed against the benefit of potentially increasing the MTU within the host is still an open question. However, for the case of host nodes this is undoubtedly a win. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ net/core/dev.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b60c26b7d31c..bdf5465deb91 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1858,6 +1858,8 @@ static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) { return skb_is_gso(skb) && (!skb_gso_ok(skb, dev->features) || + (skb_shinfo(skb)->frag_list && + !(dev->features & NETIF_F_FRAGLIST)) || unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); } diff --git a/net/core/dev.c b/net/core/dev.c index f54cac76438a..e415f0b0d0d0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1533,8 +1533,6 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) __be16 type = skb->protocol; int err; - BUG_ON(skb_shinfo(skb)->frag_list); - skb_reset_mac_header(skb); skb->mac_len = skb->network_header - skb->mac_header; __skb_pull(skb, skb->mac_len); -- cgit v1.2.3 From d565b0a1a9b6ee7dff46e1f68b26b526ac11ae50 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:38:52 -0800 Subject: net: Add Generic Receive Offload infrastructure This patch adds the top-level GRO (Generic Receive Offload) infrastructure. This is pretty similar to LRO except that this is protocol-independent. Instead of holding packets in an lro_mgr structure, they're now held in napi_struct. For drivers that intend to use this, they can set the NETIF_F_GRO bit and call napi_gro_receive instead of netif_receive_skb or just call netif_rx. The latter will call napi_receive_skb automatically. When napi_gro_receive is used, the driver must either call napi_complete/napi_rx_complete, or call napi_gro_flush in softirq context if the driver uses the primitives __napi_complete/__napi_rx_complete. Protocols will set the gro_receive and gro_complete function pointers in order to participate in this scheme. In addition to the packet, gro_receive will get a list of currently held packets. Each packet in the list has a same_flow field which is non-zero if it is a potential match for the new packet. For each packet that may match, they also have a flush field which is non-zero if the held packet must not be merged with the new packet. Once gro_receive has determined that the new skb matches a held packet, the held packet may be processed immediately if the new skb cannot be merged with it. In this case gro_receive should return the pointer to the existing skb in gro_list. Otherwise the new skb should be merged into the existing packet and NULL should be returned, unless the new skb makes it impossible for any further merges to be made (e.g., FIN packet) where the merged skb should be returned. Whenever the skb is merged into an existing entry, the gro_receive function should set NAPI_GRO_CB(skb)->same_flow. Note that if an skb merely matches an existing entry but can't be merged with it, then this shouldn't be set. If gro_receive finds it pointless to hold the new skb for future merging, it should set NAPI_GRO_CB(skb)->flush. Held packets will be flushed by napi_gro_flush which is called by napi_complete and napi_rx_complete. Currently held packets are stored in a singly liked list just like LRO. The list is limited to a maximum of 8 entries. In future, this may be expanded to use a hash table to allow more flows to be held for merging. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 80 +++++++------------ include/linux/netpoll.h | 5 -- net/core/dev.c | 193 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 219 insertions(+), 59 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bdf5465deb91..58856b6737fb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -314,8 +314,9 @@ struct napi_struct { spinlock_t poll_lock; int poll_owner; struct net_device *dev; - struct list_head dev_list; #endif + struct list_head dev_list; + struct sk_buff *gro_list; }; enum @@ -376,22 +377,8 @@ static inline int napi_reschedule(struct napi_struct *napi) * * Mark NAPI processing as complete. */ -static inline void __napi_complete(struct napi_struct *n) -{ - BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); - list_del(&n->poll_list); - smp_mb__before_clear_bit(); - clear_bit(NAPI_STATE_SCHED, &n->state); -} - -static inline void napi_complete(struct napi_struct *n) -{ - unsigned long flags; - - local_irq_save(flags); - __napi_complete(n); - local_irq_restore(flags); -} +extern void __napi_complete(struct napi_struct *n); +extern void napi_complete(struct napi_struct *n); /** * napi_disable - prevent NAPI from scheduling @@ -640,9 +627,7 @@ struct net_device unsigned long state; struct list_head dev_list; -#ifdef CONFIG_NETPOLL struct list_head napi_list; -#endif /* Net device features */ unsigned long features; @@ -661,6 +646,7 @@ struct net_device #define NETIF_F_LLTX 4096 /* LockLess TX - deprecated. Please */ /* do not use LLTX in new drivers */ #define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */ +#define NETIF_F_GRO 16384 /* Generic receive offload */ #define NETIF_F_LRO 32768 /* large receive offload */ /* Segmentation offload features */ @@ -984,22 +970,8 @@ static inline void *netdev_priv(const struct net_device *dev) * netif_napi_add() must be used to initialize a napi context prior to calling * *any* of the other napi related functions. */ -static inline void netif_napi_add(struct net_device *dev, - struct napi_struct *napi, - int (*poll)(struct napi_struct *, int), - int weight) -{ - INIT_LIST_HEAD(&napi->poll_list); - napi->poll = poll; - napi->weight = weight; -#ifdef CONFIG_NETPOLL - napi->dev = dev; - list_add(&napi->dev_list, &dev->napi_list); - spin_lock_init(&napi->poll_lock); - napi->poll_owner = -1; -#endif - set_bit(NAPI_STATE_SCHED, &napi->state); -} +void netif_napi_add(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int weight); /** * netif_napi_del - remove a napi context @@ -1007,12 +979,20 @@ static inline void netif_napi_add(struct net_device *dev, * * netif_napi_del() removes a napi context from the network device napi list */ -static inline void netif_napi_del(struct napi_struct *napi) -{ -#ifdef CONFIG_NETPOLL - list_del(&napi->dev_list); -#endif -} +void netif_napi_del(struct napi_struct *napi); + +struct napi_gro_cb { + /* This is non-zero if the packet may be of the same flow. */ + int same_flow; + + /* This is non-zero if the packet cannot be merged with the new skb. */ + int flush; + + /* Number of segments aggregated. */ + int count; +}; + +#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) struct packet_type { __be16 type; /* This is really htons(ether_type). */ @@ -1024,6 +1004,9 @@ struct packet_type { struct sk_buff *(*gso_segment)(struct sk_buff *skb, int features); int (*gso_send_check)(struct sk_buff *skb); + struct sk_buff **(*gro_receive)(struct sk_buff **head, + struct sk_buff *skb); + int (*gro_complete)(struct sk_buff *skb); void *af_packet_priv; struct list_head list; }; @@ -1377,6 +1360,9 @@ extern int netif_rx(struct sk_buff *skb); extern int netif_rx_ni(struct sk_buff *skb); #define HAVE_NETIF_RECEIVE_SKB 1 extern int netif_receive_skb(struct sk_buff *skb); +extern void napi_gro_flush(struct napi_struct *napi); +extern int napi_gro_receive(struct napi_struct *napi, + struct sk_buff *skb); extern void netif_nit_deliver(struct sk_buff *skb); extern int dev_valid_name(const char *name); extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); @@ -1621,17 +1607,7 @@ static inline void __netif_rx_complete(struct net_device *dev, static inline void netif_rx_complete(struct net_device *dev, struct napi_struct *napi) { - unsigned long flags; - - /* - * don't let napi dequeue from the cpu poll list - * just in case its running on a different cpu - */ - if (unlikely(test_bit(NAPI_STATE_NPSVC, &napi->state))) - return; - local_irq_save(flags); - __netif_rx_complete(dev, napi); - local_irq_restore(flags); + napi_complete(napi); } static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index e3d79593fb3a..e38d3c9dccda 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -94,11 +94,6 @@ static inline void netpoll_poll_unlock(void *have) rcu_read_unlock(); } -static inline void netpoll_netdev_init(struct net_device *dev) -{ - INIT_LIST_HEAD(&dev->napi_list); -} - #else static inline int netpoll_rx(struct sk_buff *skb) { diff --git a/net/core/dev.c b/net/core/dev.c index e415f0b0d0d0..d8d7d1fccde4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -129,6 +129,9 @@ #include "net-sysfs.h" +/* Instead of increasing this, you should create a hash table. */ +#define MAX_GRO_SKBS 8 + /* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. @@ -2335,6 +2338,122 @@ static void flush_backlog(void *arg) } } +static int napi_gro_complete(struct sk_buff *skb) +{ + struct packet_type *ptype; + __be16 type = skb->protocol; + struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; + int err = -ENOENT; + + if (!skb_shinfo(skb)->frag_list) + goto out; + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, head, list) { + if (ptype->type != type || ptype->dev || !ptype->gro_complete) + continue; + + err = ptype->gro_complete(skb); + break; + } + rcu_read_unlock(); + + if (err) { + WARN_ON(&ptype->list == head); + kfree_skb(skb); + return NET_RX_SUCCESS; + } + +out: + __skb_push(skb, -skb_network_offset(skb)); + return netif_receive_skb(skb); +} + +void napi_gro_flush(struct napi_struct *napi) +{ + struct sk_buff *skb, *next; + + for (skb = napi->gro_list; skb; skb = next) { + next = skb->next; + skb->next = NULL; + napi_gro_complete(skb); + } + + napi->gro_list = NULL; +} +EXPORT_SYMBOL(napi_gro_flush); + +int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +{ + struct sk_buff **pp = NULL; + struct packet_type *ptype; + __be16 type = skb->protocol; + struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; + int count = 0; + int mac_len; + + if (!(skb->dev->features & NETIF_F_GRO)) + goto normal; + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, head, list) { + struct sk_buff *p; + + if (ptype->type != type || ptype->dev || !ptype->gro_receive) + continue; + + skb_reset_network_header(skb); + mac_len = skb->network_header - skb->mac_header; + skb->mac_len = mac_len; + NAPI_GRO_CB(skb)->same_flow = 0; + NAPI_GRO_CB(skb)->flush = 0; + + for (p = napi->gro_list; p; p = p->next) { + count++; + NAPI_GRO_CB(p)->same_flow = + p->mac_len == mac_len && + !memcmp(skb_mac_header(p), skb_mac_header(skb), + mac_len); + NAPI_GRO_CB(p)->flush = 0; + } + + pp = ptype->gro_receive(&napi->gro_list, skb); + break; + } + rcu_read_unlock(); + + if (&ptype->list == head) + goto normal; + + if (pp) { + struct sk_buff *nskb = *pp; + + *pp = nskb->next; + nskb->next = NULL; + napi_gro_complete(nskb); + count--; + } + + if (NAPI_GRO_CB(skb)->same_flow) + goto ok; + + if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) { + __skb_push(skb, -skb_network_offset(skb)); + goto normal; + } + + NAPI_GRO_CB(skb)->count = 1; + skb->next = napi->gro_list; + napi->gro_list = skb; + +ok: + return NET_RX_SUCCESS; + +normal: + return netif_receive_skb(skb); +} +EXPORT_SYMBOL(napi_gro_receive); + static int process_backlog(struct napi_struct *napi, int quota) { int work = 0; @@ -2354,9 +2473,11 @@ static int process_backlog(struct napi_struct *napi, int quota) } local_irq_enable(); - netif_receive_skb(skb); + napi_gro_receive(napi, skb); } while (++work < quota && jiffies == start_time); + napi_gro_flush(napi); + return work; } @@ -2377,6 +2498,68 @@ void __napi_schedule(struct napi_struct *n) } EXPORT_SYMBOL(__napi_schedule); +void __napi_complete(struct napi_struct *n) +{ + BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); + BUG_ON(n->gro_list); + + list_del(&n->poll_list); + smp_mb__before_clear_bit(); + clear_bit(NAPI_STATE_SCHED, &n->state); +} +EXPORT_SYMBOL(__napi_complete); + +void napi_complete(struct napi_struct *n) +{ + unsigned long flags; + + /* + * don't let napi dequeue from the cpu poll list + * just in case its running on a different cpu + */ + if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state))) + return; + + napi_gro_flush(n); + local_irq_save(flags); + __napi_complete(n); + local_irq_restore(flags); +} +EXPORT_SYMBOL(napi_complete); + +void netif_napi_add(struct net_device *dev, struct napi_struct *napi, + int (*poll)(struct napi_struct *, int), int weight) +{ + INIT_LIST_HEAD(&napi->poll_list); + napi->gro_list = NULL; + napi->poll = poll; + napi->weight = weight; + list_add(&napi->dev_list, &dev->napi_list); +#ifdef CONFIG_NETPOLL + napi->dev = dev; + spin_lock_init(&napi->poll_lock); + napi->poll_owner = -1; +#endif + set_bit(NAPI_STATE_SCHED, &napi->state); +} +EXPORT_SYMBOL(netif_napi_add); + +void netif_napi_del(struct napi_struct *napi) +{ + struct sk_buff *skb, *next; + + list_del(&napi->dev_list); + + for (skb = napi->gro_list; skb; skb = next) { + next = skb->next; + skb->next = NULL; + kfree_skb(skb); + } + + napi->gro_list = NULL; +} +EXPORT_SYMBOL(netif_napi_del); + static void net_rx_action(struct softirq_action *h) { @@ -4380,7 +4563,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name, netdev_init_queues(dev); - netpoll_netdev_init(dev); + INIT_LIST_HEAD(&dev->napi_list); setup(dev); strcpy(dev->name, name); return dev; @@ -4397,10 +4580,15 @@ EXPORT_SYMBOL(alloc_netdev_mq); */ void free_netdev(struct net_device *dev) { + struct napi_struct *p, *n; + release_net(dev_net(dev)); kfree(dev->_tx); + list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) + netif_napi_del(p); + /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { kfree((char *)dev - dev->padded); @@ -4949,6 +5137,7 @@ static int __init net_dev_init(void) queue->backlog.poll = process_backlog; queue->backlog.weight = weight_p; + queue->backlog.gro_list = NULL; } dev_boot_phase = 0; -- cgit v1.2.3 From 73cc19f1556b95976934de236fd9043f7208844f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:41:09 -0800 Subject: ipv4: Add GRO infrastructure This patch adds GRO support for IPv4. The criteria for merging is more stringent than LRO, in particular, we require all fields in the IP header to be identical except for the length, ID and checksum. In addition, the ID must form an arithmetic sequence with a difference of one. The ID requirement might seem overly strict, however, most hardware TSO solutions already obey this rule. Linux itself also obeys this whether GSO is in use or not. In future we could relax this rule by storing the IDs (or rather making sure that we don't drop them when pulling the aggregate skb's tail). Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/protocol.h | 3 ++ net/ipv4/af_inet.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) (limited to 'include') diff --git a/include/net/protocol.h b/include/net/protocol.h index 8d024d7cb741..cb2965aa1b62 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -39,6 +39,9 @@ struct net_protocol { int (*gso_send_check)(struct sk_buff *skb); struct sk_buff *(*gso_segment)(struct sk_buff *skb, int features); + struct sk_buff **(*gro_receive)(struct sk_buff **head, + struct sk_buff *skb); + int (*gro_complete)(struct sk_buff *skb); unsigned int no_policy:1, netns_ok:1; }; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index fe03048c130d..a85595307fa7 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -94,6 +94,7 @@ #include #include #include +#include #include #include #include @@ -1241,6 +1242,100 @@ out: return segs; } +static struct sk_buff **inet_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct net_protocol *ops; + struct sk_buff **pp = NULL; + struct sk_buff *p; + struct iphdr *iph; + int flush = 1; + int proto; + int id; + + if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) + goto out; + + iph = ip_hdr(skb); + proto = iph->protocol & (MAX_INET_PROTOS - 1); + + rcu_read_lock(); + ops = rcu_dereference(inet_protos[proto]); + if (!ops || !ops->gro_receive) + goto out_unlock; + + if (iph->version != 4 || iph->ihl != 5) + goto out_unlock; + + if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) + goto out_unlock; + + flush = ntohs(iph->tot_len) != skb->len || + iph->frag_off != htons(IP_DF); + id = ntohs(iph->id); + + for (p = *head; p; p = p->next) { + struct iphdr *iph2; + + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + iph2 = ip_hdr(p); + + if (iph->protocol != iph2->protocol || + iph->tos != iph2->tos || + memcmp(&iph->saddr, &iph2->saddr, 8)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + + /* All fields must match except length and checksum. */ + NAPI_GRO_CB(p)->flush |= + memcmp(&iph->frag_off, &iph2->frag_off, 4) || + (u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) != id; + + NAPI_GRO_CB(p)->flush |= flush; + } + + NAPI_GRO_CB(skb)->flush |= flush; + __skb_pull(skb, sizeof(*iph)); + skb_reset_transport_header(skb); + + pp = ops->gro_receive(head, skb); + +out_unlock: + rcu_read_unlock(); + +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} + +static int inet_gro_complete(struct sk_buff *skb) +{ + struct net_protocol *ops; + struct iphdr *iph = ip_hdr(skb); + int proto = iph->protocol & (MAX_INET_PROTOS - 1); + int err = -ENOSYS; + __be16 newlen = htons(skb->len - skb_network_offset(skb)); + + csum_replace2(&iph->check, iph->tot_len, newlen); + iph->tot_len = newlen; + + rcu_read_lock(); + ops = rcu_dereference(inet_protos[proto]); + if (WARN_ON(!ops || !ops->gro_complete)) + goto out_unlock; + + err = ops->gro_complete(skb); + +out_unlock: + rcu_read_unlock(); + + return err; +} + int inet_ctl_sock_create(struct sock **sk, unsigned short family, unsigned short type, unsigned char protocol, struct net *net) @@ -1407,6 +1502,8 @@ static struct packet_type ip_packet_type = { .func = ip_rcv, .gso_send_check = inet_gso_send_check, .gso_segment = inet_gso_segment, + .gro_receive = inet_gro_receive, + .gro_complete = inet_gro_complete, }; static int __init inet_init(void) -- cgit v1.2.3 From 71d93b39e52e92aea35f1058d957cf12250d0b75 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:42:33 -0800 Subject: net: Add skb_gro_receive This patch adds the helper skb_gro_receive to merge packets for GRO. The current method is to allocate a new header skb and then chain the original packets to its frag_list. This is done to make it easier to integrate into the existing GSO framework. In future as GSO is moved into the drivers, we can undo this and simply chain the original packets together. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 ++ net/core/skbuff.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index acf17af45af9..cf2cb50f77d1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1687,6 +1687,8 @@ extern int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); extern struct sk_buff *skb_segment(struct sk_buff *skb, int features); +extern int skb_gro_receive(struct sk_buff **head, + struct sk_buff *skb); static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 18e224af05a6..b8d0abb26433 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2582,6 +2582,65 @@ err: EXPORT_SYMBOL_GPL(skb_segment); +int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ + struct sk_buff *p = *head; + struct sk_buff *nskb; + unsigned int headroom; + unsigned int hlen = p->data - skb_mac_header(p); + + if (hlen + p->len + skb->len >= 65536) + return -E2BIG; + + if (skb_shinfo(p)->frag_list) + goto merge; + + headroom = skb_headroom(p); + nskb = netdev_alloc_skb(p->dev, headroom); + if (unlikely(!nskb)) + return -ENOMEM; + + __copy_skb_header(nskb, p); + nskb->mac_len = p->mac_len; + + skb_reserve(nskb, headroom); + + skb_set_mac_header(nskb, -hlen); + skb_set_network_header(nskb, skb_network_offset(p)); + skb_set_transport_header(nskb, skb_transport_offset(p)); + + memcpy(skb_mac_header(nskb), skb_mac_header(p), hlen); + + *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); + skb_shinfo(nskb)->frag_list = p; + skb_header_release(p); + nskb->prev = p; + + nskb->data_len += p->len; + nskb->truesize += p->len; + nskb->len += p->len; + + *head = nskb; + nskb->next = p->next; + p->next = NULL; + + p = nskb; + +merge: + NAPI_GRO_CB(p)->count++; + p->prev->next = skb; + p->prev = skb; + skb_header_release(skb); + + p->data_len += skb->len; + p->truesize += skb->len; + p->len += skb->len; + + NAPI_GRO_CB(skb)->same_flow = 1; + return 0; +} +EXPORT_SYMBOL_GPL(skb_gro_receive); + void __init skb_init(void) { skbuff_head_cache = kmem_cache_create("skbuff_head_cache", -- cgit v1.2.3 From bf296b125b21b8d558ceb6ec30bb4eba2730cd6b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:43:36 -0800 Subject: tcp: Add GRO support This patch adds the TCP-specific portion of GRO. The criterion for merging is extremely strict (the TCP header must match exactly apart from the checksum) so as to allow refragmentation. Otherwise this is pretty much identical to LRO, except that we support the merging of ECN packets. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/tcp.h | 6 ++++ net/ipv4/af_inet.c | 2 ++ net/ipv4/tcp.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_ipv4.c | 35 ++++++++++++++++++ 4 files changed, 143 insertions(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index de1e91d959b8..218235de8963 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1358,6 +1358,12 @@ extern void tcp_v4_destroy_sock(struct sock *sk); extern int tcp_v4_gso_send_check(struct sk_buff *skb); extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features); +extern struct sk_buff **tcp_gro_receive(struct sk_buff **head, + struct sk_buff *skb); +extern struct sk_buff **tcp4_gro_receive(struct sk_buff **head, + struct sk_buff *skb); +extern int tcp_gro_complete(struct sk_buff *skb); +extern int tcp4_gro_complete(struct sk_buff *skb); #ifdef CONFIG_PROC_FS extern int tcp4_proc_init(void); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index a85595307fa7..664ff0ee1c83 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1410,6 +1410,8 @@ static struct net_protocol tcp_protocol = { .err_handler = tcp_v4_err, .gso_send_check = tcp_v4_gso_send_check, .gso_segment = tcp_tso_segment, + .gro_receive = tcp4_gro_receive, + .gro_complete = tcp4_gro_complete, .no_policy = 1, .netns_ok = 1, }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 019243408623..1f3d52946b3b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2465,6 +2465,106 @@ out: } EXPORT_SYMBOL(tcp_tso_segment); +struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ + struct sk_buff **pp = NULL; + struct sk_buff *p; + struct tcphdr *th; + struct tcphdr *th2; + unsigned int thlen; + unsigned int flags; + unsigned int total; + unsigned int mss = 1; + int flush = 1; + + if (!pskb_may_pull(skb, sizeof(*th))) + goto out; + + th = tcp_hdr(skb); + thlen = th->doff * 4; + if (thlen < sizeof(*th)) + goto out; + + if (!pskb_may_pull(skb, thlen)) + goto out; + + th = tcp_hdr(skb); + __skb_pull(skb, thlen); + + flags = tcp_flag_word(th); + + for (; (p = *head); head = &p->next) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + th2 = tcp_hdr(p); + + if (th->source != th2->source || th->dest != th2->dest) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + + goto found; + } + + goto out_check_final; + +found: + flush = NAPI_GRO_CB(p)->flush; + flush |= flags & TCP_FLAG_CWR; + flush |= (flags ^ tcp_flag_word(th2)) & + ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH); + flush |= th->ack_seq != th2->ack_seq || th->window != th2->window; + flush |= memcmp(th + 1, th2 + 1, thlen - sizeof(*th)); + + total = p->len; + mss = total; + if (skb_shinfo(p)->frag_list) + mss = skb_shinfo(p)->frag_list->len; + + flush |= skb->len > mss || skb->len <= 0; + flush |= ntohl(th2->seq) + total != ntohl(th->seq); + + if (flush || skb_gro_receive(head, skb)) { + mss = 1; + goto out_check_final; + } + + p = *head; + th2 = tcp_hdr(p); + tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); + +out_check_final: + flush = skb->len < mss; + flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | + TCP_FLAG_SYN | TCP_FLAG_FIN); + + if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) + pp = head; + +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} + +int tcp_gro_complete(struct sk_buff *skb) +{ + struct tcphdr *th = tcp_hdr(skb); + + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + + skb_shinfo(skb)->gso_size = skb_shinfo(skb)->frag_list->len; + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + + if (th->cwr) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + + return 0; +} + #ifdef CONFIG_TCP_MD5SIG static unsigned long tcp_md5sig_users; static struct tcp_md5sig_pool **tcp_md5sig_pool; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 26b9030747cc..10172487921b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2346,6 +2346,41 @@ void tcp4_proc_exit(void) } #endif /* CONFIG_PROC_FS */ +struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ + struct iphdr *iph = ip_hdr(skb); + + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + if (!tcp_v4_check(skb->len, iph->saddr, iph->daddr, + skb->csum)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + } + + /* fall through */ + case CHECKSUM_NONE: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + } + + return tcp_gro_receive(head, skb); +} +EXPORT_SYMBOL(tcp4_gro_receive); + +int tcp4_gro_complete(struct sk_buff *skb) +{ + struct iphdr *iph = ip_hdr(skb); + struct tcphdr *th = tcp_hdr(skb); + + th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), + iph->saddr, iph->daddr, 0); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + + return tcp_gro_complete(skb); +} +EXPORT_SYMBOL(tcp4_gro_complete); + struct proto tcp_prot = { .name = "TCP", .owner = THIS_MODULE, -- cgit v1.2.3 From b240a0e5644eb817c4a397098a40e1ad42a615bc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 15 Dec 2008 23:44:31 -0800 Subject: ethtool: Add GGRO and SGRO ops This patch adds the ethtool ops to enable and disable GRO. It also makes GRO depend on RX checksum offload much the same as how TSO depends on SG support. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/ethtool.h | 2 ++ net/core/ethtool.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 53 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index b4b038b89ee6..27c67a542235 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -467,6 +467,8 @@ struct ethtool_ops { #define ETHTOOL_GRXFH 0x00000029 /* Get RX flow hash configuration */ #define ETHTOOL_SRXFH 0x0000002a /* Set RX flow hash configuration */ +#define ETHTOOL_GGRO 0x0000002b /* Get GRO enable (ethtool_value) */ +#define ETHTOOL_SGRO 0x0000002c /* Set GRO enable (ethtool_value) */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 14ada537f895..947710a36ced 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -528,6 +528,22 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr) return dev->ethtool_ops->set_tx_csum(dev, edata.data); } +static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value edata; + + if (!dev->ethtool_ops->set_rx_csum) + return -EOPNOTSUPP; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + if (!edata.data && dev->ethtool_ops->set_sg) + dev->features &= ~NETIF_F_GRO; + + return dev->ethtool_ops->set_rx_csum(dev, edata.data); +} + static int ethtool_set_sg(struct net_device *dev, char __user *useraddr) { struct ethtool_value edata; @@ -599,6 +615,34 @@ static int ethtool_set_gso(struct net_device *dev, char __user *useraddr) return 0; } +static int ethtool_get_gro(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GGRO }; + + edata.data = dev->features & NETIF_F_GRO; + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int ethtool_set_gro(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value edata; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + if (edata.data) { + if (!dev->ethtool_ops->get_rx_csum || + !dev->ethtool_ops->get_rx_csum(dev)) + return -EINVAL; + dev->features |= NETIF_F_GRO; + } else + dev->features &= ~NETIF_F_GRO; + + return 0; +} + static int ethtool_self_test(struct net_device *dev, char __user *useraddr) { struct ethtool_test test; @@ -932,8 +976,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) dev->ethtool_ops->get_rx_csum); break; case ETHTOOL_SRXCSUM: - rc = ethtool_set_value(dev, useraddr, - dev->ethtool_ops->set_rx_csum); + rc = ethtool_set_rx_csum(dev, useraddr); break; case ETHTOOL_GTXCSUM: rc = ethtool_get_value(dev, useraddr, ethcmd, @@ -1014,6 +1057,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXFH: rc = ethtool_set_rxhash(dev, useraddr); break; + case ETHTOOL_GGRO: + rc = ethtool_get_gro(dev, useraddr); + break; + case ETHTOOL_SGRO: + rc = ethtool_set_gro(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.2.3 From 092cab7e2cd868cb0b30209a0337689c3ffd6133 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 16 Dec 2008 01:19:41 -0800 Subject: netfilter: ctnetlink: fix missing CTA_NAT_SEQ_UNSPEC This patch fixes an inconsistency in nfnetlink_conntrack.h that I introduced myself. The problem is that CTA_NAT_SEQ_UNSPEC is missing from enum ctattr_natseq. This inconsistency may lead to problems in the message parsing in userspace (if the message contains the CTA_NAT_SEQ_* attributes, of course). This patch breaks backward compatibility, however, the only known client of this code is libnetfilter_conntrack which indeed crashes because it assumes the existence of CTA_NAT_SEQ_UNSPEC to do the parsing. The CTA_NAT_SEQ_* attributes were introduced in 2.6.25. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_conntrack.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index c19595c89304..29fe9ea1d346 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -141,6 +141,7 @@ enum ctattr_protonat { #define CTA_PROTONAT_MAX (__CTA_PROTONAT_MAX - 1) enum ctattr_natseq { + CTA_NAT_SEQ_UNSPEC, CTA_NAT_SEQ_CORRECTION_POS, CTA_NAT_SEQ_OFFSET_BEFORE, CTA_NAT_SEQ_OFFSET_AFTER, -- cgit v1.2.3 From e18ce3465477502108187c6c08b6423fb784a313 Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Tue, 16 Dec 2008 02:00:00 -0800 Subject: net: Move flow control definitions to mii.h flags used within drivers for indicating tx and rx flow control are defined in 4 drivers (and probably more), move these constants to mii.h. The 3 SMSC drivers use the same constants (FLOW_CTRL_TX), but TG3 uses TG3_FLOW_CTRL_TX, so this patch also renames the constants within TG3. Signed-off-by: Steve Glendinning Signed-off-by: David S. Miller --- drivers/net/smsc911x.h | 3 --- drivers/net/smsc9420.h | 3 --- drivers/net/tg3.c | 50 +++++++++++++++++++++++----------------------- drivers/net/tg3.h | 2 -- drivers/net/usb/smsc95xx.c | 2 -- include/linux/mii.h | 4 ++++ 6 files changed, 29 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/drivers/net/smsc911x.h b/drivers/net/smsc911x.h index f818cf0415f7..2b76654bb958 100644 --- a/drivers/net/smsc911x.h +++ b/drivers/net/smsc911x.h @@ -61,9 +61,6 @@ #define SMSC_ASSERT_MAC_LOCK(pdata) do {} while (0) #endif /* CONFIG_DEBUG_SPINLOCK */ -#define FLOW_CTRL_TX (1) -#define FLOW_CTRL_RX (2) - /* SMSC911x registers and bitfields */ #define RX_DATA_FIFO 0x00 diff --git a/drivers/net/smsc9420.h b/drivers/net/smsc9420.h index 80c426dc4325..69c351f93f86 100644 --- a/drivers/net/smsc9420.h +++ b/drivers/net/smsc9420.h @@ -45,9 +45,6 @@ #define SMSC9420_EEPROM_SIZE ((u32)11) -#define FLOW_CTRL_TX (1) -#define FLOW_CTRL_RX (2) - #define PKT_BUF_SZ (VLAN_ETH_FRAME_LEN + NET_IP_ALIGN + 4) /***********************************************/ diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 6e40d52107a4..f353f69caeb8 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -1187,9 +1187,9 @@ static void tg3_link_report(struct tg3 *tp) printk(KERN_INFO PFX "%s: Flow control is %s for TX and %s for RX.\n", tp->dev->name, - (tp->link_config.active_flowctrl & TG3_FLOW_CTRL_TX) ? + (tp->link_config.active_flowctrl & FLOW_CTRL_TX) ? "on" : "off", - (tp->link_config.active_flowctrl & TG3_FLOW_CTRL_RX) ? + (tp->link_config.active_flowctrl & FLOW_CTRL_RX) ? "on" : "off"); tg3_ump_link_report(tp); } @@ -1199,11 +1199,11 @@ static u16 tg3_advert_flowctrl_1000T(u8 flow_ctrl) { u16 miireg; - if ((flow_ctrl & TG3_FLOW_CTRL_TX) && (flow_ctrl & TG3_FLOW_CTRL_RX)) + if ((flow_ctrl & FLOW_CTRL_TX) && (flow_ctrl & FLOW_CTRL_RX)) miireg = ADVERTISE_PAUSE_CAP; - else if (flow_ctrl & TG3_FLOW_CTRL_TX) + else if (flow_ctrl & FLOW_CTRL_TX) miireg = ADVERTISE_PAUSE_ASYM; - else if (flow_ctrl & TG3_FLOW_CTRL_RX) + else if (flow_ctrl & FLOW_CTRL_RX) miireg = ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM; else miireg = 0; @@ -1215,11 +1215,11 @@ static u16 tg3_advert_flowctrl_1000X(u8 flow_ctrl) { u16 miireg; - if ((flow_ctrl & TG3_FLOW_CTRL_TX) && (flow_ctrl & TG3_FLOW_CTRL_RX)) + if ((flow_ctrl & FLOW_CTRL_TX) && (flow_ctrl & FLOW_CTRL_RX)) miireg = ADVERTISE_1000XPAUSE; - else if (flow_ctrl & TG3_FLOW_CTRL_TX) + else if (flow_ctrl & FLOW_CTRL_TX) miireg = ADVERTISE_1000XPSE_ASYM; - else if (flow_ctrl & TG3_FLOW_CTRL_RX) + else if (flow_ctrl & FLOW_CTRL_RX) miireg = ADVERTISE_1000XPAUSE | ADVERTISE_1000XPSE_ASYM; else miireg = 0; @@ -1256,16 +1256,16 @@ static u8 tg3_resolve_flowctrl_1000X(u16 lcladv, u16 rmtadv) if (lcladv & ADVERTISE_1000XPAUSE) { if (lcladv & ADVERTISE_1000XPSE_ASYM) { if (rmtadv & LPA_1000XPAUSE) - cap = TG3_FLOW_CTRL_TX | TG3_FLOW_CTRL_RX; + cap = FLOW_CTRL_TX | FLOW_CTRL_RX; else if (rmtadv & LPA_1000XPAUSE_ASYM) - cap = TG3_FLOW_CTRL_RX; + cap = FLOW_CTRL_RX; } else { if (rmtadv & LPA_1000XPAUSE) - cap = TG3_FLOW_CTRL_TX | TG3_FLOW_CTRL_RX; + cap = FLOW_CTRL_TX | FLOW_CTRL_RX; } } else if (lcladv & ADVERTISE_1000XPSE_ASYM) { if ((rmtadv & LPA_1000XPAUSE) && (rmtadv & LPA_1000XPAUSE_ASYM)) - cap = TG3_FLOW_CTRL_TX; + cap = FLOW_CTRL_TX; } return cap; @@ -1294,7 +1294,7 @@ static void tg3_setup_flow_control(struct tg3 *tp, u32 lcladv, u32 rmtadv) tp->link_config.active_flowctrl = flowctrl; - if (flowctrl & TG3_FLOW_CTRL_RX) + if (flowctrl & FLOW_CTRL_RX) tp->rx_mode |= RX_MODE_FLOW_CTRL_ENABLE; else tp->rx_mode &= ~RX_MODE_FLOW_CTRL_ENABLE; @@ -1302,7 +1302,7 @@ static void tg3_setup_flow_control(struct tg3 *tp, u32 lcladv, u32 rmtadv) if (old_rx_mode != tp->rx_mode) tw32_f(MAC_RX_MODE, tp->rx_mode); - if (flowctrl & TG3_FLOW_CTRL_TX) + if (flowctrl & FLOW_CTRL_TX) tp->tx_mode |= TX_MODE_FLOW_CTRL_ENABLE; else tp->tx_mode &= ~TX_MODE_FLOW_CTRL_ENABLE; @@ -9419,12 +9419,12 @@ static void tg3_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam epause->autoneg = (tp->tg3_flags & TG3_FLAG_PAUSE_AUTONEG) != 0; - if (tp->link_config.active_flowctrl & TG3_FLOW_CTRL_RX) + if (tp->link_config.active_flowctrl & FLOW_CTRL_RX) epause->rx_pause = 1; else epause->rx_pause = 0; - if (tp->link_config.active_flowctrl & TG3_FLOW_CTRL_TX) + if (tp->link_config.active_flowctrl & FLOW_CTRL_TX) epause->tx_pause = 1; else epause->tx_pause = 0; @@ -9475,14 +9475,14 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam } } else { if (epause->rx_pause) - tp->link_config.flowctrl |= TG3_FLOW_CTRL_RX; + tp->link_config.flowctrl |= FLOW_CTRL_RX; else - tp->link_config.flowctrl &= ~TG3_FLOW_CTRL_RX; + tp->link_config.flowctrl &= ~FLOW_CTRL_RX; if (epause->tx_pause) - tp->link_config.flowctrl |= TG3_FLOW_CTRL_TX; + tp->link_config.flowctrl |= FLOW_CTRL_TX; else - tp->link_config.flowctrl &= ~TG3_FLOW_CTRL_TX; + tp->link_config.flowctrl &= ~FLOW_CTRL_TX; if (netif_running(dev)) tg3_setup_flow_control(tp, 0, 0); @@ -9502,13 +9502,13 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam else tp->tg3_flags &= ~TG3_FLAG_PAUSE_AUTONEG; if (epause->rx_pause) - tp->link_config.flowctrl |= TG3_FLOW_CTRL_RX; + tp->link_config.flowctrl |= FLOW_CTRL_RX; else - tp->link_config.flowctrl &= ~TG3_FLOW_CTRL_RX; + tp->link_config.flowctrl &= ~FLOW_CTRL_RX; if (epause->tx_pause) - tp->link_config.flowctrl |= TG3_FLOW_CTRL_TX; + tp->link_config.flowctrl |= FLOW_CTRL_TX; else - tp->link_config.flowctrl &= ~TG3_FLOW_CTRL_TX; + tp->link_config.flowctrl &= ~FLOW_CTRL_TX; if (netif_running(dev)) { tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); @@ -13849,7 +13849,7 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, /* flow control autonegotiation is default behavior */ tp->tg3_flags |= TG3_FLAG_PAUSE_AUTONEG; - tp->link_config.flowctrl = TG3_FLOW_CTRL_TX | TG3_FLOW_CTRL_RX; + tp->link_config.flowctrl = FLOW_CTRL_TX | FLOW_CTRL_RX; tg3_init_coal(tp); diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index 61556764a505..0880cfacdcba 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -2338,8 +2338,6 @@ struct tg3_link_config { u8 duplex; u8 autoneg; u8 flowctrl; -#define TG3_FLOW_CTRL_TX 0x01 -#define TG3_FLOW_CTRL_RX 0x02 /* Describes what we actually have. */ u8 active_flowctrl; diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 4638a7bb471e..ee2eac3047b3 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -45,8 +45,6 @@ #define SMSC95XX_INTERNAL_PHY_ID (1) #define SMSC95XX_TX_OVERHEAD (8) #define SMSC95XX_TX_OVERHEAD_CSUM (12) -#define FLOW_CTRL_TX (1) -#define FLOW_CTRL_RX (2) struct smsc95xx_priv { u32 mac_cr; diff --git a/include/linux/mii.h b/include/linux/mii.h index 151b7e0182c7..4a376e0816fd 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -135,6 +135,10 @@ #define LPA_1000FULL 0x0800 /* Link partner 1000BASE-T full duplex */ #define LPA_1000HALF 0x0400 /* Link partner 1000BASE-T half duplex */ +/* Flow control flags */ +#define FLOW_CTRL_TX 0x01 +#define FLOW_CTRL_RX 0x02 + /* This structure is used in all SIOCxMIIxxx ioctl calls */ struct mii_ioctl_data { __u16 phy_id; -- cgit v1.2.3 From bc02ff95fe4ebd3e5ee7455c0aa6f76ebe39ebca Mon Sep 17 00:00:00 2001 From: Steve Glendinning Date: Tue, 16 Dec 2008 02:00:48 -0800 Subject: net: Refactor full duplex flow control resolution These 4 drivers have identical full duplex flow control resolution functions. This patch changes them all to use one common function. The function in question decides whether a device should enable TX and RX flow control in a standard way (IEEE 802.3-2005 table 28B-3), so this should also be useful for other drivers. Signed-off-by: Steve Glendinning Signed-off-by: David S. Miller --- drivers/net/smsc911x.c | 24 +----------------------- drivers/net/smsc9420.c | 24 +----------------------- drivers/net/tg3.c | 24 +----------------------- drivers/net/usb/smsc95xx.c | 24 +----------------------- include/linux/mii.h | 29 +++++++++++++++++++++++++++++ 5 files changed, 33 insertions(+), 92 deletions(-) (limited to 'include') diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c index ae327166f978..fa28542b47d5 100644 --- a/drivers/net/smsc911x.c +++ b/drivers/net/smsc911x.c @@ -642,28 +642,6 @@ static int smsc911x_phy_loopbacktest(struct net_device *dev) } #endif /* USE_PHY_WORK_AROUND */ -static u8 smsc95xx_resolve_flowctrl_fulldplx(u16 lcladv, u16 rmtadv) -{ - u8 cap = 0; - - if (lcladv & ADVERTISE_PAUSE_CAP) { - if (lcladv & ADVERTISE_PAUSE_ASYM) { - if (rmtadv & LPA_PAUSE_CAP) - cap = FLOW_CTRL_TX | FLOW_CTRL_RX; - else if (rmtadv & LPA_PAUSE_ASYM) - cap = FLOW_CTRL_RX; - } else { - if (rmtadv & LPA_PAUSE_CAP) - cap = FLOW_CTRL_TX | FLOW_CTRL_RX; - } - } else if (lcladv & ADVERTISE_PAUSE_ASYM) { - if ((rmtadv & LPA_PAUSE_CAP) && (rmtadv & LPA_PAUSE_ASYM)) - cap = FLOW_CTRL_TX; - } - - return cap; -} - static void smsc911x_phy_update_flowcontrol(struct smsc911x_data *pdata) { struct phy_device *phy_dev = pdata->phy_dev; @@ -674,7 +652,7 @@ static void smsc911x_phy_update_flowcontrol(struct smsc911x_data *pdata) if (phy_dev->duplex == DUPLEX_FULL) { u16 lcladv = phy_read(phy_dev, MII_ADVERTISE); u16 rmtadv = phy_read(phy_dev, MII_LPA); - u8 cap = smsc95xx_resolve_flowctrl_fulldplx(lcladv, rmtadv); + u8 cap = mii_resolve_flowctrl_fdx(lcladv, rmtadv); if (cap & FLOW_CTRL_RX) flow = 0xFFFF0002; diff --git a/drivers/net/smsc9420.c b/drivers/net/smsc9420.c index bc9879d5f281..940220f60921 100644 --- a/drivers/net/smsc9420.c +++ b/drivers/net/smsc9420.c @@ -1080,28 +1080,6 @@ static void smsc9420_set_multicast_list(struct net_device *dev) smsc9420_pci_flush_write(pd); } -static u8 smsc9420_resolve_flowctrl_fulldplx(u16 lcladv, u16 rmtadv) -{ - u8 cap = 0; - - if (lcladv & ADVERTISE_PAUSE_CAP) { - if (lcladv & ADVERTISE_PAUSE_ASYM) { - if (rmtadv & LPA_PAUSE_CAP) - cap = FLOW_CTRL_TX | FLOW_CTRL_RX; - else if (rmtadv & LPA_PAUSE_ASYM) - cap = FLOW_CTRL_RX; - } else { - if (rmtadv & LPA_PAUSE_CAP) - cap = FLOW_CTRL_TX | FLOW_CTRL_RX; - } - } else if (lcladv & ADVERTISE_PAUSE_ASYM) { - if ((rmtadv & LPA_PAUSE_CAP) && (rmtadv & LPA_PAUSE_ASYM)) - cap = FLOW_CTRL_TX; - } - - return cap; -} - static void smsc9420_phy_update_flowcontrol(struct smsc9420_pdata *pd) { struct phy_device *phy_dev = pd->phy_dev; @@ -1110,7 +1088,7 @@ static void smsc9420_phy_update_flowcontrol(struct smsc9420_pdata *pd) if (phy_dev->duplex == DUPLEX_FULL) { u16 lcladv = phy_read(phy_dev, MII_ADVERTISE); u16 rmtadv = phy_read(phy_dev, MII_LPA); - u8 cap = smsc9420_resolve_flowctrl_fulldplx(lcladv, rmtadv); + u8 cap = mii_resolve_flowctrl_fdx(lcladv, rmtadv); if (cap & FLOW_CTRL_RX) flow = 0xFFFF0002; diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index f353f69caeb8..06bd2f4eee6c 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -1227,28 +1227,6 @@ static u16 tg3_advert_flowctrl_1000X(u8 flow_ctrl) return miireg; } -static u8 tg3_resolve_flowctrl_1000T(u16 lcladv, u16 rmtadv) -{ - u8 cap = 0; - - if (lcladv & ADVERTISE_PAUSE_CAP) { - if (lcladv & ADVERTISE_PAUSE_ASYM) { - if (rmtadv & LPA_PAUSE_CAP) - cap = TG3_FLOW_CTRL_TX | TG3_FLOW_CTRL_RX; - else if (rmtadv & LPA_PAUSE_ASYM) - cap = TG3_FLOW_CTRL_RX; - } else { - if (rmtadv & LPA_PAUSE_CAP) - cap = TG3_FLOW_CTRL_TX | TG3_FLOW_CTRL_RX; - } - } else if (lcladv & ADVERTISE_PAUSE_ASYM) { - if ((rmtadv & LPA_PAUSE_CAP) && (rmtadv & LPA_PAUSE_ASYM)) - cap = TG3_FLOW_CTRL_TX; - } - - return cap; -} - static u8 tg3_resolve_flowctrl_1000X(u16 lcladv, u16 rmtadv) { u8 cap = 0; @@ -1288,7 +1266,7 @@ static void tg3_setup_flow_control(struct tg3 *tp, u32 lcladv, u32 rmtadv) if (tp->tg3_flags2 & TG3_FLG2_ANY_SERDES) flowctrl = tg3_resolve_flowctrl_1000X(lcladv, rmtadv); else - flowctrl = tg3_resolve_flowctrl_1000T(lcladv, rmtadv); + flowctrl = mii_resolve_flowctrl_fdx(lcladv, rmtadv); } else flowctrl = tp->link_config.flowctrl; diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index ee2eac3047b3..fed22ffedd57 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -435,28 +435,6 @@ static void smsc95xx_set_multicast(struct net_device *netdev) smsc95xx_write_reg_async(dev, MAC_CR, &pdata->mac_cr); } -static u8 smsc95xx_resolve_flowctrl_fulldplx(u16 lcladv, u16 rmtadv) -{ - u8 cap = 0; - - if (lcladv & ADVERTISE_PAUSE_CAP) { - if (lcladv & ADVERTISE_PAUSE_ASYM) { - if (rmtadv & LPA_PAUSE_CAP) - cap = FLOW_CTRL_TX | FLOW_CTRL_RX; - else if (rmtadv & LPA_PAUSE_ASYM) - cap = FLOW_CTRL_RX; - } else { - if (rmtadv & LPA_PAUSE_CAP) - cap = FLOW_CTRL_TX | FLOW_CTRL_RX; - } - } else if (lcladv & ADVERTISE_PAUSE_ASYM) { - if ((rmtadv & LPA_PAUSE_CAP) && (rmtadv & LPA_PAUSE_ASYM)) - cap = FLOW_CTRL_TX; - } - - return cap; -} - static void smsc95xx_phy_update_flowcontrol(struct usbnet *dev, u8 duplex, u16 lcladv, u16 rmtadv) { @@ -469,7 +447,7 @@ static void smsc95xx_phy_update_flowcontrol(struct usbnet *dev, u8 duplex, } if (duplex == DUPLEX_FULL) { - u8 cap = smsc95xx_resolve_flowctrl_fulldplx(lcladv, rmtadv); + u8 cap = mii_resolve_flowctrl_fdx(lcladv, rmtadv); if (cap & FLOW_CTRL_RX) flow = 0xFFFF0002; diff --git a/include/linux/mii.h b/include/linux/mii.h index 4a376e0816fd..ad748588faf1 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -239,5 +239,34 @@ static inline unsigned int mii_duplex (unsigned int duplex_lock, return 0; } +/** + * mii_resolve_flowctrl_fdx + * @lcladv: value of MII ADVERTISE register + * @rmtadv: value of MII LPA register + * + * Resolve full duplex flow control as per IEEE 802.3-2005 table 28B-3 + */ +static inline u8 mii_resolve_flowctrl_fdx(u16 lcladv, u16 rmtadv) +{ + u8 cap = 0; + + if (lcladv & ADVERTISE_PAUSE_CAP) { + if (lcladv & ADVERTISE_PAUSE_ASYM) { + if (rmtadv & LPA_PAUSE_CAP) + cap = FLOW_CTRL_TX | FLOW_CTRL_RX; + else if (rmtadv & LPA_PAUSE_ASYM) + cap = FLOW_CTRL_RX; + } else { + if (rmtadv & LPA_PAUSE_CAP) + cap = FLOW_CTRL_TX | FLOW_CTRL_RX; + } + } else if (lcladv & ADVERTISE_PAUSE_ASYM) { + if ((rmtadv & LPA_PAUSE_CAP) && (rmtadv & LPA_PAUSE_ASYM)) + cap = FLOW_CTRL_TX; + } + + return cap; +} + #endif /* __KERNEL__ */ #endif /* __LINUX_MII_H__ */ -- cgit v1.2.3 From b24a2516d10751d7ed5afb58420df25370c9dffb Mon Sep 17 00:00:00 2001 From: Yang Hongyang Date: Tue, 16 Dec 2008 02:06:23 -0800 Subject: ipv6: Add IPV6_PKTINFO sticky option support to setsockopt() There are three reasons for me to add this support: 1.When no interface is specified in an IPV6_PKTINFO ancillary data item, the interface specified in an IPV6_PKTINFO sticky optionis is used. RFC3542: 6.7. Summary of Outgoing Interface Selection This document and [RFC-3493] specify various methods that affect the selection of the packet's outgoing interface. This subsection summarizes the ordering among those in order to ensure deterministic behavior. For a given outgoing packet on a given socket, the outgoing interface is determined in the following order: 1. if an interface is specified in an IPV6_PKTINFO ancillary data item, the interface is used. 2. otherwise, if an interface is specified in an IPV6_PKTINFO sticky option, the interface is used. 2.When no IPV6_PKTINFO ancillary data is received,getsockopt() should return the sticky option value which set with setsockopt(). RFC 3542: Issuing getsockopt() for the above options will return the sticky option value i.e., the value set with setsockopt(). If no sticky option value has been set getsockopt() will return the following values: 3.Make the setsockopt implementation POSIX compliant. Signed-off-by: Yang Hongyang Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + net/ipv6/ipv6_sockglue.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 641e026eee8f..0b816cae533e 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -278,6 +278,7 @@ struct ipv6_pinfo { struct in6_addr saddr; struct in6_addr rcv_saddr; struct in6_addr daddr; + struct in6_pktinfo sticky_pktinfo; struct in6_addr *daddr_cache; #ifdef CONFIG_IPV6_SUBTREES struct in6_addr *saddr_cache; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 2aa294be0c79..0feaee38bc37 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -395,6 +395,28 @@ sticky_done: break; } + case IPV6_PKTINFO: + { + struct in6_pktinfo pkt; + + if (optlen == 0) + goto e_inval; + else if (optlen < sizeof(struct in6_pktinfo) || optval == NULL) + goto e_inval; + + if (copy_from_user(&pkt, optval, optlen)) { + retv = -EFAULT; + break; + } + if (sk->sk_bound_dev_if && pkt.ipi6_ifindex != sk->sk_bound_dev_if) + goto e_inval; + + np->sticky_pktinfo.ipi6_ifindex = pkt.ipi6_ifindex; + ipv6_addr_copy(&np->sticky_pktinfo.ipi6_addr, &pkt.ipi6_addr); + retv = 0; + break; + } + case IPV6_2292PKTOPTIONS: { struct ipv6_txoptions *opt = NULL; -- cgit v1.2.3 From b93a531e315e97ef00367099e6b5f19651936e20 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 16 Dec 2008 11:40:27 +0000 Subject: allow bug table entries to use relative pointers (and use it on x86-64) Impact: reduce bug table size This allows reducing the bug table size by half. Perhaps there are other 64-bit architectures that could also make use of this. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 ++++ arch/x86/include/asm/bug.h | 2 +- include/asm-generic/bug.h | 8 ++++++++ lib/bug.c | 19 +++++++++++++++++-- 4 files changed, 30 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ac22bb7719f7..ab98cca84e1b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -87,6 +87,10 @@ config GENERIC_IOMAP config GENERIC_BUG def_bool y depends on BUG + select GENERIC_BUG_RELATIVE_POINTERS if X86_64 + +config GENERIC_BUG_RELATIVE_POINTERS + bool config GENERIC_HWEIGHT def_bool y diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 3def2065fcea..d9cf1cd156d2 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -9,7 +9,7 @@ #ifdef CONFIG_X86_32 # define __BUG_C0 "2:\t.long 1b, %c0\n" #else -# define __BUG_C0 "2:\t.quad 1b, %c0\n" +# define __BUG_C0 "2:\t.long 1b - 2b, %c0 - 2b\n" #endif #define BUG() \ diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 12c07c1866b2..4c794d73fb84 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -8,9 +8,17 @@ #ifdef CONFIG_GENERIC_BUG #ifndef __ASSEMBLY__ struct bug_entry { +#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS unsigned long bug_addr; +#else + signed int bug_addr_disp; +#endif #ifdef CONFIG_DEBUG_BUGVERBOSE +#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS const char *file; +#else + signed int file_disp; +#endif unsigned short line; #endif unsigned short flags; diff --git a/lib/bug.c b/lib/bug.c index bfeafd60ee9f..300e41afbf97 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -5,6 +5,8 @@ CONFIG_BUG - emit BUG traps. Nothing happens without this. CONFIG_GENERIC_BUG - enable this code. + CONFIG_GENERIC_BUG_RELATIVE_POINTERS - use 32-bit pointers relative to + the containing struct bug_entry for bug_addr and file. CONFIG_DEBUG_BUGVERBOSE - emit full file+line information for each BUG CONFIG_BUG and CONFIG_DEBUG_BUGVERBOSE are potentially user-settable @@ -43,6 +45,15 @@ extern const struct bug_entry __start___bug_table[], __stop___bug_table[]; +static inline unsigned long bug_addr(const struct bug_entry *bug) +{ +#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS + return bug->bug_addr; +#else + return (unsigned long)bug + bug->bug_addr_disp; +#endif +} + #ifdef CONFIG_MODULES static LIST_HEAD(module_bug_list); @@ -55,7 +66,7 @@ static const struct bug_entry *module_find_bug(unsigned long bugaddr) unsigned i; for (i = 0; i < mod->num_bugs; ++i, ++bug) - if (bugaddr == bug->bug_addr) + if (bugaddr == bug_addr(bug)) return bug; } return NULL; @@ -108,7 +119,7 @@ const struct bug_entry *find_bug(unsigned long bugaddr) const struct bug_entry *bug; for (bug = __start___bug_table; bug < __stop___bug_table; ++bug) - if (bugaddr == bug->bug_addr) + if (bugaddr == bug_addr(bug)) return bug; return module_find_bug(bugaddr); @@ -133,7 +144,11 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) if (bug) { #ifdef CONFIG_DEBUG_BUGVERBOSE +#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS file = bug->file; +#else + file = (const char *)bug + bug->file_disp; +#endif line = bug->line; #endif warning = (bug->flags & BUGFLAG_WARNING) != 0; -- cgit v1.2.3 From 8c5df16bec8a60bb8589fc232b9e26cac0ed4b2c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 16 Dec 2008 12:17:26 -0800 Subject: swiotlb: allow architectures to override swiotlb pool allocation Impact: generalize swiotlb allocation code Architectures may need to allocate memory specially for use with the swiotlb. Create the weak function swiotlb_alloc_boot() and swiotlb_alloc() defaulting to the current behaviour. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ian Campbell Signed-off-by: Ingo Molnar --- include/linux/swiotlb.h | 3 +++ lib/swiotlb.c | 16 +++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index b18ec5533e8c..b8c5fc766a56 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -10,6 +10,9 @@ struct scatterlist; extern void swiotlb_init(void); +extern void *swiotlb_alloc_boot(size_t bytes, unsigned long nslabs); +extern void *swiotlb_alloc(unsigned order, unsigned long nslabs); + extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 5f6c629a924d..abecb2857556 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -126,6 +127,16 @@ setup_io_tlb_npages(char *str) __setup("swiotlb=", setup_io_tlb_npages); /* make io_tlb_overflow tunable too? */ +void * __weak swiotlb_alloc_boot(size_t size, unsigned long nslabs) +{ + return alloc_bootmem_low_pages(size); +} + +void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs) +{ + return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); +} + /* * Statically reserve bounce buffer space and initialize bounce buffer data * structures for the software IO TLB used to implement the DMA API. @@ -145,7 +156,7 @@ swiotlb_init_with_default_size(size_t default_size) /* * Get IO TLB memory from the low pages */ - io_tlb_start = alloc_bootmem_low_pages(bytes); + io_tlb_start = swiotlb_alloc_boot(bytes, io_tlb_nslabs); if (!io_tlb_start) panic("Cannot allocate SWIOTLB buffer"); io_tlb_end = io_tlb_start + bytes; @@ -202,8 +213,7 @@ swiotlb_late_init_with_default_size(size_t default_size) bytes = io_tlb_nslabs << IO_TLB_SHIFT; while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { - io_tlb_start = (char *)__get_free_pages(GFP_DMA | __GFP_NOWARN, - order); + io_tlb_start = swiotlb_alloc(order, io_tlb_nslabs); if (io_tlb_start) break; order--; -- cgit v1.2.3 From 0016fdee927f7aa0f428494bcf11ae60c7470a02 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 16 Dec 2008 12:17:27 -0800 Subject: swiotlb: move some definitions to header Impact: cleanup Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- include/linux/swiotlb.h | 14 ++++++++++++++ lib/swiotlb.c | 14 +------------- 2 files changed, 15 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index b8c5fc766a56..58b996a642f9 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -7,6 +7,20 @@ struct device; struct dma_attrs; struct scatterlist; +/* + * Maximum allowable number of contiguous slabs to map, + * must be a power of 2. What is the appropriate value ? + * The complexity of {map,unmap}_single is linearly dependent on this value. + */ +#define IO_TLB_SEGSIZE 128 + + +/* + * log of the size of each IO TLB slab. The number of slabs is command line + * controllable. + */ +#define IO_TLB_SHIFT 11 + extern void swiotlb_init(void); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index abecb2857556..db724ba7ebf6 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -40,19 +41,6 @@ #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg))) #define SG_ENT_PHYS_ADDRESS(sg) virt_to_bus(SG_ENT_VIRT_ADDRESS(sg)) -/* - * Maximum allowable number of contiguous slabs to map, - * must be a power of 2. What is the appropriate value ? - * The complexity of {map,unmap}_single is linearly dependent on this value. - */ -#define IO_TLB_SEGSIZE 128 - -/* - * log of the size of each IO TLB slab. The number of slabs is command line - * controllable. - */ -#define IO_TLB_SHIFT 11 - #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) /* -- cgit v1.2.3 From ecbf29cdb3990c83d90d0c4187c89fb2ce423367 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 16 Dec 2008 12:37:07 -0800 Subject: xen: clean up asm/xen/hypervisor.h Impact: cleanup hypervisor.h had accumulated a lot of crud, including lots of spurious #includes. Clean it all up, and go around fixing up everything else accordingly. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/xen/hypercall.h | 6 ++++++ arch/x86/include/asm/xen/hypervisor.h | 39 +++++++---------------------------- arch/x86/include/asm/xen/page.h | 5 +++++ arch/x86/xen/enlighten.c | 1 + drivers/xen/balloon.c | 4 +++- drivers/xen/features.c | 6 +++++- drivers/xen/grant-table.c | 1 + include/xen/interface/event_channel.h | 2 ++ 8 files changed, 31 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 3f6000d95fe2..5e79ca694326 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -33,8 +33,14 @@ #ifndef _ASM_X86_XEN_HYPERCALL_H #define _ASM_X86_XEN_HYPERCALL_H +#include +#include #include #include +#include + +#include +#include #include #include diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index a38d25ac87d2..81fbd735aec4 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -33,39 +33,10 @@ #ifndef _ASM_X86_XEN_HYPERVISOR_H #define _ASM_X86_XEN_HYPERVISOR_H -#include -#include - -#include -#include - -#include -#include -#include -#if defined(__i386__) -# ifdef CONFIG_X86_PAE -# include -# else -# include -# endif -#endif -#include - /* arch/i386/kernel/setup.c */ extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; -/* arch/i386/mach-xen/evtchn.c */ -/* Force a proper event-channel callback from Xen. */ -extern void force_evtchn_callback(void); - -/* Turn jiffies into Xen system time. */ -u64 jiffies_to_st(unsigned long jiffies); - - -#define MULTI_UVMFLAGS_INDEX 3 -#define MULTI_UVMDOMID_INDEX 4 - enum xen_domain_type { XEN_NATIVE, XEN_PV_DOMAIN, @@ -74,9 +45,15 @@ enum xen_domain_type { extern enum xen_domain_type xen_domain_type; +#ifdef CONFIG_XEN #define xen_domain() (xen_domain_type != XEN_NATIVE) -#define xen_pv_domain() (xen_domain_type == XEN_PV_DOMAIN) +#else +#define xen_domain() (0) +#endif + +#define xen_pv_domain() (xen_domain() && xen_domain_type == XEN_PV_DOMAIN) +#define xen_hvm_domain() (xen_domain() && xen_domain_type == XEN_HVM_DOMAIN) + #define xen_initial_domain() (xen_pv_domain() && xen_start_info->flags & SIF_INITDOMAIN) -#define xen_hvm_domain() (xen_domain_type == XEN_HVM_DOMAIN) #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index bc628998a1b9..7ef617ef1df3 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -1,11 +1,16 @@ #ifndef _ASM_X86_XEN_PAGE_H #define _ASM_X86_XEN_PAGE_H +#include +#include +#include #include #include +#include #include +#include #include /* Xen machine address */ diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 86cd2f829683..bea215230b20 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -28,6 +28,7 @@ #include #include +#include #include #include #include diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 526c191e84ea..8dc7109d61b7 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -44,13 +44,15 @@ #include #include -#include #include #include #include #include #include +#include +#include +#include #include #include #include diff --git a/drivers/xen/features.c b/drivers/xen/features.c index 0707714e40d6..99eda169c779 100644 --- a/drivers/xen/features.c +++ b/drivers/xen/features.c @@ -8,7 +8,11 @@ #include #include #include -#include + +#include + +#include +#include #include u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly; diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 06592b9da83c..7d8f531fb8e8 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h index 919b5bdcb2bd..2090881c3650 100644 --- a/include/xen/interface/event_channel.h +++ b/include/xen/interface/event_channel.h @@ -9,6 +9,8 @@ #ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__ #define __XEN_PUBLIC_EVENT_CHANNEL_H__ +#include + typedef uint32_t evtchn_port_t; DEFINE_GUEST_HANDLE(evtchn_port_t); -- cgit v1.2.3 From 48a1b10aff588833b73994704c47bbd0deb73e9c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 11 Dec 2008 00:15:01 -0800 Subject: x86, sparseirq: move irq_desc according to smp_affinity, v7 Impact: improve NUMA handling by migrating irq_desc on smp_affinity changes if CONFIG_NUMA_MIGRATE_IRQ_DESC is set: - make irq_desc to go with affinity aka irq_desc moving etc - call move_irq_desc in irq_complete_move() - legacy irq_desc is not moved, because they are allocated via static array for logical apic mode, need to add move_desc_in_progress_in_same_domain, otherwise it will not be moved ==> also could need two phases to get irq_desc moved. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 9 +++ arch/x86/kernel/io_apic.c | 142 +++++++++++++++++++++++++++++++++++++++++++++- include/linux/irq.h | 10 ++++ kernel/irq/Makefile | 1 + kernel/irq/chip.c | 12 +++- kernel/irq/handle.c | 15 +++-- kernel/irq/internals.h | 5 ++ kernel/irq/numa_migrate.c | 127 +++++++++++++++++++++++++++++++++++++++++ 8 files changed, 313 insertions(+), 8 deletions(-) create mode 100644 kernel/irq/numa_migrate.c (limited to 'include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8943c13502c6..29073532f94c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -248,6 +248,15 @@ config SPARSE_IRQ If you don't know what to do here, say Y. +config NUMA_MIGRATE_IRQ_DESC + bool "Move irq desc when changing irq smp_affinity" + depends on SPARSE_IRQ && SMP + default n + help + This enables moving irq_desc to cpu/node that irq will use handled. + + If you don't know what to do here, say N. + config X86_FIND_SMP_CONFIG def_bool y depends on X86_MPPARSE || X86_VOYAGER diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index a1a2e070f31a..bfe1245b1a3e 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -141,6 +141,9 @@ struct irq_cfg { unsigned move_cleanup_count; u8 vector; u8 move_in_progress : 1; +#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + u8 move_desc_pending : 1; +#endif }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ @@ -223,6 +226,121 @@ void arch_init_chip_data(struct irq_desc *desc, int cpu) } } +#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + +static void +init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) +{ + struct irq_pin_list *old_entry, *head, *tail, *entry; + + cfg->irq_2_pin = NULL; + old_entry = old_cfg->irq_2_pin; + if (!old_entry) + return; + + entry = get_one_free_irq_2_pin(cpu); + if (!entry) + return; + + entry->apic = old_entry->apic; + entry->pin = old_entry->pin; + head = entry; + tail = entry; + old_entry = old_entry->next; + while (old_entry) { + entry = get_one_free_irq_2_pin(cpu); + if (!entry) { + entry = head; + while (entry) { + head = entry->next; + kfree(entry); + entry = head; + } + /* still use the old one */ + return; + } + entry->apic = old_entry->apic; + entry->pin = old_entry->pin; + tail->next = entry; + tail = entry; + old_entry = old_entry->next; + } + + tail->next = NULL; + cfg->irq_2_pin = head; +} + +static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) +{ + struct irq_pin_list *entry, *next; + + if (old_cfg->irq_2_pin == cfg->irq_2_pin) + return; + + entry = old_cfg->irq_2_pin; + + while (entry) { + next = entry->next; + kfree(entry); + entry = next; + } + old_cfg->irq_2_pin = NULL; +} + +void arch_init_copy_chip_data(struct irq_desc *old_desc, + struct irq_desc *desc, int cpu) +{ + struct irq_cfg *cfg; + struct irq_cfg *old_cfg; + + cfg = get_one_free_irq_cfg(cpu); + + if (!cfg) + return; + + desc->chip_data = cfg; + + old_cfg = old_desc->chip_data; + + memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); + + init_copy_irq_2_pin(old_cfg, cfg, cpu); +} + +static void free_irq_cfg(struct irq_cfg *old_cfg) +{ + kfree(old_cfg); +} + +void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) +{ + struct irq_cfg *old_cfg, *cfg; + + old_cfg = old_desc->chip_data; + cfg = desc->chip_data; + + if (old_cfg == cfg) + return; + + if (old_cfg) { + free_irq_2_pin(old_cfg, cfg); + free_irq_cfg(old_cfg); + old_desc->chip_data = NULL; + } +} + +static void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) +{ + struct irq_cfg *cfg = desc->chip_data; + + if (!cfg->move_in_progress) { + /* it means that domain is not changed */ + if (!cpus_intersects(desc->affinity, mask)) + cfg->move_desc_pending = 1; + } +} +#endif + #else static struct irq_cfg *irq_cfg(unsigned int irq) { @@ -231,9 +349,11 @@ static struct irq_cfg *irq_cfg(unsigned int irq) #endif +#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC static inline void set_extra_move_desc(struct irq_desc *desc, cpumask_t mask) { } +#endif struct io_apic { unsigned int index; @@ -2346,14 +2466,34 @@ static void irq_complete_move(struct irq_desc **descp) struct irq_cfg *cfg = desc->chip_data; unsigned vector, me; - if (likely(!cfg->move_in_progress)) + if (likely(!cfg->move_in_progress)) { +#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + if (likely(!cfg->move_desc_pending)) + return; + + /* domain is not change, but affinity is changed */ + me = smp_processor_id(); + if (cpu_isset(me, desc->affinity)) { + *descp = desc = move_irq_desc(desc, me); + /* get the new one */ + cfg = desc->chip_data; + cfg->move_desc_pending = 0; + } +#endif return; + } vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { cpumask_t cleanup_mask; +#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + *descp = desc = move_irq_desc(desc, me); + /* get the new one */ + cfg = desc->chip_data; +#endif + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); cfg->move_cleanup_count = cpus_weight(cleanup_mask); send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); diff --git a/include/linux/irq.h b/include/linux/irq.h index b5749db3e5a1..36a015746788 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -227,6 +227,16 @@ extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu); #endif +static inline struct irq_desc * +irq_remap_to_desc(unsigned int irq, struct irq_desc *desc) +{ +#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC + return irq_to_desc(irq); +#else + return desc; +#endif +} + /* * Migration helpers for obsolete names, they will go away: */ diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index 681c52dbfe22..4dd5b1edac98 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -3,3 +3,4 @@ obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o +obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 8e4fce4a1b1f..de210f4b7a92 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -353,6 +353,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) spin_lock(&desc->lock); mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); if (unlikely(desc->status & IRQ_INPROGRESS)) goto out_unlock; @@ -430,6 +431,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) desc->status &= ~IRQ_INPROGRESS; out: desc->chip->eoi(irq); + desc = irq_remap_to_desc(irq, desc); spin_unlock(&desc->lock); } @@ -466,12 +468,14 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) !desc->action)) { desc->status |= (IRQ_PENDING | IRQ_MASKED); mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); goto out_unlock; } kstat_incr_irqs_this_cpu(irq, desc); /* Start handling the irq */ desc->chip->ack(irq); + desc = irq_remap_to_desc(irq, desc); /* Mark the IRQ currently in progress.*/ desc->status |= IRQ_INPROGRESS; @@ -532,8 +536,10 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) if (!noirqdebug) note_interrupt(irq, desc, action_ret); - if (desc->chip->eoi) + if (desc->chip->eoi) { desc->chip->eoi(irq); + desc = irq_remap_to_desc(irq, desc); + } } void @@ -568,8 +574,10 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, /* Uninstall? */ if (handle == handle_bad_irq) { - if (desc->chip != &no_irq_chip) + if (desc->chip != &no_irq_chip) { mask_ack_irq(desc, irq); + desc = irq_remap_to_desc(irq, desc); + } desc->status |= IRQ_DISABLED; desc->depth = 1; } diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 8aa09547f5ef..f1a23069c20a 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -23,7 +23,7 @@ /* * lockdep: we want to handle all irq_desc locks as a single lock-class: */ -static struct lock_class_key irq_desc_lock_class; +struct lock_class_key irq_desc_lock_class; /** * handle_bad_irq - handle spurious and unhandled irqs @@ -73,7 +73,7 @@ static struct irq_desc irq_desc_init = { #endif }; -static void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) +void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) { unsigned long bytes; char *ptr; @@ -113,7 +113,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) /* * Protect the sparse_irqs: */ -static DEFINE_SPINLOCK(sparse_irq_lock); +DEFINE_SPINLOCK(sparse_irq_lock); struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly; @@ -337,8 +337,11 @@ unsigned int __do_IRQ(unsigned int irq) /* * No locking required for CPU-local interrupts: */ - if (desc->chip->ack) + if (desc->chip->ack) { desc->chip->ack(irq); + /* get new one */ + desc = irq_remap_to_desc(irq, desc); + } if (likely(!(desc->status & IRQ_DISABLED))) { action_ret = handle_IRQ_event(irq, desc->action); if (!noirqdebug) @@ -349,8 +352,10 @@ unsigned int __do_IRQ(unsigned int irq) } spin_lock(&desc->lock); - if (desc->chip->ack) + if (desc->chip->ack) { desc->chip->ack(irq); + desc = irq_remap_to_desc(irq, desc); + } /* * REPLAY is when Linux resends an IRQ that was dropped earlier * WAITING is used by probe to mark irqs that are being tested diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 64c1c7253dae..e6d0a43cc125 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -13,6 +13,11 @@ extern void compat_irq_chip_set_default_handler(struct irq_desc *desc); extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, unsigned long flags); +extern struct lock_class_key irq_desc_lock_class; +extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr); +extern spinlock_t sparse_irq_lock; +extern struct irq_desc *irq_desc_ptrs[NR_IRQS]; + #ifdef CONFIG_PROC_FS extern void register_irq_proc(unsigned int irq, struct irq_desc *desc); extern void register_handler_proc(unsigned int irq, struct irqaction *action); diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c new file mode 100644 index 000000000000..0178e2296990 --- /dev/null +++ b/kernel/irq/numa_migrate.c @@ -0,0 +1,127 @@ +/* + * linux/kernel/irq/handle.c + * + * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar + * Copyright (C) 2005-2006, Thomas Gleixner, Russell King + * + * This file contains the core interrupt handling code. + * + * Detailed information is available in Documentation/DocBook/genericirq + * + */ + +#include +#include +#include +#include +#include + +#include "internals.h" + +static void init_copy_kstat_irqs(struct irq_desc *old_desc, + struct irq_desc *desc, + int cpu, int nr) +{ + unsigned long bytes; + + init_kstat_irqs(desc, cpu, nr); + + if (desc->kstat_irqs != old_desc->kstat_irqs) { + /* Compute how many bytes we need per irq and allocate them */ + bytes = nr * sizeof(unsigned int); + + memcpy(desc->kstat_irqs, old_desc->kstat_irqs, bytes); + } +} + +static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc) +{ + if (old_desc->kstat_irqs == desc->kstat_irqs) + return; + + kfree(old_desc->kstat_irqs); + old_desc->kstat_irqs = NULL; +} + +static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, + struct irq_desc *desc, int cpu) +{ + memcpy(desc, old_desc, sizeof(struct irq_desc)); + desc->cpu = cpu; + lockdep_set_class(&desc->lock, &irq_desc_lock_class); + init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids); + arch_init_copy_chip_data(old_desc, desc, cpu); +} + +static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc) +{ + free_kstat_irqs(old_desc, desc); + arch_free_chip_data(old_desc, desc); +} + +static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, + int cpu) +{ + struct irq_desc *desc; + unsigned int irq; + unsigned long flags; + int node; + + irq = old_desc->irq; + + spin_lock_irqsave(&sparse_irq_lock, flags); + + /* We have to check it to avoid races with another CPU */ + desc = irq_desc_ptrs[irq]; + + if (desc && old_desc != desc) + goto out_unlock; + + node = cpu_to_node(cpu); + desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); + printk(KERN_DEBUG " move irq_desc for %d to cpu %d node %d\n", + irq, cpu, node); + if (!desc) { + printk(KERN_ERR "can not get new irq_desc for moving\n"); + /* still use old one */ + desc = old_desc; + goto out_unlock; + } + init_copy_one_irq_desc(irq, old_desc, desc, cpu); + + irq_desc_ptrs[irq] = desc; + + /* free the old one */ + free_one_irq_desc(old_desc, desc); + kfree(old_desc); + +out_unlock: + spin_unlock_irqrestore(&sparse_irq_lock, flags); + + return desc; +} + +struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu) +{ + int old_cpu; + int node, old_node; + + /* those all static, do move them */ + if (desc->irq < NR_IRQS_LEGACY) + return desc; + + old_cpu = desc->cpu; + printk(KERN_DEBUG + "try to move irq_desc from cpu %d to %d\n", old_cpu, cpu); + if (old_cpu != cpu) { + node = cpu_to_node(cpu); + old_node = cpu_to_node(old_cpu); + if (old_node != node) + desc = __real_move_irq_desc(desc, cpu); + else + desc->cpu = cpu; + } + + return desc; +} + -- cgit v1.2.3 From b31a1d8b41513b96e9c7ec2f68c5734cef0b26a4 Mon Sep 17 00:00:00 2001 From: Andy Fleming Date: Tue, 16 Dec 2008 15:29:15 -0800 Subject: gianfar: Convert gianfar to an of_platform_driver Does the same for the accompanying MDIO driver, and then modifies the TBI configuration method. The old way used fields in einfo, which no longer exists. The new way is to create an MDIO device-tree node for each instance of gianfar, and create a tbi-handle property to associate ethernet controllers with the TBI PHYs they are connected to. Signed-off-by: Andy Fleming Signed-off-by: David S. Miller --- Documentation/powerpc/dts-bindings/fsl/tsec.txt | 12 +- arch/powerpc/boot/dts/asp834x-redboot.dts | 20 ++ arch/powerpc/boot/dts/ksi8560.dts | 20 ++ arch/powerpc/boot/dts/mpc8313erdb.dts | 20 ++ arch/powerpc/boot/dts/mpc8315erdb.dts | 19 ++ arch/powerpc/boot/dts/mpc8349emitx.dts | 18 ++ arch/powerpc/boot/dts/mpc8349emitxgp.dts | 5 + arch/powerpc/boot/dts/mpc834x_mds.dts | 19 ++ arch/powerpc/boot/dts/mpc8377_mds.dts | 19 ++ arch/powerpc/boot/dts/mpc8377_rdb.dts | 19 ++ arch/powerpc/boot/dts/mpc8378_mds.dts | 19 ++ arch/powerpc/boot/dts/mpc8378_rdb.dts | 17 ++ arch/powerpc/boot/dts/mpc8379_mds.dts | 18 ++ arch/powerpc/boot/dts/mpc8379_rdb.dts | 18 ++ arch/powerpc/boot/dts/mpc8536ds.dts | 18 ++ arch/powerpc/boot/dts/mpc8540ads.dts | 31 +++ arch/powerpc/boot/dts/mpc8541cds.dts | 18 ++ arch/powerpc/boot/dts/mpc8544ds.dts | 20 ++ arch/powerpc/boot/dts/mpc8548cds.dts | 44 ++++ arch/powerpc/boot/dts/mpc8555cds.dts | 18 ++ arch/powerpc/boot/dts/mpc8560ads.dts | 18 ++ arch/powerpc/boot/dts/mpc8568mds.dts | 18 ++ arch/powerpc/boot/dts/mpc8572ds.dts | 45 ++++ arch/powerpc/boot/dts/mpc8641_hpcn.dts | 45 ++++ arch/powerpc/boot/dts/sbc8349.dts | 18 ++ arch/powerpc/boot/dts/sbc8548.dts | 18 ++ arch/powerpc/boot/dts/sbc8560.dts | 18 ++ arch/powerpc/boot/dts/sbc8641d.dts | 44 ++++ arch/powerpc/boot/dts/stx_gp3_8560.dts | 18 ++ arch/powerpc/boot/dts/tqm8540.dts | 28 +++ arch/powerpc/boot/dts/tqm8541.dts | 18 ++ arch/powerpc/boot/dts/tqm8548-bigflash.dts | 44 ++++ arch/powerpc/boot/dts/tqm8548.dts | 44 ++++ arch/powerpc/boot/dts/tqm8555.dts | 18 ++ arch/powerpc/boot/dts/tqm8560.dts | 18 ++ arch/powerpc/sysdev/fsl_soc.c | 241 +++--------------- drivers/net/gianfar.c | 321 ++++++++++++++++-------- drivers/net/gianfar.h | 21 +- drivers/net/gianfar_ethtool.c | 22 +- drivers/net/gianfar_mii.c | 212 +++++++++++----- drivers/net/gianfar_mii.h | 2 + include/linux/fsl_devices.h | 18 +- 42 files changed, 1217 insertions(+), 424 deletions(-) (limited to 'include') diff --git a/Documentation/powerpc/dts-bindings/fsl/tsec.txt b/Documentation/powerpc/dts-bindings/fsl/tsec.txt index cf55fa4112d2..7fa4b27574b5 100644 --- a/Documentation/powerpc/dts-bindings/fsl/tsec.txt +++ b/Documentation/powerpc/dts-bindings/fsl/tsec.txt @@ -2,8 +2,8 @@ The MDIO is a bus to which the PHY devices are connected. For each device that exists on this bus, a child node should be created. See -the definition of the PHY node below for an example of how to define -a PHY. +the definition of the PHY node in booting-without-of.txt for an example +of how to define a PHY. Required properties: - reg : Offset and length of the register set for the device @@ -21,6 +21,14 @@ Example: }; }; +* TBI Internal MDIO bus + +As of this writing, every tsec is associated with an internal TBI PHY. +This PHY is accessed through the local MDIO bus. These buses are defined +similarly to the mdio buses, except they are compatible with "fsl,gianfar-tbi". +The TBI PHYs underneath them are similar to normal PHYs, but the reg property +is considered instructive, rather than descriptive. The reg property should +be chosen so it doesn't interfere with other PHYs on the bus. * Gianfar-compatible ethernet nodes diff --git a/arch/powerpc/boot/dts/asp834x-redboot.dts b/arch/powerpc/boot/dts/asp834x-redboot.dts index 6235fca445de..524af7ef9f26 100644 --- a/arch/powerpc/boot/dts/asp834x-redboot.dts +++ b/arch/powerpc/boot/dts/asp834x-redboot.dts @@ -199,8 +199,26 @@ reg = <0x2>; device_type = "ethernet-phy"; }; + + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + enet0: ethernet@24000 { cell-index = <0>; device_type = "network"; @@ -210,6 +228,7 @@ local-mac-address = [ 00 08 e5 11 32 33 ]; interrupts = <32 0x8 33 0x8 34 0x8>; interrupt-parent = <&ipic>; + tbi-handle = <&tbi0>; phy-handle = <&phy0>; linux,network-index = <0>; }; @@ -223,6 +242,7 @@ local-mac-address = [ 00 08 e5 11 32 34 ]; interrupts = <35 0x8 36 0x8 37 0x8>; interrupt-parent = <&ipic>; + tbi-handle = <&tbi1>; phy-handle = <&phy1>; linux,network-index = <1>; }; diff --git a/arch/powerpc/boot/dts/ksi8560.dts b/arch/powerpc/boot/dts/ksi8560.dts index 49737589ffc8..3bfff47418db 100644 --- a/arch/powerpc/boot/dts/ksi8560.dts +++ b/arch/powerpc/boot/dts/ksi8560.dts @@ -141,8 +141,26 @@ reg = <0x2>; device_type = "ethernet-phy"; }; + + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + enet0: ethernet@24000 { device_type = "network"; model = "TSEC"; @@ -152,6 +170,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <0x1d 0x2 0x1e 0x2 0x22 0x2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi0>; phy-handle = <&PHY1>; }; @@ -164,6 +183,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <0x23 0x2 0x24 0x2 0x28 0x2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi1>; phy-handle = <&PHY2>; }; diff --git a/arch/powerpc/boot/dts/mpc8313erdb.dts b/arch/powerpc/boot/dts/mpc8313erdb.dts index 503031766825..d4df8b6857a4 100644 --- a/arch/powerpc/boot/dts/mpc8313erdb.dts +++ b/arch/powerpc/boot/dts/mpc8313erdb.dts @@ -190,6 +190,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <37 0x8 36 0x8 35 0x8>; interrupt-parent = <&ipic>; + tbi-handle = < &tbi0 >; phy-handle = < &phy1 >; fsl,magic-packet; @@ -210,6 +211,10 @@ reg = <0x4>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; }; @@ -222,9 +227,24 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <34 0x8 33 0x8 32 0x8>; interrupt-parent = <&ipic>; + tbi-handle = < &tbi1 >; phy-handle = < &phy4 >; sleep = <&pmc 0x10000000>; fsl,magic-packet; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + }; serial0: serial@4500 { diff --git a/arch/powerpc/boot/dts/mpc8315erdb.dts b/arch/powerpc/boot/dts/mpc8315erdb.dts index 6b850670de1d..d2cdd47a246d 100644 --- a/arch/powerpc/boot/dts/mpc8315erdb.dts +++ b/arch/powerpc/boot/dts/mpc8315erdb.dts @@ -206,8 +206,25 @@ reg = <0x1>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; + enet0: ethernet@24000 { cell-index = <0>; device_type = "network"; @@ -217,6 +234,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <32 0x8 33 0x8 34 0x8>; interrupt-parent = <&ipic>; + tbi-handle = <&tbi0>; phy-handle = < &phy0 >; }; @@ -229,6 +247,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <35 0x8 36 0x8 37 0x8>; interrupt-parent = <&ipic>; + tbi-handle = <&tbi1>; phy-handle = < &phy1 >; }; diff --git a/arch/powerpc/boot/dts/mpc8349emitx.dts b/arch/powerpc/boot/dts/mpc8349emitx.dts index 4bdbaf4993a1..712783d0707e 100644 --- a/arch/powerpc/boot/dts/mpc8349emitx.dts +++ b/arch/powerpc/boot/dts/mpc8349emitx.dts @@ -184,6 +184,22 @@ reg = <0x1c>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; enet0: ethernet@24000 { @@ -195,6 +211,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <32 0x8 33 0x8 34 0x8>; interrupt-parent = <&ipic>; + tbi-handle = <&tbi0>; phy-handle = <&phy1c>; linux,network-index = <0>; }; @@ -211,6 +228,7 @@ /* Vitesse 7385 isn't on the MDIO bus */ fixed-link = <1 1 1000 0 0>; linux,network-index = <1>; + tbi-handle = <&tbi1>; }; serial0: serial@4500 { diff --git a/arch/powerpc/boot/dts/mpc8349emitxgp.dts b/arch/powerpc/boot/dts/mpc8349emitxgp.dts index fa40647ee62e..3e918af41fb1 100644 --- a/arch/powerpc/boot/dts/mpc8349emitxgp.dts +++ b/arch/powerpc/boot/dts/mpc8349emitxgp.dts @@ -163,6 +163,10 @@ reg = <0x1c>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; enet0: ethernet@24000 { @@ -174,6 +178,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <32 0x8 33 0x8 34 0x8>; interrupt-parent = <&ipic>; + tbi-handle = <&tbi0>; phy-handle = <&phy1c>; linux,network-index = <0>; }; diff --git a/arch/powerpc/boot/dts/mpc834x_mds.dts b/arch/powerpc/boot/dts/mpc834x_mds.dts index c986c541e9bb..d9adba01c09c 100644 --- a/arch/powerpc/boot/dts/mpc834x_mds.dts +++ b/arch/powerpc/boot/dts/mpc834x_mds.dts @@ -185,8 +185,25 @@ reg = <0x1>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; + enet0: ethernet@24000 { cell-index = <0>; device_type = "network"; @@ -196,6 +213,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <32 0x8 33 0x8 34 0x8>; interrupt-parent = <&ipic>; + tbi-handle = <&tbi0>; phy-handle = <&phy0>; linux,network-index = <0>; }; @@ -209,6 +227,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <35 0x8 36 0x8 37 0x8>; interrupt-parent = <&ipic>; + tbi-handle = <&tbi1>; phy-handle = <&phy1>; linux,network-index = <1>; }; diff --git a/arch/powerpc/boot/dts/mpc8377_mds.dts b/arch/powerpc/boot/dts/mpc8377_mds.dts index 0484561bd2c0..1d14d7052e6d 100644 --- a/arch/powerpc/boot/dts/mpc8377_mds.dts +++ b/arch/powerpc/boot/dts/mpc8377_mds.dts @@ -193,8 +193,25 @@ reg = <0x3>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; + enet0: ethernet@24000 { cell-index = <0>; device_type = "network"; @@ -205,6 +222,7 @@ interrupts = <32 0x8 33 0x8 34 0x8>; phy-connection-type = "mii"; interrupt-parent = <&ipic>; + tbi-handle = <&tbi0>; phy-handle = <&phy2>; }; @@ -218,6 +236,7 @@ interrupts = <35 0x8 36 0x8 37 0x8>; phy-connection-type = "mii"; interrupt-parent = <&ipic>; + tbi-handle = <&tbi1>; phy-handle = <&phy3>; }; diff --git a/arch/powerpc/boot/dts/mpc8377_rdb.dts b/arch/powerpc/boot/dts/mpc8377_rdb.dts index 435ef3dd022d..31f348fdfe14 100644 --- a/arch/powerpc/boot/dts/mpc8377_rdb.dts +++ b/arch/powerpc/boot/dts/mpc8377_rdb.dts @@ -211,8 +211,25 @@ reg = <0x2>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; + enet0: ethernet@24000 { cell-index = <0>; device_type = "network"; @@ -223,6 +240,7 @@ interrupts = <32 0x8 33 0x8 34 0x8>; phy-connection-type = "mii"; interrupt-parent = <&ipic>; + tbi-handle = <&tbi0>; phy-handle = <&phy2>; }; @@ -237,6 +255,7 @@ phy-connection-type = "mii"; interrupt-parent = <&ipic>; fixed-link = <1 1 1000 0 0>; + tbi-handle = <&tbi1>; }; serial0: serial@4500 { diff --git a/arch/powerpc/boot/dts/mpc8378_mds.dts b/arch/powerpc/boot/dts/mpc8378_mds.dts index 67a08d2e2ff2..b85fc02682d2 100644 --- a/arch/powerpc/boot/dts/mpc8378_mds.dts +++ b/arch/powerpc/boot/dts/mpc8378_mds.dts @@ -232,8 +232,25 @@ reg = <0x3>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; + enet0: ethernet@24000 { cell-index = <0>; device_type = "network"; @@ -244,6 +261,7 @@ interrupts = <32 0x8 33 0x8 34 0x8>; phy-connection-type = "mii"; interrupt-parent = <&ipic>; + tbi-handle = <&tbi0>; phy-handle = <&phy2>; }; @@ -257,6 +275,7 @@ interrupts = <35 0x8 36 0x8 37 0x8>; phy-connection-type = "mii"; interrupt-parent = <&ipic>; + tbi-handle = <&tbi1>; phy-handle = <&phy3>; }; diff --git a/arch/powerpc/boot/dts/mpc8378_rdb.dts b/arch/powerpc/boot/dts/mpc8378_rdb.dts index b11e68f56a06..7a2bad038bd6 100644 --- a/arch/powerpc/boot/dts/mpc8378_rdb.dts +++ b/arch/powerpc/boot/dts/mpc8378_rdb.dts @@ -211,8 +211,25 @@ reg = <0x2>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; + enet0: ethernet@24000 { cell-index = <0>; device_type = "network"; diff --git a/arch/powerpc/boot/dts/mpc8379_mds.dts b/arch/powerpc/boot/dts/mpc8379_mds.dts index 323370a2b5ff..acf06c438dbf 100644 --- a/arch/powerpc/boot/dts/mpc8379_mds.dts +++ b/arch/powerpc/boot/dts/mpc8379_mds.dts @@ -232,6 +232,22 @@ reg = <0x3>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; enet0: ethernet@24000 { @@ -244,6 +260,7 @@ interrupts = <32 0x8 33 0x8 34 0x8>; phy-connection-type = "mii"; interrupt-parent = <&ipic>; + tbi-handle = <&tbi0>; phy-handle = <&phy2>; }; @@ -257,6 +274,7 @@ interrupts = <35 0x8 36 0x8 37 0x8>; phy-connection-type = "mii"; interrupt-parent = <&ipic>; + tbi-handle = <&tbi1>; phy-handle = <&phy3>; }; diff --git a/arch/powerpc/boot/dts/mpc8379_rdb.dts b/arch/powerpc/boot/dts/mpc8379_rdb.dts index 337af6ea26d3..e067616f3f42 100644 --- a/arch/powerpc/boot/dts/mpc8379_rdb.dts +++ b/arch/powerpc/boot/dts/mpc8379_rdb.dts @@ -211,6 +211,22 @@ reg = <0x2>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; enet0: ethernet@24000 { @@ -223,6 +239,7 @@ interrupts = <32 0x8 33 0x8 34 0x8>; phy-connection-type = "mii"; interrupt-parent = <&ipic>; + tbi-handle = <&tbi0>; phy-handle = <&phy2>; }; @@ -237,6 +254,7 @@ phy-connection-type = "mii"; interrupt-parent = <&ipic>; fixed-link = <1 1 1000 0 0>; + tbi-handle = <&tbi1>; }; serial0: serial@4500 { diff --git a/arch/powerpc/boot/dts/mpc8536ds.dts b/arch/powerpc/boot/dts/mpc8536ds.dts index 35db1e5440c7..3c905df1812c 100644 --- a/arch/powerpc/boot/dts/mpc8536ds.dts +++ b/arch/powerpc/boot/dts/mpc8536ds.dts @@ -155,6 +155,22 @@ reg = <1>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@26520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x26520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; usb@22000 { @@ -186,6 +202,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <29 2 30 2 34 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi0>; phy-handle = <&phy1>; phy-connection-type = "rgmii-id"; }; @@ -199,6 +216,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <31 2 32 2 33 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi1>; phy-handle = <&phy0>; phy-connection-type = "rgmii-id"; }; diff --git a/arch/powerpc/boot/dts/mpc8540ads.dts b/arch/powerpc/boot/dts/mpc8540ads.dts index 9568bfaff8f7..79570ffe41b9 100644 --- a/arch/powerpc/boot/dts/mpc8540ads.dts +++ b/arch/powerpc/boot/dts/mpc8540ads.dts @@ -150,6 +150,34 @@ reg = <0x3>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@26520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x26520 0x20>; + + tbi2: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; enet0: ethernet@24000 { @@ -161,6 +189,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <29 2 30 2 34 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi0>; phy-handle = <&phy0>; }; @@ -173,6 +202,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <35 2 36 2 40 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi1>; phy-handle = <&phy1>; }; @@ -185,6 +215,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <41 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi2>; phy-handle = <&phy3>; }; diff --git a/arch/powerpc/boot/dts/mpc8541cds.dts b/arch/powerpc/boot/dts/mpc8541cds.dts index 6480f4fd96e0..221036a8ce23 100644 --- a/arch/powerpc/boot/dts/mpc8541cds.dts +++ b/arch/powerpc/boot/dts/mpc8541cds.dts @@ -144,6 +144,22 @@ reg = <0x1>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; enet0: ethernet@24000 { @@ -155,6 +171,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <29 2 30 2 34 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi0>; phy-handle = <&phy0>; }; @@ -167,6 +184,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <35 2 36 2 40 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi1>; phy-handle = <&phy1>; }; diff --git a/arch/powerpc/boot/dts/mpc8544ds.dts b/arch/powerpc/boot/dts/mpc8544ds.dts index f1fb20737e3e..b9da42105066 100644 --- a/arch/powerpc/boot/dts/mpc8544ds.dts +++ b/arch/powerpc/boot/dts/mpc8544ds.dts @@ -116,8 +116,26 @@ reg = <0x1>; device_type = "ethernet-phy"; }; + + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; + mdio@26520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x26520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + dma@21300 { #address-cells = <1>; #size-cells = <1>; @@ -169,6 +187,7 @@ interrupts = <29 2 30 2 34 2>; interrupt-parent = <&mpic>; phy-handle = <&phy0>; + tbi-handle = <&tbi0>; phy-connection-type = "rgmii-id"; }; @@ -182,6 +201,7 @@ interrupts = <31 2 32 2 33 2>; interrupt-parent = <&mpic>; phy-handle = <&phy1>; + tbi-handle = <&tbi1>; phy-connection-type = "rgmii-id"; }; diff --git a/arch/powerpc/boot/dts/mpc8548cds.dts b/arch/powerpc/boot/dts/mpc8548cds.dts index 431b496270dc..df774a7088ff 100644 --- a/arch/powerpc/boot/dts/mpc8548cds.dts +++ b/arch/powerpc/boot/dts/mpc8548cds.dts @@ -172,6 +172,46 @@ reg = <0x3>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@26520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x26520 0x20>; + + tbi2: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@27520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x27520 0x20>; + + tbi3: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; enet0: ethernet@24000 { @@ -183,6 +223,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <29 2 30 2 34 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi0>; phy-handle = <&phy0>; }; @@ -195,6 +236,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <35 2 36 2 40 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi1>; phy-handle = <&phy1>; }; @@ -208,6 +250,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <31 2 32 2 33 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi2>; phy-handle = <&phy2>; }; @@ -220,6 +263,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <37 2 38 2 39 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi3>; phy-handle = <&phy3>; }; */ diff --git a/arch/powerpc/boot/dts/mpc8555cds.dts b/arch/powerpc/boot/dts/mpc8555cds.dts index d833a5c4f476..053b01e1c93b 100644 --- a/arch/powerpc/boot/dts/mpc8555cds.dts +++ b/arch/powerpc/boot/dts/mpc8555cds.dts @@ -144,6 +144,22 @@ reg = <0x1>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; enet0: ethernet@24000 { @@ -155,6 +171,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <29 2 30 2 34 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi0>; phy-handle = <&phy0>; }; @@ -167,6 +184,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <35 2 36 2 40 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi1>; phy-handle = <&phy1>; }; diff --git a/arch/powerpc/boot/dts/mpc8560ads.dts b/arch/powerpc/boot/dts/mpc8560ads.dts index 4d1f2f284094..11b1bcbe14ce 100644 --- a/arch/powerpc/boot/dts/mpc8560ads.dts +++ b/arch/powerpc/boot/dts/mpc8560ads.dts @@ -145,6 +145,22 @@ reg = <0x3>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; enet0: ethernet@24000 { @@ -156,6 +172,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <29 2 30 2 34 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi0>; phy-handle = <&phy0>; }; @@ -168,6 +185,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <35 2 36 2 40 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi1>; phy-handle = <&phy1>; }; diff --git a/arch/powerpc/boot/dts/mpc8568mds.dts b/arch/powerpc/boot/dts/mpc8568mds.dts index c80158f7741d..1955bd9e113d 100644 --- a/arch/powerpc/boot/dts/mpc8568mds.dts +++ b/arch/powerpc/boot/dts/mpc8568mds.dts @@ -179,6 +179,22 @@ reg = <0x3>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; enet0: ethernet@24000 { @@ -190,6 +206,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <29 2 30 2 34 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi0>; phy-handle = <&phy2>; }; @@ -202,6 +219,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <35 2 36 2 40 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi1>; phy-handle = <&phy3>; }; diff --git a/arch/powerpc/boot/dts/mpc8572ds.dts b/arch/powerpc/boot/dts/mpc8572ds.dts index 5c69b2fafd32..05f67253b49f 100644 --- a/arch/powerpc/boot/dts/mpc8572ds.dts +++ b/arch/powerpc/boot/dts/mpc8572ds.dts @@ -225,6 +225,47 @@ interrupts = <10 1>; reg = <0x3>; }; + + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@26520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x26520 0x20>; + + tbi2: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@27520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x27520 0x20>; + + tbi3: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; enet0: ethernet@24000 { @@ -236,6 +277,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <29 2 30 2 34 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi0>; phy-handle = <&phy0>; phy-connection-type = "rgmii-id"; }; @@ -249,6 +291,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <35 2 36 2 40 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi1>; phy-handle = <&phy1>; phy-connection-type = "rgmii-id"; }; @@ -262,6 +305,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <31 2 32 2 33 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi2>; phy-handle = <&phy2>; phy-connection-type = "rgmii-id"; }; @@ -275,6 +319,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <37 2 38 2 39 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi3>; phy-handle = <&phy3>; phy-connection-type = "rgmii-id"; }; diff --git a/arch/powerpc/boot/dts/mpc8641_hpcn.dts b/arch/powerpc/boot/dts/mpc8641_hpcn.dts index d665e767822a..35d5e248ccd7 100644 --- a/arch/powerpc/boot/dts/mpc8641_hpcn.dts +++ b/arch/powerpc/boot/dts/mpc8641_hpcn.dts @@ -205,8 +205,49 @@ reg = <3>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@26520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x26520 0x20>; + + tbi2: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@27520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x27520 0x20>; + + tbi3: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; + enet0: ethernet@24000 { cell-index = <0>; device_type = "network"; @@ -216,6 +257,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <29 2 30 2 34 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi0>; phy-handle = <&phy0>; phy-connection-type = "rgmii-id"; }; @@ -229,6 +271,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <35 2 36 2 40 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi1>; phy-handle = <&phy1>; phy-connection-type = "rgmii-id"; }; @@ -242,6 +285,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <31 2 32 2 33 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi2>; phy-handle = <&phy2>; phy-connection-type = "rgmii-id"; }; @@ -255,6 +299,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <37 2 38 2 39 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi3>; phy-handle = <&phy3>; phy-connection-type = "rgmii-id"; }; diff --git a/arch/powerpc/boot/dts/sbc8349.dts b/arch/powerpc/boot/dts/sbc8349.dts index 0f941f310e44..8d365a57ebc1 100644 --- a/arch/powerpc/boot/dts/sbc8349.dts +++ b/arch/powerpc/boot/dts/sbc8349.dts @@ -177,6 +177,22 @@ reg = <0x1a>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; enet0: ethernet@24000 { @@ -188,6 +204,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <32 0x8 33 0x8 34 0x8>; interrupt-parent = <&ipic>; + tbi-handle = <&tbi0>; phy-handle = <&phy0>; linux,network-index = <0>; }; @@ -201,6 +218,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <35 0x8 36 0x8 37 0x8>; interrupt-parent = <&ipic>; + tbi-handle = <&tbi1>; phy-handle = <&phy1>; linux,network-index = <1>; }; diff --git a/arch/powerpc/boot/dts/sbc8548.dts b/arch/powerpc/boot/dts/sbc8548.dts index 333552b4e90d..2baf4a51f224 100644 --- a/arch/powerpc/boot/dts/sbc8548.dts +++ b/arch/powerpc/boot/dts/sbc8548.dts @@ -252,6 +252,22 @@ reg = <0x1a>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; enet0: ethernet@24000 { @@ -263,6 +279,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <0x1d 0x2 0x1e 0x2 0x22 0x2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi0>; phy-handle = <&phy0>; }; @@ -275,6 +292,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <0x23 0x2 0x24 0x2 0x28 0x2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi1>; phy-handle = <&phy1>; }; diff --git a/arch/powerpc/boot/dts/sbc8560.dts b/arch/powerpc/boot/dts/sbc8560.dts index db3632ef9888..01542f7062ab 100644 --- a/arch/powerpc/boot/dts/sbc8560.dts +++ b/arch/powerpc/boot/dts/sbc8560.dts @@ -168,6 +168,22 @@ reg = <0x1c>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; enet0: ethernet@24000 { @@ -179,6 +195,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <0x1d 0x2 0x1e 0x2 0x22 0x2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi0>; phy-handle = <&phy0>; }; @@ -191,6 +208,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <0x23 0x2 0x24 0x2 0x28 0x2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi1>; phy-handle = <&phy1>; }; diff --git a/arch/powerpc/boot/dts/sbc8641d.dts b/arch/powerpc/boot/dts/sbc8641d.dts index 9652456158fb..36db981548e4 100644 --- a/arch/powerpc/boot/dts/sbc8641d.dts +++ b/arch/powerpc/boot/dts/sbc8641d.dts @@ -222,6 +222,46 @@ reg = <2>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@26520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x26520 0x20>; + + tbi2: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@27520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x27520 0x20>; + + tbi3: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; enet0: ethernet@24000 { @@ -233,6 +273,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <29 2 30 2 34 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi0>; phy-handle = <&phy0>; phy-connection-type = "rgmii-id"; }; @@ -246,6 +287,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <35 2 36 2 40 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi1>; phy-handle = <&phy1>; phy-connection-type = "rgmii-id"; }; @@ -259,6 +301,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <31 2 32 2 33 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi2>; phy-handle = <&phy2>; phy-connection-type = "rgmii-id"; }; @@ -272,6 +315,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <37 2 38 2 39 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi3>; phy-handle = <&phy3>; phy-connection-type = "rgmii-id"; }; diff --git a/arch/powerpc/boot/dts/stx_gp3_8560.dts b/arch/powerpc/boot/dts/stx_gp3_8560.dts index fcd1db6ca0a8..fff33fe6efc6 100644 --- a/arch/powerpc/boot/dts/stx_gp3_8560.dts +++ b/arch/powerpc/boot/dts/stx_gp3_8560.dts @@ -142,6 +142,22 @@ reg = <4>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; enet0: ethernet@24000 { @@ -153,6 +169,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <29 2 30 2 34 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi0>; phy-handle = <&phy2>; }; @@ -165,6 +182,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <35 2 36 2 40 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi1>; phy-handle = <&phy4>; }; diff --git a/arch/powerpc/boot/dts/tqm8540.dts b/arch/powerpc/boot/dts/tqm8540.dts index e1d260b9085e..a693f01c21aa 100644 --- a/arch/powerpc/boot/dts/tqm8540.dts +++ b/arch/powerpc/boot/dts/tqm8540.dts @@ -155,6 +155,34 @@ reg = <3>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@26520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x26520 0x20>; + + tbi2: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; enet0: ethernet@24000 { diff --git a/arch/powerpc/boot/dts/tqm8541.dts b/arch/powerpc/boot/dts/tqm8541.dts index d76441ec5dc7..9e3f5f0dde20 100644 --- a/arch/powerpc/boot/dts/tqm8541.dts +++ b/arch/powerpc/boot/dts/tqm8541.dts @@ -154,6 +154,22 @@ reg = <3>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; enet0: ethernet@24000 { @@ -165,6 +181,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <29 2 30 2 34 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi0>; phy-handle = <&phy2>; }; @@ -177,6 +194,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <35 2 36 2 40 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi1>; phy-handle = <&phy1>; }; diff --git a/arch/powerpc/boot/dts/tqm8548-bigflash.dts b/arch/powerpc/boot/dts/tqm8548-bigflash.dts index 4199e89b4e50..15086eb65c50 100644 --- a/arch/powerpc/boot/dts/tqm8548-bigflash.dts +++ b/arch/powerpc/boot/dts/tqm8548-bigflash.dts @@ -179,6 +179,46 @@ reg = <5>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@26520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x26520 0x20>; + + tbi2: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@27520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x27520 0x20>; + + tbi3: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; enet0: ethernet@24000 { @@ -190,6 +230,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <29 2 30 2 34 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi0>; phy-handle = <&phy2>; }; @@ -202,6 +243,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <35 2 36 2 40 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi1>; phy-handle = <&phy1>; }; @@ -214,6 +256,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <31 2 32 2 33 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi2>; phy-handle = <&phy3>; }; @@ -226,6 +269,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <37 2 38 2 39 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi3>; phy-handle = <&phy4>; }; diff --git a/arch/powerpc/boot/dts/tqm8548.dts b/arch/powerpc/boot/dts/tqm8548.dts index 58ee4185454b..b7b65f5e79b6 100644 --- a/arch/powerpc/boot/dts/tqm8548.dts +++ b/arch/powerpc/boot/dts/tqm8548.dts @@ -179,6 +179,46 @@ reg = <5>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@26520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x26520 0x20>; + + tbi2: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@27520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x27520 0x20>; + + tbi3: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; enet0: ethernet@24000 { @@ -190,6 +230,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <29 2 30 2 34 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi0>; phy-handle = <&phy2>; }; @@ -202,6 +243,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <35 2 36 2 40 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi1>; phy-handle = <&phy1>; }; @@ -214,6 +256,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <31 2 32 2 33 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi2>; phy-handle = <&phy3>; }; @@ -226,6 +269,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <37 2 38 2 39 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi3>; phy-handle = <&phy4>; }; diff --git a/arch/powerpc/boot/dts/tqm8555.dts b/arch/powerpc/boot/dts/tqm8555.dts index 6f7ea59c4846..cf92b4e7945e 100644 --- a/arch/powerpc/boot/dts/tqm8555.dts +++ b/arch/powerpc/boot/dts/tqm8555.dts @@ -154,6 +154,22 @@ reg = <3>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; enet0: ethernet@24000 { @@ -165,6 +181,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <29 2 30 2 34 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi0>; phy-handle = <&phy2>; }; @@ -177,6 +194,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <35 2 36 2 40 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi1>; phy-handle = <&phy1>; }; diff --git a/arch/powerpc/boot/dts/tqm8560.dts b/arch/powerpc/boot/dts/tqm8560.dts index 3fe35208907b..9e1ab2d2f669 100644 --- a/arch/powerpc/boot/dts/tqm8560.dts +++ b/arch/powerpc/boot/dts/tqm8560.dts @@ -156,6 +156,22 @@ reg = <3>; device_type = "ethernet-phy"; }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + mdio@25520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x25520 0x20>; + + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; }; enet0: ethernet@24000 { @@ -167,6 +183,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <29 2 30 2 34 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi0>; phy-handle = <&phy2>; }; @@ -179,6 +196,7 @@ local-mac-address = [ 00 00 00 00 00 00 ]; interrupts = <35 2 36 2 40 2>; interrupt-parent = <&mpic>; + tbi-handle = <&tbi1>; phy-handle = <&phy1>; }; diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index 26ecb96f9731..115cb16351fd 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -207,236 +207,51 @@ static int __init of_add_fixed_phys(void) arch_initcall(of_add_fixed_phys); #endif /* CONFIG_FIXED_PHY */ -static int gfar_mdio_of_init_one(struct device_node *np) -{ - int k; - struct device_node *child = NULL; - struct gianfar_mdio_data mdio_data; - struct platform_device *mdio_dev; - struct resource res; - int ret; - - memset(&res, 0, sizeof(res)); - memset(&mdio_data, 0, sizeof(mdio_data)); - - ret = of_address_to_resource(np, 0, &res); - if (ret) - return ret; - - /* The gianfar device will try to use the same ID created below to find - * this bus, to coordinate register access (since they share). */ - mdio_dev = platform_device_register_simple("fsl-gianfar_mdio", - res.start&0xfffff, &res, 1); - if (IS_ERR(mdio_dev)) - return PTR_ERR(mdio_dev); - - for (k = 0; k < 32; k++) - mdio_data.irq[k] = PHY_POLL; - - while ((child = of_get_next_child(np, child)) != NULL) { - int irq = irq_of_parse_and_map(child, 0); - if (irq != NO_IRQ) { - const u32 *id = of_get_property(child, "reg", NULL); - mdio_data.irq[*id] = irq; - } - } - - ret = platform_device_add_data(mdio_dev, &mdio_data, - sizeof(struct gianfar_mdio_data)); - if (ret) - platform_device_unregister(mdio_dev); - - return ret; -} - -static int __init gfar_mdio_of_init(void) -{ - struct device_node *np = NULL; - - for_each_compatible_node(np, NULL, "fsl,gianfar-mdio") - gfar_mdio_of_init_one(np); - - /* try the deprecated version */ - for_each_compatible_node(np, "mdio", "gianfar"); - gfar_mdio_of_init_one(np); - - return 0; -} - -arch_initcall(gfar_mdio_of_init); - -static const char *gfar_tx_intr = "tx"; -static const char *gfar_rx_intr = "rx"; -static const char *gfar_err_intr = "error"; - -static int __init gfar_of_init(void) +#ifdef CONFIG_PPC_83xx +static int __init mpc83xx_wdt_init(void) { + struct resource r; struct device_node *np; - unsigned int i; - struct platform_device *gfar_dev; - struct resource res; + struct platform_device *dev; + u32 freq = fsl_get_sys_freq(); int ret; - for (np = NULL, i = 0; - (np = of_find_compatible_node(np, "network", "gianfar")) != NULL; - i++) { - struct resource r[4]; - struct device_node *phy, *mdio; - struct gianfar_platform_data gfar_data; - const unsigned int *id; - const char *model; - const char *ctype; - const void *mac_addr; - const phandle *ph; - int n_res = 2; - - if (!of_device_is_available(np)) - continue; - - memset(r, 0, sizeof(r)); - memset(&gfar_data, 0, sizeof(gfar_data)); - - ret = of_address_to_resource(np, 0, &r[0]); - if (ret) - goto err; - - of_irq_to_resource(np, 0, &r[1]); - - model = of_get_property(np, "model", NULL); - - /* If we aren't the FEC we have multiple interrupts */ - if (model && strcasecmp(model, "FEC")) { - r[1].name = gfar_tx_intr; - - r[2].name = gfar_rx_intr; - of_irq_to_resource(np, 1, &r[2]); + np = of_find_compatible_node(NULL, "watchdog", "mpc83xx_wdt"); - r[3].name = gfar_err_intr; - of_irq_to_resource(np, 2, &r[3]); - - n_res += 2; - } - - gfar_dev = - platform_device_register_simple("fsl-gianfar", i, &r[0], - n_res); - - if (IS_ERR(gfar_dev)) { - ret = PTR_ERR(gfar_dev); - goto err; - } - - mac_addr = of_get_mac_address(np); - if (mac_addr) - memcpy(gfar_data.mac_addr, mac_addr, 6); - - if (model && !strcasecmp(model, "TSEC")) - gfar_data.device_flags = - FSL_GIANFAR_DEV_HAS_GIGABIT | - FSL_GIANFAR_DEV_HAS_COALESCE | - FSL_GIANFAR_DEV_HAS_RMON | - FSL_GIANFAR_DEV_HAS_MULTI_INTR; - if (model && !strcasecmp(model, "eTSEC")) - gfar_data.device_flags = - FSL_GIANFAR_DEV_HAS_GIGABIT | - FSL_GIANFAR_DEV_HAS_COALESCE | - FSL_GIANFAR_DEV_HAS_RMON | - FSL_GIANFAR_DEV_HAS_MULTI_INTR | - FSL_GIANFAR_DEV_HAS_CSUM | - FSL_GIANFAR_DEV_HAS_VLAN | - FSL_GIANFAR_DEV_HAS_EXTENDED_HASH; - - ctype = of_get_property(np, "phy-connection-type", NULL); - - /* We only care about rgmii-id. The rest are autodetected */ - if (ctype && !strcmp(ctype, "rgmii-id")) - gfar_data.interface = PHY_INTERFACE_MODE_RGMII_ID; - else - gfar_data.interface = PHY_INTERFACE_MODE_MII; - - if (of_get_property(np, "fsl,magic-packet", NULL)) - gfar_data.device_flags |= FSL_GIANFAR_DEV_HAS_MAGIC_PACKET; - - ph = of_get_property(np, "phy-handle", NULL); - if (ph == NULL) { - u32 *fixed_link; - - fixed_link = (u32 *)of_get_property(np, "fixed-link", - NULL); - if (!fixed_link) { - ret = -ENODEV; - goto unreg; - } - - snprintf(gfar_data.bus_id, MII_BUS_ID_SIZE, "0"); - gfar_data.phy_id = fixed_link[0]; - } else { - phy = of_find_node_by_phandle(*ph); - - if (phy == NULL) { - ret = -ENODEV; - goto unreg; - } - - mdio = of_get_parent(phy); - - id = of_get_property(phy, "reg", NULL); - ret = of_address_to_resource(mdio, 0, &res); - if (ret) { - of_node_put(phy); - of_node_put(mdio); - goto unreg; - } - - gfar_data.phy_id = *id; - snprintf(gfar_data.bus_id, MII_BUS_ID_SIZE, "%llx", - (unsigned long long)res.start&0xfffff); + if (!np) { + ret = -ENODEV; + goto nodev; + } - of_node_put(phy); - of_node_put(mdio); - } + memset(&r, 0, sizeof(r)); - /* Get MDIO bus controlled by this eTSEC, if any. Normally only - * eTSEC 1 will control an MDIO bus, not necessarily the same - * bus that its PHY is on ('mdio' above), so we can't just use - * that. What we do is look for a gianfar mdio device that has - * overlapping registers with this device. That's really the - * whole point, to find the device sharing our registers to - * coordinate access with it. - */ - for_each_compatible_node(mdio, NULL, "fsl,gianfar-mdio") { - if (of_address_to_resource(mdio, 0, &res)) - continue; - - if (res.start >= r[0].start && res.end <= r[0].end) { - /* Get the ID the mdio bus platform device was - * registered with. gfar_data.bus_id is - * different because it's for finding a PHY, - * while this is for finding a MII bus. - */ - gfar_data.mdio_bus = res.start&0xfffff; - of_node_put(mdio); - break; - } - } + ret = of_address_to_resource(np, 0, &r); + if (ret) + goto err; - ret = - platform_device_add_data(gfar_dev, &gfar_data, - sizeof(struct - gianfar_platform_data)); - if (ret) - goto unreg; + dev = platform_device_register_simple("mpc83xx_wdt", 0, &r, 1); + if (IS_ERR(dev)) { + ret = PTR_ERR(dev); + goto err; } + ret = platform_device_add_data(dev, &freq, sizeof(freq)); + if (ret) + goto unreg; + + of_node_put(np); return 0; unreg: - platform_device_unregister(gfar_dev); + platform_device_unregister(dev); err: + of_node_put(np); +nodev: return ret; } -arch_initcall(gfar_of_init); +arch_initcall(mpc83xx_wdt_init); +#endif static enum fsl_usb2_phy_modes determine_usb_phy(const char *phy_type) { diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index 55e319fa7fe6..7398704c4b55 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -25,11 +25,8 @@ * * Theory of operation * - * The driver is initialized through platform_device. Structures which - * define the configuration needed by the board are defined in a - * board structure in arch/ppc/platforms (though I do not - * discount the possibility that other architectures could one - * day be supported. + * The driver is initialized through of_device. Configuration information + * is therefore conveyed through an OF-style device tree. * * The Gianfar Ethernet Controller uses a ring of buffer * descriptors. The beginning is indicated by a register @@ -78,7 +75,7 @@ #include #include #include -#include +#include #include #include #include @@ -92,6 +89,8 @@ #include #include #include +#include +#include #include "gianfar.h" #include "gianfar_mii.h" @@ -119,8 +118,9 @@ static irqreturn_t gfar_interrupt(int irq, void *dev_id); static void adjust_link(struct net_device *dev); static void init_registers(struct net_device *dev); static int init_phy(struct net_device *dev); -static int gfar_probe(struct platform_device *pdev); -static int gfar_remove(struct platform_device *pdev); +static int gfar_probe(struct of_device *ofdev, + const struct of_device_id *match); +static int gfar_remove(struct of_device *ofdev); static void free_skb_resources(struct gfar_private *priv); static void gfar_set_multi(struct net_device *dev); static void gfar_set_hash_for_addr(struct net_device *dev, u8 *addr); @@ -152,25 +152,158 @@ static inline int gfar_uses_fcb(struct gfar_private *priv) return (priv->vlan_enable || priv->rx_csum_enable); } +static int gfar_of_init(struct net_device *dev) +{ + struct device_node *phy, *mdio; + const unsigned int *id; + const char *model; + const char *ctype; + const void *mac_addr; + const phandle *ph; + u64 addr, size; + int err = 0; + struct gfar_private *priv = netdev_priv(dev); + struct device_node *np = priv->node; + char bus_name[MII_BUS_ID_SIZE]; + + if (!np || !of_device_is_available(np)) + return -ENODEV; + + /* get a pointer to the register memory */ + addr = of_translate_address(np, of_get_address(np, 0, &size, NULL)); + priv->regs = ioremap(addr, size); + + if (priv->regs == NULL) + return -ENOMEM; + + priv->interruptTransmit = irq_of_parse_and_map(np, 0); + + model = of_get_property(np, "model", NULL); + + /* If we aren't the FEC we have multiple interrupts */ + if (model && strcasecmp(model, "FEC")) { + priv->interruptReceive = irq_of_parse_and_map(np, 1); + + priv->interruptError = irq_of_parse_and_map(np, 2); + + if (priv->interruptTransmit < 0 || + priv->interruptReceive < 0 || + priv->interruptError < 0) { + err = -EINVAL; + goto err_out; + } + } + + mac_addr = of_get_mac_address(np); + if (mac_addr) + memcpy(dev->dev_addr, mac_addr, MAC_ADDR_LEN); + + if (model && !strcasecmp(model, "TSEC")) + priv->device_flags = + FSL_GIANFAR_DEV_HAS_GIGABIT | + FSL_GIANFAR_DEV_HAS_COALESCE | + FSL_GIANFAR_DEV_HAS_RMON | + FSL_GIANFAR_DEV_HAS_MULTI_INTR; + if (model && !strcasecmp(model, "eTSEC")) + priv->device_flags = + FSL_GIANFAR_DEV_HAS_GIGABIT | + FSL_GIANFAR_DEV_HAS_COALESCE | + FSL_GIANFAR_DEV_HAS_RMON | + FSL_GIANFAR_DEV_HAS_MULTI_INTR | + FSL_GIANFAR_DEV_HAS_CSUM | + FSL_GIANFAR_DEV_HAS_VLAN | + FSL_GIANFAR_DEV_HAS_MAGIC_PACKET | + FSL_GIANFAR_DEV_HAS_EXTENDED_HASH; + + ctype = of_get_property(np, "phy-connection-type", NULL); + + /* We only care about rgmii-id. The rest are autodetected */ + if (ctype && !strcmp(ctype, "rgmii-id")) + priv->interface = PHY_INTERFACE_MODE_RGMII_ID; + else + priv->interface = PHY_INTERFACE_MODE_MII; + + if (of_get_property(np, "fsl,magic-packet", NULL)) + priv->device_flags |= FSL_GIANFAR_DEV_HAS_MAGIC_PACKET; + + ph = of_get_property(np, "phy-handle", NULL); + if (ph == NULL) { + u32 *fixed_link; + + fixed_link = (u32 *)of_get_property(np, "fixed-link", NULL); + if (!fixed_link) { + err = -ENODEV; + goto err_out; + } + + snprintf(priv->phy_bus_id, BUS_ID_SIZE, PHY_ID_FMT, "0", + fixed_link[0]); + } else { + phy = of_find_node_by_phandle(*ph); + + if (phy == NULL) { + err = -ENODEV; + goto err_out; + } + + mdio = of_get_parent(phy); + + id = of_get_property(phy, "reg", NULL); + + of_node_put(phy); + of_node_put(mdio); + + gfar_mdio_bus_name(bus_name, mdio); + snprintf(priv->phy_bus_id, BUS_ID_SIZE, "%s:%02x", + bus_name, *id); + } + + /* Find the TBI PHY. If it's not there, we don't support SGMII */ + ph = of_get_property(np, "tbi-handle", NULL); + if (ph) { + struct device_node *tbi = of_find_node_by_phandle(*ph); + struct of_device *ofdev; + struct mii_bus *bus; + + if (!tbi) + return 0; + + mdio = of_get_parent(tbi); + if (!mdio) + return 0; + + ofdev = of_find_device_by_node(mdio); + + of_node_put(mdio); + + id = of_get_property(tbi, "reg", NULL); + if (!id) + return 0; + + of_node_put(tbi); + + bus = dev_get_drvdata(&ofdev->dev); + + priv->tbiphy = bus->phy_map[*id]; + } + + return 0; + +err_out: + iounmap(priv->regs); + return err; +} + /* Set up the ethernet device structure, private data, * and anything else we need before we start */ -static int gfar_probe(struct platform_device *pdev) +static int gfar_probe(struct of_device *ofdev, + const struct of_device_id *match) { u32 tempval; struct net_device *dev = NULL; struct gfar_private *priv = NULL; - struct gianfar_platform_data *einfo; - struct resource *r; - int err = 0, irq; - - einfo = (struct gianfar_platform_data *) pdev->dev.platform_data; - - if (NULL == einfo) { - printk(KERN_ERR "gfar %d: Missing additional data!\n", - pdev->id); - - return -ENODEV; - } + int err = 0; + DECLARE_MAC_BUF(mac); /* Create an ethernet device instance */ dev = alloc_etherdev(sizeof (*priv)); @@ -180,48 +313,19 @@ static int gfar_probe(struct platform_device *pdev) priv = netdev_priv(dev); priv->dev = dev; + priv->node = ofdev->node; - /* Set the info in the priv to the current info */ - priv->einfo = einfo; - - /* fill out IRQ fields */ - if (einfo->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) { - irq = platform_get_irq_byname(pdev, "tx"); - if (irq < 0) - goto regs_fail; - priv->interruptTransmit = irq; - - irq = platform_get_irq_byname(pdev, "rx"); - if (irq < 0) - goto regs_fail; - priv->interruptReceive = irq; - - irq = platform_get_irq_byname(pdev, "error"); - if (irq < 0) - goto regs_fail; - priv->interruptError = irq; - } else { - irq = platform_get_irq(pdev, 0); - if (irq < 0) - goto regs_fail; - priv->interruptTransmit = irq; - } - - /* get a pointer to the register memory */ - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - priv->regs = ioremap(r->start, sizeof (struct gfar)); + err = gfar_of_init(dev); - if (NULL == priv->regs) { - err = -ENOMEM; + if (err) goto regs_fail; - } spin_lock_init(&priv->txlock); spin_lock_init(&priv->rxlock); spin_lock_init(&priv->bflock); INIT_WORK(&priv->reset_task, gfar_reset_task); - platform_set_drvdata(pdev, dev); + dev_set_drvdata(&ofdev->dev, priv); /* Stop the DMA engine now, in case it was running before */ /* (The firmware could have used it, and left it running). */ @@ -239,13 +343,10 @@ static int gfar_probe(struct platform_device *pdev) /* Initialize ECNTRL */ gfar_write(&priv->regs->ecntrl, ECNTRL_INIT_SETTINGS); - /* Copy the station address into the dev structure, */ - memcpy(dev->dev_addr, einfo->mac_addr, MAC_ADDR_LEN); - /* Set the dev->base_addr to the gfar reg region */ dev->base_addr = (unsigned long) (priv->regs); - SET_NETDEV_DEV(dev, &pdev->dev); + SET_NETDEV_DEV(dev, &ofdev->dev); /* Fill in the dev structure */ dev->open = gfar_enet_open; @@ -263,7 +364,7 @@ static int gfar_probe(struct platform_device *pdev) dev->ethtool_ops = &gfar_ethtool_ops; - if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_CSUM) { + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_CSUM) { priv->rx_csum_enable = 1; dev->features |= NETIF_F_IP_CSUM; } else @@ -271,7 +372,7 @@ static int gfar_probe(struct platform_device *pdev) priv->vlgrp = NULL; - if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_VLAN) { + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_VLAN) { dev->vlan_rx_register = gfar_vlan_rx_register; dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; @@ -279,7 +380,7 @@ static int gfar_probe(struct platform_device *pdev) priv->vlan_enable = 1; } - if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_EXTENDED_HASH) { + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_EXTENDED_HASH) { priv->extended_hash = 1; priv->hash_width = 9; @@ -314,7 +415,7 @@ static int gfar_probe(struct platform_device *pdev) priv->hash_regs[7] = &priv->regs->gaddr7; } - if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_PADDING) + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_PADDING) priv->padding = DEFAULT_PADDING; else priv->padding = 0; @@ -368,29 +469,28 @@ regs_fail: return err; } -static int gfar_remove(struct platform_device *pdev) +static int gfar_remove(struct of_device *ofdev) { - struct net_device *dev = platform_get_drvdata(pdev); - struct gfar_private *priv = netdev_priv(dev); + struct gfar_private *priv = dev_get_drvdata(&ofdev->dev); - platform_set_drvdata(pdev, NULL); + dev_set_drvdata(&ofdev->dev, NULL); iounmap(priv->regs); - free_netdev(dev); + free_netdev(priv->dev); return 0; } #ifdef CONFIG_PM -static int gfar_suspend(struct platform_device *pdev, pm_message_t state) +static int gfar_suspend(struct of_device *ofdev, pm_message_t state) { - struct net_device *dev = platform_get_drvdata(pdev); - struct gfar_private *priv = netdev_priv(dev); + struct gfar_private *priv = dev_get_drvdata(&ofdev->dev); + struct net_device *dev = priv->dev; unsigned long flags; u32 tempval; int magic_packet = priv->wol_en && - (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET); + (priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET); netif_device_detach(dev); @@ -431,14 +531,14 @@ static int gfar_suspend(struct platform_device *pdev, pm_message_t state) return 0; } -static int gfar_resume(struct platform_device *pdev) +static int gfar_resume(struct of_device *ofdev) { - struct net_device *dev = platform_get_drvdata(pdev); - struct gfar_private *priv = netdev_priv(dev); + struct gfar_private *priv = dev_get_drvdata(&ofdev->dev); + struct net_device *dev = priv->dev; unsigned long flags; u32 tempval; int magic_packet = priv->wol_en && - (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET); + (priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET); if (!netif_running(dev)) { netif_device_attach(dev); @@ -497,7 +597,7 @@ static phy_interface_t gfar_get_interface(struct net_device *dev) if (ecntrl & ECNTRL_REDUCED_MII_MODE) return PHY_INTERFACE_MODE_RMII; else { - phy_interface_t interface = priv->einfo->interface; + phy_interface_t interface = priv->interface; /* * This isn't autodetected right now, so it must @@ -510,7 +610,7 @@ static phy_interface_t gfar_get_interface(struct net_device *dev) } } - if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT) + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT) return PHY_INTERFACE_MODE_GMII; return PHY_INTERFACE_MODE_MII; @@ -524,21 +624,18 @@ static int init_phy(struct net_device *dev) { struct gfar_private *priv = netdev_priv(dev); uint gigabit_support = - priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT ? + priv->device_flags & FSL_GIANFAR_DEV_HAS_GIGABIT ? SUPPORTED_1000baseT_Full : 0; struct phy_device *phydev; - char phy_id[BUS_ID_SIZE]; phy_interface_t interface; priv->oldlink = 0; priv->oldspeed = 0; priv->oldduplex = -1; - snprintf(phy_id, sizeof(phy_id), PHY_ID_FMT, priv->einfo->bus_id, priv->einfo->phy_id); - interface = gfar_get_interface(dev); - phydev = phy_connect(dev, phy_id, &adjust_link, 0, interface); + phydev = phy_connect(dev, priv->phy_bus_id, &adjust_link, 0, interface); if (interface == PHY_INTERFACE_MODE_SGMII) gfar_configure_serdes(dev); @@ -569,35 +666,31 @@ static int init_phy(struct net_device *dev) static void gfar_configure_serdes(struct net_device *dev) { struct gfar_private *priv = netdev_priv(dev); - struct gfar_mii __iomem *regs = - (void __iomem *)&priv->regs->gfar_mii_regs; - int tbipa = gfar_read(&priv->regs->tbipa); - struct mii_bus *bus = gfar_get_miibus(priv); - if (bus) - mutex_lock(&bus->mdio_lock); + if (!priv->tbiphy) { + printk(KERN_WARNING "SGMII mode requires that the device " + "tree specify a tbi-handle\n"); + return; + } - /* If the link is already up, we must already be ok, and don't need to + /* + * If the link is already up, we must already be ok, and don't need to * configure and reset the TBI<->SerDes link. Maybe U-Boot configured * everything for us? Resetting it takes the link down and requires * several seconds for it to come back. */ - if (gfar_local_mdio_read(regs, tbipa, MII_BMSR) & BMSR_LSTATUS) - goto done; + if (phy_read(priv->tbiphy, MII_BMSR) & BMSR_LSTATUS) + return; /* Single clk mode, mii mode off(for serdes communication) */ - gfar_local_mdio_write(regs, tbipa, MII_TBICON, TBICON_CLK_SELECT); + phy_write(priv->tbiphy, MII_TBICON, TBICON_CLK_SELECT); - gfar_local_mdio_write(regs, tbipa, MII_ADVERTISE, + phy_write(priv->tbiphy, MII_ADVERTISE, ADVERTISE_1000XFULL | ADVERTISE_1000XPAUSE | ADVERTISE_1000XPSE_ASYM); - gfar_local_mdio_write(regs, tbipa, MII_BMCR, BMCR_ANENABLE | + phy_write(priv->tbiphy, MII_BMCR, BMCR_ANENABLE | BMCR_ANRESTART | BMCR_FULLDPLX | BMCR_SPEED1000); - - done: - if (bus) - mutex_unlock(&bus->mdio_lock); } static void init_registers(struct net_device *dev) @@ -630,7 +723,7 @@ static void init_registers(struct net_device *dev) gfar_write(&priv->regs->gaddr7, 0); /* Zero out the rmon mib registers if it has them */ - if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_RMON) { + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_RMON) { memset_io(&(priv->regs->rmon), 0, sizeof (struct rmon_mib)); /* Mask off the CAM interrupts */ @@ -705,7 +798,7 @@ void stop_gfar(struct net_device *dev) spin_unlock_irqrestore(&priv->txlock, flags); /* Free the IRQs */ - if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) { + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) { free_irq(priv->interruptError, dev); free_irq(priv->interruptTransmit, dev); free_irq(priv->interruptReceive, dev); @@ -919,7 +1012,7 @@ int startup_gfar(struct net_device *dev) /* If the device has multiple interrupts, register for * them. Otherwise, only register for the one */ - if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) { + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) { /* Install our interrupt handlers for Error, * Transmit, and Receive */ if (request_irq(priv->interruptError, gfar_error, @@ -1751,7 +1844,7 @@ static void gfar_netpoll(struct net_device *dev) struct gfar_private *priv = netdev_priv(dev); /* If the device has multiple interrupts, run tx/rx */ - if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) { + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) { disable_irq(priv->interruptTransmit); disable_irq(priv->interruptReceive); disable_irq(priv->interruptError); @@ -2045,7 +2138,7 @@ static irqreturn_t gfar_error(int irq, void *dev_id) gfar_write(&priv->regs->ievent, events & IEVENT_ERR_MASK); /* Magic Packet is not an error. */ - if ((priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) && + if ((priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) && (events & IEVENT_MAG)) events &= ~IEVENT_MAG; @@ -2111,16 +2204,24 @@ static irqreturn_t gfar_error(int irq, void *dev_id) /* work with hotplug and coldplug */ MODULE_ALIAS("platform:fsl-gianfar"); +static struct of_device_id gfar_match[] = +{ + { + .type = "network", + .compatible = "gianfar", + }, + {}, +}; + /* Structure for a device driver */ -static struct platform_driver gfar_driver = { +static struct of_platform_driver gfar_driver = { + .name = "fsl-gianfar", + .match_table = gfar_match, + .probe = gfar_probe, .remove = gfar_remove, .suspend = gfar_suspend, .resume = gfar_resume, - .driver = { - .name = "fsl-gianfar", - .owner = THIS_MODULE, - }, }; static int __init gfar_init(void) @@ -2130,7 +2231,7 @@ static int __init gfar_init(void) if (err) return err; - err = platform_driver_register(&gfar_driver); + err = of_register_platform_driver(&gfar_driver); if (err) gfar_mdio_exit(); @@ -2140,7 +2241,7 @@ static int __init gfar_init(void) static void __exit gfar_exit(void) { - platform_driver_unregister(&gfar_driver); + of_unregister_platform_driver(&gfar_driver); gfar_mdio_exit(); } diff --git a/drivers/net/gianfar.h b/drivers/net/gianfar.h index f46e9b63af13..ca7f0a6a68c5 100644 --- a/drivers/net/gianfar.h +++ b/drivers/net/gianfar.h @@ -657,6 +657,19 @@ struct gfar { }; +/* Flags related to gianfar device features */ +#define FSL_GIANFAR_DEV_HAS_GIGABIT 0x00000001 +#define FSL_GIANFAR_DEV_HAS_COALESCE 0x00000002 +#define FSL_GIANFAR_DEV_HAS_RMON 0x00000004 +#define FSL_GIANFAR_DEV_HAS_MULTI_INTR 0x00000008 +#define FSL_GIANFAR_DEV_HAS_CSUM 0x00000010 +#define FSL_GIANFAR_DEV_HAS_VLAN 0x00000020 +#define FSL_GIANFAR_DEV_HAS_EXTENDED_HASH 0x00000040 +#define FSL_GIANFAR_DEV_HAS_PADDING 0x00000080 +#define FSL_GIANFAR_DEV_HAS_MAGIC_PACKET 0x00000100 +#define FSL_GIANFAR_DEV_HAS_BD_STASHING 0x00000200 +#define FSL_GIANFAR_DEV_HAS_BUF_STASHING 0x00000400 + /* Struct stolen almost completely (and shamelessly) from the FCC enet source * (Ok, that's not so true anymore, but there is a family resemblence) * The GFAR buffer descriptors track the ring buffers. The rx_bd_base @@ -694,6 +707,7 @@ struct gfar_private { /* RX Locked fields */ spinlock_t rxlock; + struct device_node *node; struct net_device *dev; struct napi_struct napi; @@ -733,6 +747,9 @@ struct gfar_private { /* Bitfield update lock */ spinlock_t bflock; + phy_interface_t interface; + char phy_bus_id[BUS_ID_SIZE]; + u32 device_flags; unsigned char vlan_enable:1, rx_csum_enable:1, extended_hash:1, @@ -744,11 +761,9 @@ struct gfar_private { unsigned int interruptReceive; unsigned int interruptError; - /* info structure initialized by platform code */ - struct gianfar_platform_data *einfo; - /* PHY stuff */ struct phy_device *phydev; + struct phy_device *tbiphy; struct mii_bus *mii_bus; int oldspeed; int oldduplex; diff --git a/drivers/net/gianfar_ethtool.c b/drivers/net/gianfar_ethtool.c index fb7d3ccc0fdc..53944b120a3d 100644 --- a/drivers/net/gianfar_ethtool.c +++ b/drivers/net/gianfar_ethtool.c @@ -121,7 +121,7 @@ static void gfar_gstrings(struct net_device *dev, u32 stringset, u8 * buf) { struct gfar_private *priv = netdev_priv(dev); - if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_RMON) + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_RMON) memcpy(buf, stat_gstrings, GFAR_STATS_LEN * ETH_GSTRING_LEN); else memcpy(buf, stat_gstrings, @@ -138,7 +138,7 @@ static void gfar_fill_stats(struct net_device *dev, struct ethtool_stats *dummy, struct gfar_private *priv = netdev_priv(dev); u64 *extra = (u64 *) & priv->extra_stats; - if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_RMON) { + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_RMON) { u32 __iomem *rmon = (u32 __iomem *) & priv->regs->rmon; struct gfar_stats *stats = (struct gfar_stats *) buf; @@ -158,7 +158,7 @@ static int gfar_sset_count(struct net_device *dev, int sset) switch (sset) { case ETH_SS_STATS: - if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_RMON) + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_RMON) return GFAR_STATS_LEN; else return GFAR_EXTRA_STATS_LEN; @@ -280,7 +280,7 @@ static int gfar_gcoalesce(struct net_device *dev, struct ethtool_coalesce *cvals { struct gfar_private *priv = netdev_priv(dev); - if (!(priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_COALESCE)) + if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_COALESCE)) return -EOPNOTSUPP; if (NULL == priv->phydev) @@ -332,7 +332,7 @@ static int gfar_scoalesce(struct net_device *dev, struct ethtool_coalesce *cvals { struct gfar_private *priv = netdev_priv(dev); - if (!(priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_COALESCE)) + if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_COALESCE)) return -EOPNOTSUPP; /* Set up rx coalescing */ @@ -482,7 +482,7 @@ static int gfar_set_rx_csum(struct net_device *dev, uint32_t data) unsigned long flags; int err = 0; - if (!(priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_CSUM)) + if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_CSUM)) return -EOPNOTSUPP; if (dev->flags & IFF_UP) { @@ -515,7 +515,7 @@ static uint32_t gfar_get_rx_csum(struct net_device *dev) { struct gfar_private *priv = netdev_priv(dev); - if (!(priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_CSUM)) + if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_CSUM)) return 0; return priv->rx_csum_enable; @@ -526,7 +526,7 @@ static int gfar_set_tx_csum(struct net_device *dev, uint32_t data) unsigned long flags; struct gfar_private *priv = netdev_priv(dev); - if (!(priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_CSUM)) + if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_CSUM)) return -EOPNOTSUPP; spin_lock_irqsave(&priv->txlock, flags); @@ -547,7 +547,7 @@ static uint32_t gfar_get_tx_csum(struct net_device *dev) { struct gfar_private *priv = netdev_priv(dev); - if (!(priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_CSUM)) + if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_CSUM)) return 0; return (dev->features & NETIF_F_IP_CSUM) != 0; @@ -570,7 +570,7 @@ static void gfar_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct gfar_private *priv = netdev_priv(dev); - if (priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) { + if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) { wol->supported = WAKE_MAGIC; wol->wolopts = priv->wol_en ? WAKE_MAGIC : 0; } else { @@ -583,7 +583,7 @@ static int gfar_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) struct gfar_private *priv = netdev_priv(dev); unsigned long flags; - if (!(priv->einfo->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) && + if (!(priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET) && wol->wolopts != 0) return -EINVAL; diff --git a/drivers/net/gianfar_mii.c b/drivers/net/gianfar_mii.c index 0e2595d24933..f3706e415b45 100644 --- a/drivers/net/gianfar_mii.c +++ b/drivers/net/gianfar_mii.c @@ -34,6 +34,8 @@ #include #include #include +#include +#include #include #include @@ -150,19 +152,83 @@ static int gfar_mdio_reset(struct mii_bus *bus) return 0; } +/* Allocate an array which provides irq #s for each PHY on the given bus */ +static int *create_irq_map(struct device_node *np) +{ + int *irqs; + int i; + struct device_node *child = NULL; + + irqs = kcalloc(PHY_MAX_ADDR, sizeof(int), GFP_KERNEL); + + if (!irqs) + return NULL; + + for (i = 0; i < PHY_MAX_ADDR; i++) + irqs[i] = PHY_POLL; + + while ((child = of_get_next_child(np, child)) != NULL) { + int irq = irq_of_parse_and_map(child, 0); + const u32 *id; + + if (irq == NO_IRQ) + continue; + + id = of_get_property(child, "reg", NULL); + + if (!id) + continue; + + if (*id < PHY_MAX_ADDR && *id >= 0) + irqs[*id] = irq; + else + printk(KERN_WARNING "%s: " + "%d is not a valid PHY address\n", + np->full_name, *id); + } + + return irqs; +} + + +void gfar_mdio_bus_name(char *name, struct device_node *np) +{ + const u32 *reg; + + reg = of_get_property(np, "reg", NULL); -static int gfar_mdio_probe(struct device *dev) + snprintf(name, MII_BUS_ID_SIZE, "%s@%x", np->name, reg ? *reg : 0); +} + +/* Scan the bus in reverse, looking for an empty spot */ +static int gfar_mdio_find_free(struct mii_bus *new_bus) +{ + int i; + + for (i = PHY_MAX_ADDR; i > 0; i--) { + u32 phy_id; + + if (get_phy_id(new_bus, i, &phy_id)) + return -1; + + if (phy_id == 0xffffffff) + break; + } + + return i; +} + +static int gfar_mdio_probe(struct of_device *ofdev, + const struct of_device_id *match) { - struct platform_device *pdev = to_platform_device(dev); - struct gianfar_mdio_data *pdata; struct gfar_mii __iomem *regs; struct gfar __iomem *enet_regs; struct mii_bus *new_bus; - struct resource *r; - int i, err = 0; - - if (NULL == dev) - return -EINVAL; + int err = 0; + u64 addr, size; + struct device_node *np = ofdev->node; + struct device_node *tbi; + int tbiaddr = -1; new_bus = mdiobus_alloc(); if (NULL == new_bus) @@ -172,31 +238,28 @@ static int gfar_mdio_probe(struct device *dev) new_bus->read = &gfar_mdio_read, new_bus->write = &gfar_mdio_write, new_bus->reset = &gfar_mdio_reset, - snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", pdev->id); - - pdata = (struct gianfar_mdio_data *)pdev->dev.platform_data; - - if (NULL == pdata) { - printk(KERN_ERR "gfar mdio %d: Missing platform data!\n", pdev->id); - return -ENODEV; - } - - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + gfar_mdio_bus_name(new_bus->id, np); /* Set the PHY base address */ - regs = ioremap(r->start, sizeof (struct gfar_mii)); + addr = of_translate_address(np, of_get_address(np, 0, &size, NULL)); + regs = ioremap(addr, size); if (NULL == regs) { err = -ENOMEM; - goto reg_map_fail; + goto err_free_bus; } new_bus->priv = (void __force *)regs; - new_bus->irq = pdata->irq; + new_bus->irq = create_irq_map(np); + + if (new_bus->irq == NULL) { + err = -ENOMEM; + goto err_unmap_regs; + } - new_bus->parent = dev; - dev_set_drvdata(dev, new_bus); + new_bus->parent = &ofdev->dev; + dev_set_drvdata(&ofdev->dev, new_bus); /* * This is mildly evil, but so is our hardware for doing this. @@ -206,96 +269,109 @@ static int gfar_mdio_probe(struct device *dev) enet_regs = (struct gfar __iomem *) ((char *)regs - offsetof(struct gfar, gfar_mii_regs)); - /* Scan the bus, looking for an empty spot for TBIPA */ - gfar_write(&enet_regs->tbipa, 0); - for (i = PHY_MAX_ADDR; i > 0; i--) { - u32 phy_id; + for_each_child_of_node(np, tbi) { + if (!strncmp(tbi->type, "tbi-phy", 8)) + break; + } - err = get_phy_id(new_bus, i, &phy_id); - if (err) - goto bus_register_fail; + if (tbi) { + const u32 *prop = of_get_property(tbi, "reg", NULL); - if (phy_id == 0xffffffff) - break; + if (prop) + tbiaddr = *prop; } - /* The bus is full. We don't support using 31 PHYs, sorry */ - if (i == 0) { + if (tbiaddr == -1) { + gfar_write(&enet_regs->tbipa, 0); + + tbiaddr = gfar_mdio_find_free(new_bus); + } + + /* + * We define TBIPA at 0 to be illegal, opting to fail for boards that + * have PHYs at 1-31, rather than change tbipa and rescan. + */ + if (tbiaddr == 0) { err = -EBUSY; - goto bus_register_fail; + goto err_free_irqs; } - gfar_write(&enet_regs->tbipa, i); + gfar_write(&enet_regs->tbipa, tbiaddr); + + /* + * The TBIPHY-only buses will find PHYs at every address, + * so we mask them all but the TBI + */ + if (!of_device_is_compatible(np, "fsl,gianfar-mdio")) + new_bus->phy_mask = ~(1 << tbiaddr); err = mdiobus_register(new_bus); - if (0 != err) { + if (err != 0) { printk (KERN_ERR "%s: Cannot register as MDIO bus\n", new_bus->name); - goto bus_register_fail; + goto err_free_irqs; } return 0; -bus_register_fail: +err_free_irqs: + kfree(new_bus->irq); +err_unmap_regs: iounmap(regs); -reg_map_fail: +err_free_bus: mdiobus_free(new_bus); return err; } -static int gfar_mdio_remove(struct device *dev) +static int gfar_mdio_remove(struct of_device *ofdev) { - struct mii_bus *bus = dev_get_drvdata(dev); + struct mii_bus *bus = dev_get_drvdata(&ofdev->dev); mdiobus_unregister(bus); - dev_set_drvdata(dev, NULL); + dev_set_drvdata(&ofdev->dev, NULL); iounmap((void __iomem *)bus->priv); bus->priv = NULL; + kfree(bus->irq); mdiobus_free(bus); return 0; } -static struct device_driver gianfar_mdio_driver = { +static struct of_device_id gfar_mdio_match[] = +{ + { + .compatible = "fsl,gianfar-mdio", + }, + { + .compatible = "fsl,gianfar-tbi", + }, + { + .type = "mdio", + .compatible = "gianfar", + }, + {}, +}; + +static struct of_platform_driver gianfar_mdio_driver = { .name = "fsl-gianfar_mdio", - .bus = &platform_bus_type, + .match_table = gfar_mdio_match, + .probe = gfar_mdio_probe, .remove = gfar_mdio_remove, }; -static int match_mdio_bus(struct device *dev, void *data) -{ - const struct gfar_private *priv = data; - const struct platform_device *pdev = to_platform_device(dev); - - return !strcmp(pdev->name, gianfar_mdio_driver.name) && - pdev->id == priv->einfo->mdio_bus; -} - -/* Given a gfar_priv structure, find the mii_bus controlled by this device (not - * necessarily the same as the bus the gfar's PHY is on), if one exists. - * Normally only the first gianfar controls a mii_bus. */ -struct mii_bus *gfar_get_miibus(const struct gfar_private *priv) -{ - /*const*/ struct device *d; - - d = bus_find_device(gianfar_mdio_driver.bus, NULL, (void *)priv, - match_mdio_bus); - return d ? dev_get_drvdata(d) : NULL; -} - int __init gfar_mdio_init(void) { - return driver_register(&gianfar_mdio_driver); + return of_register_platform_driver(&gianfar_mdio_driver); } void gfar_mdio_exit(void) { - driver_unregister(&gianfar_mdio_driver); + of_unregister_platform_driver(&gianfar_mdio_driver); } diff --git a/drivers/net/gianfar_mii.h b/drivers/net/gianfar_mii.h index 02dc970ca1ff..65c242cd468a 100644 --- a/drivers/net/gianfar_mii.h +++ b/drivers/net/gianfar_mii.h @@ -49,4 +49,6 @@ int gfar_local_mdio_read(struct gfar_mii __iomem *regs, int mii_id, int regnum); struct mii_bus *gfar_get_miibus(const struct gfar_private *priv); int __init gfar_mdio_init(void); void gfar_mdio_exit(void); + +void gfar_mdio_bus_name(char *name, struct device_node *np); #endif /* GIANFAR_PHY_H */ diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index 708bab58d8d0..d9051d717d27 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -47,12 +47,7 @@ struct gianfar_platform_data { /* device specific information */ u32 device_flags; - /* board specific information */ - u32 board_flags; - int mdio_bus; /* Bus controlled by us */ - char bus_id[MII_BUS_ID_SIZE]; /* Bus PHY is on */ - u32 phy_id; - u8 mac_addr[6]; + char bus_id[BUS_ID_SIZE]; phy_interface_t interface; }; @@ -61,17 +56,6 @@ struct gianfar_mdio_data { int irq[32]; }; -/* Flags related to gianfar device features */ -#define FSL_GIANFAR_DEV_HAS_GIGABIT 0x00000001 -#define FSL_GIANFAR_DEV_HAS_COALESCE 0x00000002 -#define FSL_GIANFAR_DEV_HAS_RMON 0x00000004 -#define FSL_GIANFAR_DEV_HAS_MULTI_INTR 0x00000008 -#define FSL_GIANFAR_DEV_HAS_CSUM 0x00000010 -#define FSL_GIANFAR_DEV_HAS_VLAN 0x00000020 -#define FSL_GIANFAR_DEV_HAS_EXTENDED_HASH 0x00000040 -#define FSL_GIANFAR_DEV_HAS_PADDING 0x00000080 -#define FSL_GIANFAR_DEV_HAS_MAGIC_PACKET 0x00000100 - /* Flags in gianfar_platform_data */ #define FSL_GIANFAR_BRD_HAS_PHY_INTR 0x00000001 /* set or use a timer */ #define FSL_GIANFAR_BRD_IS_REDUCED 0x00000002 /* Set if RGMII, RMII */ -- cgit v1.2.3 From e08e1f7adba522378e8d2ae941bf25443866136d Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 16 Dec 2008 12:17:30 -0800 Subject: swiotlb: allow architectures to override phys<->bus<->phys conversions Impact: generalize phys<->bus<->phys conversions in the swiotlb code Architectures may need to override these conversions. Implement a __weak hook point containing the default implementation. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- include/linux/swiotlb.h | 3 +++ lib/swiotlb.c | 52 ++++++++++++++++++++++++++++++++++--------------- 2 files changed, 39 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 58b996a642f9..694f1839cbc0 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -27,6 +27,9 @@ swiotlb_init(void); extern void *swiotlb_alloc_boot(size_t bytes, unsigned long nslabs); extern void *swiotlb_alloc(unsigned order, unsigned long nslabs); +extern dma_addr_t swiotlb_phys_to_bus(phys_addr_t address); +extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address); + extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 1272b23e4769..3494263cdd9a 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -125,6 +125,26 @@ void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs) return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); } +dma_addr_t __weak swiotlb_phys_to_bus(phys_addr_t paddr) +{ + return paddr; +} + +phys_addr_t __weak swiotlb_bus_to_phys(dma_addr_t baddr) +{ + return baddr; +} + +static dma_addr_t swiotlb_virt_to_bus(volatile void *address) +{ + return swiotlb_phys_to_bus(virt_to_phys(address)); +} + +static void *swiotlb_bus_to_virt(dma_addr_t address) +{ + return phys_to_virt(swiotlb_bus_to_phys(address)); +} + /* * Statically reserve bounce buffer space and initialize bounce buffer data * structures for the software IO TLB used to implement the DMA API. @@ -168,7 +188,7 @@ swiotlb_init_with_default_size(size_t default_size) panic("Cannot allocate SWIOTLB overflow buffer!\n"); printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n", - virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end)); + swiotlb_virt_to_bus(io_tlb_start), swiotlb_virt_to_bus(io_tlb_end)); } void __init @@ -250,7 +270,7 @@ swiotlb_late_init_with_default_size(size_t default_size) printk(KERN_INFO "Placing %luMB software IO TLB between 0x%lx - " "0x%lx\n", bytes >> 20, - virt_to_bus(io_tlb_start), virt_to_bus(io_tlb_end)); + swiotlb_virt_to_bus(io_tlb_start), swiotlb_virt_to_bus(io_tlb_end)); return 0; @@ -298,7 +318,7 @@ map_single(struct device *hwdev, char *buffer, size_t size, int dir) unsigned long max_slots; mask = dma_get_seg_boundary(hwdev); - start_dma_addr = virt_to_bus(io_tlb_start) & mask; + start_dma_addr = swiotlb_virt_to_bus(io_tlb_start) & mask; offset_slots = ALIGN(start_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; @@ -475,7 +495,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_mask = hwdev->coherent_dma_mask; ret = (void *)__get_free_pages(flags, order); - if (ret && !is_buffer_dma_capable(dma_mask, virt_to_bus(ret), size)) { + if (ret && !is_buffer_dma_capable(dma_mask, swiotlb_virt_to_bus(ret), size)) { /* * The allocated memory isn't reachable by the device. * Fall back on swiotlb_map_single(). @@ -496,7 +516,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size, } memset(ret, 0, size); - dev_addr = virt_to_bus(ret); + dev_addr = swiotlb_virt_to_bus(ret); /* Confirm address can be DMA'd by device */ if (!is_buffer_dma_capable(dma_mask, dev_addr, size)) { @@ -556,7 +576,7 @@ dma_addr_t swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size, int dir, struct dma_attrs *attrs) { - dma_addr_t dev_addr = virt_to_bus(ptr); + dma_addr_t dev_addr = swiotlb_virt_to_bus(ptr); void *map; BUG_ON(dir == DMA_NONE); @@ -577,7 +597,7 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size, map = io_tlb_overflow_buffer; } - dev_addr = virt_to_bus(map); + dev_addr = swiotlb_virt_to_bus(map); /* * Ensure that the address returned is DMA'ble @@ -607,7 +627,7 @@ void swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr, size_t size, int dir, struct dma_attrs *attrs) { - char *dma_addr = bus_to_virt(dev_addr); + char *dma_addr = swiotlb_bus_to_virt(dev_addr); BUG_ON(dir == DMA_NONE); if (is_swiotlb_buffer(dma_addr)) @@ -637,7 +657,7 @@ static void swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, int dir, int target) { - char *dma_addr = bus_to_virt(dev_addr); + char *dma_addr = swiotlb_bus_to_virt(dev_addr); BUG_ON(dir == DMA_NONE); if (is_swiotlb_buffer(dma_addr)) @@ -668,7 +688,7 @@ swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr, unsigned long offset, size_t size, int dir, int target) { - char *dma_addr = bus_to_virt(dev_addr) + offset; + char *dma_addr = swiotlb_bus_to_virt(dev_addr) + offset; BUG_ON(dir == DMA_NONE); if (is_swiotlb_buffer(dma_addr)) @@ -724,7 +744,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, for_each_sg(sgl, sg, nelems, i) { addr = SG_ENT_VIRT_ADDRESS(sg); - dev_addr = virt_to_bus(addr); + dev_addr = swiotlb_virt_to_bus(addr); if (swiotlb_force || address_needs_mapping(hwdev, dev_addr, sg->length)) { void *map = map_single(hwdev, addr, sg->length, dir); @@ -737,7 +757,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, sgl[0].dma_length = 0; return 0; } - sg->dma_address = virt_to_bus(map); + sg->dma_address = swiotlb_virt_to_bus(map); } else sg->dma_address = dev_addr; sg->dma_length = sg->length; @@ -768,7 +788,7 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, for_each_sg(sgl, sg, nelems, i) { if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) - unmap_single(hwdev, bus_to_virt(sg->dma_address), + unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address), sg->dma_length, dir); else if (dir == DMA_FROM_DEVICE) dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length); @@ -801,7 +821,7 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, for_each_sg(sgl, sg, nelems, i) { if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg)) - sync_single(hwdev, bus_to_virt(sg->dma_address), + sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address), sg->dma_length, dir, target); else if (dir == DMA_FROM_DEVICE) dma_mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length); @@ -825,7 +845,7 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) { - return (dma_addr == virt_to_bus(io_tlb_overflow_buffer)); + return (dma_addr == swiotlb_virt_to_bus(io_tlb_overflow_buffer)); } /* @@ -837,7 +857,7 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) int swiotlb_dma_supported(struct device *hwdev, u64 mask) { - return virt_to_bus(io_tlb_end - 1) <= mask; + return swiotlb_virt_to_bus(io_tlb_end - 1) <= mask; } EXPORT_SYMBOL(swiotlb_map_single); -- cgit v1.2.3 From b81ea27b2329bf44b30c427800954f845896d476 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 16 Dec 2008 12:17:31 -0800 Subject: swiotlb: add arch hook to force mapping Impact: generalize the sw-IOTLB range checks Some architectures require special rules to determine whether a range needs mapping or not. This adds a weak function for architectures to override. Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- include/linux/swiotlb.h | 2 ++ lib/swiotlb.c | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 694f1839cbc0..325af1de0351 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -30,6 +30,8 @@ extern void *swiotlb_alloc(unsigned order, unsigned long nslabs); extern dma_addr_t swiotlb_phys_to_bus(phys_addr_t address); extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address); +extern int swiotlb_arch_range_needs_mapping(void *ptr, size_t size); + extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 3494263cdd9a..d8b09051c455 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -145,6 +145,11 @@ static void *swiotlb_bus_to_virt(dma_addr_t address) return phys_to_virt(swiotlb_bus_to_phys(address)); } +int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size) +{ + return 0; +} + /* * Statically reserve bounce buffer space and initialize bounce buffer data * structures for the software IO TLB used to implement the DMA API. @@ -297,6 +302,11 @@ address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size) return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size); } +static inline int range_needs_mapping(void *ptr, size_t size) +{ + return swiotlb_force || swiotlb_arch_range_needs_mapping(ptr, size); +} + static int is_swiotlb_buffer(char *addr) { return addr >= io_tlb_start && addr < io_tlb_end; @@ -585,7 +595,8 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size, * we can safely return the device addr and not worry about bounce * buffering it. */ - if (!address_needs_mapping(hwdev, dev_addr, size) && !swiotlb_force) + if (!address_needs_mapping(hwdev, dev_addr, size) && + !range_needs_mapping(ptr, size)) return dev_addr; /* @@ -745,7 +756,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, for_each_sg(sgl, sg, nelems, i) { addr = SG_ENT_VIRT_ADDRESS(sg); dev_addr = swiotlb_virt_to_bus(addr); - if (swiotlb_force || + if (range_needs_mapping(sg_virt(sg), sg->length) || address_needs_mapping(hwdev, dev_addr, sg->length)) { void *map = map_single(hwdev, addr, sg->length, dir); if (!map) { -- cgit v1.2.3 From 9a9fafb89433c5fd1331bac0c84c4b321e358b42 Mon Sep 17 00:00:00 2001 From: Phil Endecott Date: Mon, 1 Dec 2008 10:22:33 -0500 Subject: USB: fix comment about endianness of descriptors This patch fixes a comment and clarifies the documentation about the endianness of descriptors. The current policy is that descriptors will be little-endian at the API even on big-endian systems; however the /proc/bus/usb API predates this policy and presents descriptors with some multibyte fields byte-swapped. Signed-off-by: Phil Endecott Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- Documentation/usb/proc_usb_info.txt | 6 ++++-- include/linux/usb/ch9.h | 8 ++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/Documentation/usb/proc_usb_info.txt b/Documentation/usb/proc_usb_info.txt index 077e9032d0cd..fafcd4723260 100644 --- a/Documentation/usb/proc_usb_info.txt +++ b/Documentation/usb/proc_usb_info.txt @@ -49,8 +49,10 @@ it and 002/048 sometime later. These files can be read as binary data. The binary data consists of first the device descriptor, then the descriptors for each -configuration of the device. That information is also shown in -text form by the /proc/bus/usb/devices file, described later. +configuration of the device. Multi-byte fields in the device and +configuration descriptors, but not other descriptors, are converted +to host endianness by the kernel. This information is also shown +in text form by the /proc/bus/usb/devices file, described later. These files may also be used to write user-level drivers for the USB devices. You would open the /proc/bus/usb/BBB/DDD file read/write, diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index 73a2f4eb1f7a..9b42baed3900 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -158,8 +158,12 @@ struct usb_ctrlrequest { * (rarely) accepted by SET_DESCRIPTOR. * * Note that all multi-byte values here are encoded in little endian - * byte order "on the wire". But when exposed through Linux-USB APIs, - * they've been converted to cpu byte order. + * byte order "on the wire". Within the kernel and when exposed + * through the Linux-USB APIs, they are not converted to cpu byte + * order; it is the responsibility of the client code to do this. + * The single exception is when device and configuration descriptors (but + * not other descriptors) are read from usbfs (i.e. /proc/bus/usb/BBB/DDD); + * in this case the fields are converted to host endianness by the kernel. */ /* -- cgit v1.2.3 From 69c30e1e7492192f882a3fc11888b320fde5206a Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 17 Dec 2008 15:44:58 -0800 Subject: irda: Add irda_skb_cb qdisc related padding We need to pad irda_skb_cb in order to keep it safe accross dev_queue_xmit() calls. This is some ugly and temporary hack triggered by recent qisc code changes. Even though it fixes bugzilla.kernel.org bug #11795, it will be replaced by a proper fix before 2.6.29 is released. Signed-off-by: Samuel Ortiz Signed-off-by: David S. Miller --- include/net/irda/irda_device.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/irda/irda_device.h b/include/net/irda/irda_device.h index 3025ae17ddbe..94c852d47d0f 100644 --- a/include/net/irda/irda_device.h +++ b/include/net/irda/irda_device.h @@ -135,9 +135,11 @@ struct dongle_reg { /* * Per-packet information we need to hide inside sk_buff - * (must not exceed 48 bytes, check with struct sk_buff) + * (must not exceed 48 bytes, check with struct sk_buff) + * The default_qdisc_pad field is a temporary hack. */ struct irda_skb_cb { + unsigned int default_qdisc_pad; magic_t magic; /* Be sure that we can trust the information */ __u32 next_speed; /* The Speed to be set *after* this frame */ __u16 mtt; /* Minimum turn around time */ -- cgit v1.2.3 From 2d91d78b68606ff7ce52ea70e187dee7831aa2f6 Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 17 Dec 2008 15:47:29 -0800 Subject: Phonet: allocate a non-Ethernet ARP type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also leave some room for more 802.11 types. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/if_arp.h | 2 ++ net/core/dev.c | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 4d3401812e6c..11df77ab2dbb 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -87,6 +87,8 @@ #define ARPHRD_IEEE80211_PRISM 802 /* IEEE 802.11 + Prism2 header */ #define ARPHRD_IEEE80211_RADIOTAP 803 /* IEEE 802.11 + radiotap header */ +#define ARPHRD_PHONET 820 /* PhoNet media type */ + #define ARPHRD_VOID 0xFFFF /* Void type, nothing is known */ #define ARPHRD_NONE 0xFFFE /* zero header length */ diff --git a/net/core/dev.c b/net/core/dev.c index d8d7d1fccde4..15aab0c46d6d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -283,8 +283,8 @@ static const unsigned short netdev_lock_type[] = ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET, ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211, - ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID, - ARPHRD_NONE}; + ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, + ARPHRD_VOID, ARPHRD_NONE}; static const char *netdev_lock_name[] = {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", @@ -300,8 +300,8 @@ static const char *netdev_lock_name[] = "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET", "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211", - "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID", - "_xmit_NONE"}; + "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", + "_xmit_VOID", "_xmit_NONE"}; static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; -- cgit v1.2.3 From 57c81fffc863fb4c1804bc963bcbfb82d736c6df Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 17 Dec 2008 15:47:48 -0800 Subject: Phonet: allocate separate ARP type for GPRS over a Phonet pipe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A separate xmit lock class supports GPRS over a Phonet pipe over a TUN device (type ARPHRD_NONE). Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/if_arp.h | 1 + net/core/dev.c | 4 ++-- net/phonet/pep-gprs.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 11df77ab2dbb..5ff89809a581 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -88,6 +88,7 @@ #define ARPHRD_IEEE80211_RADIOTAP 803 /* IEEE 802.11 + radiotap header */ #define ARPHRD_PHONET 820 /* PhoNet media type */ +#define ARPHRD_PHONET_PIPE 821 /* PhoNet pipe header */ #define ARPHRD_VOID 0xFFFF /* Void type, nothing is known */ #define ARPHRD_NONE 0xFFFE /* zero header length */ diff --git a/net/core/dev.c b/net/core/dev.c index 15aab0c46d6d..048cf1197872 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -284,7 +284,7 @@ static const unsigned short netdev_lock_type[] = ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, - ARPHRD_VOID, ARPHRD_NONE}; + ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE}; static const char *netdev_lock_name[] = {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", @@ -301,7 +301,7 @@ static const char *netdev_lock_name[] = "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", - "_xmit_VOID", "_xmit_NONE"}; + "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"}; static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)]; diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c index 0b640b0fce0c..a2873203dff2 100644 --- a/net/phonet/pep-gprs.c +++ b/net/phonet/pep-gprs.c @@ -260,7 +260,7 @@ static int gprs_set_mtu(struct net_device *dev, int new_mtu) static void gprs_setup(struct net_device *dev) { dev->features = NETIF_F_FRAGLIST; - dev->type = ARPHRD_NONE; + dev->type = ARPHRD_PHONET_PIPE; dev->flags = IFF_POINTOPOINT | IFF_NOARP; dev->mtu = GPRS_DEFAULT_MTU; dev->hard_header_len = 0; -- cgit v1.2.3 From be677730a0ccb6bedced6f65f2ba8f57a3c607ba Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 17 Dec 2008 15:48:31 -0800 Subject: Phonet: use atomic for packet TX window MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GPRS TX flow control won't need to lock the underlying socket anymore. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/pep.h | 2 +- net/phonet/pep.c | 38 +++++++++++++++++++++----------------- net/phonet/socket.c | 2 +- 3 files changed, 23 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index fcd793030e4d..4c61cdce4e5f 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -35,12 +35,12 @@ struct pep_sock { struct sock *listener; struct sk_buff_head ctrlreq_queue; #define PNPIPE_CTRLREQ_MAX 10 + atomic_t tx_credits; int ifindex; u16 peer_type; /* peer type/subtype */ u8 pipe_handle; u8 rx_credits; - u8 tx_credits; u8 rx_fc; /* RX flow control */ u8 tx_fc; /* TX flow control */ u8 init_enable; /* auto-enable at creation */ diff --git a/net/phonet/pep.c b/net/phonet/pep.c index bc6d50f83249..bb3e67849b38 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -225,6 +225,7 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) { struct pep_sock *pn = pep_sk(sk); struct pnpipehdr *hdr = pnp_hdr(skb); + int wake = 0; if (!pskb_may_pull(skb, sizeof(*hdr) + 4)) return -EINVAL; @@ -241,16 +242,16 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) case PN_LEGACY_FLOW_CONTROL: switch (hdr->data[4]) { case PEP_IND_BUSY: - pn->tx_credits = 0; + atomic_set(&pn->tx_credits, 0); break; case PEP_IND_READY: - pn->tx_credits = 1; + atomic_set(&pn->tx_credits, wake = 1); break; } break; case PN_ONE_CREDIT_FLOW_CONTROL: if (hdr->data[4] == PEP_IND_READY) - pn->tx_credits = 1; + atomic_set(&pn->tx_credits, wake = 1); break; } break; @@ -258,10 +259,7 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) case PN_PEP_IND_ID_MCFC_GRANT_CREDITS: if (pn->tx_fc != PN_MULTI_CREDIT_FLOW_CONTROL) break; - if (pn->tx_credits + hdr->data[4] > 0xff) - pn->tx_credits = 0xff; - else - pn->tx_credits += hdr->data[4]; + atomic_add(wake = hdr->data[4], &pn->tx_credits); break; default: @@ -269,7 +267,7 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) (unsigned)hdr->data[1]); return -EOPNOTSUPP; } - if (pn->tx_credits) + if (wake) sk->sk_write_space(sk); return 0; } @@ -343,7 +341,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) } /* fall through */ case PNS_PEP_DISABLE_REQ: - pn->tx_credits = 0; + atomic_set(&pn->tx_credits, 0); pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); break; @@ -390,7 +388,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) /* fall through */ case PNS_PIPE_ENABLED_IND: if (!pn_flow_safe(pn->tx_fc)) { - pn->tx_credits = 1; + atomic_set(&pn->tx_credits, 1); sk->sk_write_space(sk); } if (sk->sk_state == TCP_ESTABLISHED) @@ -504,8 +502,9 @@ static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) newpn->pn_sk.resource = pn->pn_sk.resource; skb_queue_head_init(&newpn->ctrlreq_queue); newpn->pipe_handle = pipe_handle; + atomic_set(&newpn->tx_credits, 0); newpn->peer_type = peer_type; - newpn->rx_credits = newpn->tx_credits = 0; + newpn->rx_credits = 0; newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL; newpn->init_enable = enabled; @@ -821,14 +820,18 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb) struct pep_sock *pn = pep_sk(sk); struct pnpipehdr *ph; + if (pn_flow_safe(pn->tx_fc) && + !atomic_add_unless(&pn->tx_credits, -1, 0)) { + kfree_skb(skb); + return -ENOBUFS; + } + skb_push(skb, 3); skb_reset_transport_header(skb); ph = pnp_hdr(skb); ph->utid = 0; ph->message_id = PNS_PIPE_DATA; ph->pipe_handle = pn->pipe_handle; - if (pn_flow_safe(pn->tx_fc) && pn->tx_credits) - pn->tx_credits--; return pn_skb_send(sk, skb, &pipe_srv); } @@ -866,7 +869,7 @@ disabled: BUG_ON(sk->sk_state != TCP_ESTABLISHED); /* Wait until flow control allows TX */ - done = pn->tx_credits > 0; + done = atomic_read(&pn->tx_credits); while (!done) { DEFINE_WAIT(wait); @@ -881,7 +884,7 @@ disabled: prepare_to_wait(&sk->sk_socket->wait, &wait, TASK_INTERRUPTIBLE); - done = sk_wait_event(sk, &timeo, pn->tx_credits > 0); + done = sk_wait_event(sk, &timeo, atomic_read(&pn->tx_credits)); finish_wait(&sk->sk_socket->wait, &wait); if (sk->sk_state != TCP_ESTABLISHED) @@ -895,7 +898,8 @@ disabled: goto out; skb_reserve(skb, MAX_PHONET_HEADER + 3); - if (sk->sk_state != TCP_ESTABLISHED || !pn->tx_credits) + if (sk->sk_state != TCP_ESTABLISHED || + !atomic_read(&pn->tx_credits)) goto disabled; /* sock_alloc_send_skb might sleep */ } @@ -917,7 +921,7 @@ int pep_writeable(struct sock *sk) { struct pep_sock *pn = pep_sk(sk); - return (sk->sk_state == TCP_ESTABLISHED) ? pn->tx_credits : 0; + return atomic_read(&pn->tx_credits); } int pep_write(struct sock *sk, struct sk_buff *skb) diff --git a/net/phonet/socket.c b/net/phonet/socket.c index c75aa5cdead5..ada2a35bf7a2 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -227,7 +227,7 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock, if (!mask && sk->sk_state == TCP_CLOSE_WAIT) return POLLHUP; - if (sk->sk_state == TCP_ESTABLISHED && pn->tx_credits) + if (sk->sk_state == TCP_ESTABLISHED && atomic_read(&pn->tx_credits)) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; return mask; -- cgit v1.2.3 From f38f1d2aa5a3520cf05da7cd6bd12fe2b0c509b7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 16 Dec 2008 23:06:40 -0500 Subject: trace: add a way to enable or disable the stack tracer Impact: enhancement to stack tracer The stack tracer currently is either on when configured in or off when it is not. It can not be disabled when it is configured on. (besides disabling the function tracer that it uses) This patch adds a way to enable or disable the stack tracer at run time. It defaults off on bootup, but a kernel parameter 'stacktrace' has been added to enable it on bootup. A new sysctl has been added "kernel.stack_tracer_enabled" to let the user enable or disable the stack tracer at run time. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 4 +++ include/linux/ftrace.h | 8 ++++++ kernel/sysctl.c | 10 +++++++ kernel/trace/Kconfig | 13 +++++++--- kernel/trace/trace_stack.c | 52 ++++++++++++++++++++++++++++++++++--- 5 files changed, 79 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 2919a2e91938..edab81c13182 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -89,6 +89,7 @@ parameter is applicable: SPARC Sparc architecture is enabled. SWSUSP Software suspend (hibernation) is enabled. SUSPEND System suspend states are enabled. + FTRACE Function tracing enabled. TS Appropriate touchscreen support is enabled. USB USB support is enabled. USBHID USB Human Interface Device support is enabled. @@ -2173,6 +2174,9 @@ and is between 256 and 4096 characters. It is defined in the file st= [HW,SCSI] SCSI tape parameters (buffers, etc.) See Documentation/scsi/st.txt. + stacktrace [FTRACE] + Enabled the stack tracer on boot up. + sti= [PARISC,HW] Format: Set the STI (builtin display/keyboard on the HP-PARISC diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 44020f31bd81..6b0db53caa7d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -86,6 +86,14 @@ static inline void ftrace_stop(void) { } static inline void ftrace_start(void) { } #endif /* CONFIG_FUNCTION_TRACER */ +#ifdef CONFIG_STACK_TRACER +extern int stack_tracer_enabled; +int +stack_trace_sysctl(struct ctl_table *table, int write, + struct file *file, void __user *buffer, size_t *lenp, + loff_t *ppos); +#endif + #ifdef CONFIG_DYNAMIC_FTRACE /* asm/ftrace.h must be defined for archs supporting dynamic ftrace */ #include diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c83f566e940a..6ac501a2dcc6 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -487,6 +487,16 @@ static struct ctl_table kern_table[] = { .proc_handler = &ftrace_enable_sysctl, }, #endif +#ifdef CONFIG_STACK_TRACER + { + .ctl_name = CTL_UNNUMBERED, + .procname = "stack_tracer_enabled", + .data = &stack_tracer_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &stack_trace_sysctl, + }, +#endif #ifdef CONFIG_TRACING { .ctl_name = CTL_UNNUMBERED, diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d8bae6f4219e..e2a4ff6fc3a6 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -244,10 +244,15 @@ config STACK_TRACER This tracer works by hooking into every function call that the kernel executes, and keeping a maximum stack depth value and - stack-trace saved. Because this logic has to execute in every - kernel function, all the time, this option can slow down the - kernel measurably and is generally intended for kernel - developers only. + stack-trace saved. If this is configured with DYNAMIC_FTRACE + then it will not have any overhead while the stack tracer + is disabled. + + To enable the stack tracer on bootup, pass in 'stacktrace' + on the kernel command line. + + The stack tracer can also be enabled or disabled via the + sysctl kernel.stack_tracer_enabled Say N if unsure. diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 0b863f2cbc8e..4842c969c785 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include "trace.h" @@ -31,6 +32,10 @@ static raw_spinlock_t max_stack_lock = static int stack_trace_disabled __read_mostly; static DEFINE_PER_CPU(int, trace_active); +static DEFINE_MUTEX(stack_sysctl_mutex); + +int stack_tracer_enabled; +static int last_stack_tracer_enabled; static inline void check_stack(void) { @@ -174,7 +179,7 @@ stack_max_size_write(struct file *filp, const char __user *ubuf, return count; } -static struct file_operations stack_max_size_fops = { +static const struct file_operations stack_max_size_fops = { .open = tracing_open_generic, .read = stack_max_size_read, .write = stack_max_size_write, @@ -272,7 +277,7 @@ static int t_show(struct seq_file *m, void *v) return 0; } -static struct seq_operations stack_trace_seq_ops = { +static const struct seq_operations stack_trace_seq_ops = { .start = t_start, .next = t_next, .stop = t_stop, @@ -288,12 +293,48 @@ static int stack_trace_open(struct inode *inode, struct file *file) return ret; } -static struct file_operations stack_trace_fops = { +static const struct file_operations stack_trace_fops = { .open = stack_trace_open, .read = seq_read, .llseek = seq_lseek, }; +int +stack_trace_sysctl(struct ctl_table *table, int write, + struct file *file, void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret; + + mutex_lock(&stack_sysctl_mutex); + + ret = proc_dointvec(table, write, file, buffer, lenp, ppos); + + if (ret || !write || + (last_stack_tracer_enabled == stack_tracer_enabled)) + goto out; + + last_stack_tracer_enabled = stack_tracer_enabled; + + if (stack_tracer_enabled) + register_ftrace_function(&trace_ops); + else + unregister_ftrace_function(&trace_ops); + + out: + mutex_unlock(&stack_sysctl_mutex); + return ret; +} + +static int start_stack_trace __initdata; + +static __init int enable_stacktrace(char *str) +{ + start_stack_trace = 1; + return 1; +} +__setup("stacktrace", enable_stacktrace); + static __init int stack_trace_init(void) { struct dentry *d_tracer; @@ -311,7 +352,10 @@ static __init int stack_trace_init(void) if (!entry) pr_warning("Could not create debugfs 'stack_trace' entry\n"); - register_ftrace_function(&trace_ops); + if (start_stack_trace) { + register_ftrace_function(&trace_ops); + stack_tracer_enabled = 1; + } return 0; } -- cgit v1.2.3 From 9c2c48020ec0dd6ecd27e5a1298f73b40d85a595 Mon Sep 17 00:00:00 2001 From: Ken Chen Date: Tue, 16 Dec 2008 23:41:22 -0800 Subject: schedstat: consolidate per-task cpu runtime stats Impact: simplify code When we turn on CONFIG_SCHEDSTATS, per-task cpu runtime is accumulated twice. Once in task->se.sum_exec_runtime and once in sched_info.cpu_time. These two stats are exactly the same. Given that task->se.sum_exec_runtime is always accumulated by the core scheduler, sched_info can reuse that data instead of duplicate the accounting. Signed-off-by: Ken Chen Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- fs/proc/base.c | 2 +- include/linux/sched.h | 3 +-- kernel/delayacct.c | 2 +- kernel/sched.c | 2 ++ kernel/sched_stats.h | 5 ++--- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/proc/base.c b/fs/proc/base.c index d4677603c889..4d745bac768c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -347,7 +347,7 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer) static int proc_pid_schedstat(struct task_struct *task, char *buffer) { return sprintf(buffer, "%llu %llu %lu\n", - task->sched_info.cpu_time, + task->se.sum_exec_runtime, task->sched_info.run_delay, task->sched_info.pcount); } diff --git a/include/linux/sched.h b/include/linux/sched.h index 8cccd6dc5d66..2d1e840ddd35 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -670,8 +670,7 @@ struct reclaim_state; struct sched_info { /* cumulative counters */ unsigned long pcount; /* # of times run on this cpu */ - unsigned long long cpu_time, /* time spent on the cpu */ - run_delay; /* time spent waiting on a runqueue */ + unsigned long long run_delay; /* time spent waiting on a runqueue */ /* timestamps */ unsigned long long last_arrival,/* when we last ran on a cpu */ diff --git a/kernel/delayacct.c b/kernel/delayacct.c index b3179dad71be..abb6e17505e2 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -127,7 +127,7 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) */ t1 = tsk->sched_info.pcount; t2 = tsk->sched_info.run_delay; - t3 = tsk->sched_info.cpu_time; + t3 = tsk->se.sum_exec_runtime; d->cpu_count += t1; diff --git a/kernel/sched.c b/kernel/sched.c index f53e2b8ef521..fd835fc320b8 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -596,6 +596,8 @@ struct rq { #ifdef CONFIG_SCHEDSTATS /* latency stats */ struct sched_info rq_sched_info; + unsigned long long rq_cpu_time; + /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ /* sys_sched_yield() stats */ unsigned int yld_exp_empty; diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 7dbf72a2b02c..3b01098164c8 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h @@ -31,7 +31,7 @@ static int show_schedstat(struct seq_file *seq, void *v) rq->yld_act_empty, rq->yld_exp_empty, rq->yld_count, rq->sched_switch, rq->sched_count, rq->sched_goidle, rq->ttwu_count, rq->ttwu_local, - rq->rq_sched_info.cpu_time, + rq->rq_cpu_time, rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount); seq_printf(seq, "\n"); @@ -123,7 +123,7 @@ static inline void rq_sched_info_depart(struct rq *rq, unsigned long long delta) { if (rq) - rq->rq_sched_info.cpu_time += delta; + rq->rq_cpu_time += delta; } static inline void @@ -236,7 +236,6 @@ static inline void sched_info_depart(struct task_struct *t) unsigned long long delta = task_rq(t)->clock - t->sched_info.last_arrival; - t->sched_info.cpu_time += delta; rq_sched_info_depart(task_rq(t), delta); if (t->state == TASK_RUNNING) -- cgit v1.2.3 From 74c8a6130486bed224e960790f4aa72dd09c061e Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Wed, 17 Dec 2008 19:40:33 +0900 Subject: locking, irq: enclose irq_desc_lock_class in CONFIG_LOCKDEP Impact: simplify code commit "08678b0: generic: sparse irqs: use irq_desc() [...]" introduced the irq_desc_lock_class variable. But it is used only if CONFIG_SPARSE_IRQ=Y or CONFIG_TRACE_IRQFLAGS=Y. Otherwise, following warnings happen: CC kernel/irq/handle.o kernel/irq/handle.c:26: warning: 'irq_desc_lock_class' defined but not used Actually, current early_init_irq_lock_class has a bit strange and messy ifdef. In addition, it is not valueable. 1. this function is protected by !CONFIG_SPARSE_IRQ, but that is not necessary. if CONFIG_SPARSE_IRQ=Y, desc of all irq number are initialized by NULL at first - then this function calling is safe. 2. this function protected by CONFIG_TRACE_IRQFLAGS too. but it is not necessary either, because lockdep_set_class() doesn't have bad side effect even if CONFIG_TRACE_IRQFLAGS=n. This patch bloat kernel size a bit on CONFIG_TRACE_IRQFLAGS=n and CONFIG_SPARSE_IRQ=Y - but that's ok. early_init_irq_lock_class() is not a fastpatch at all. To avoid messy ifdefs is more important than a few bytes diet. Signed-off-by: KOSAKI Motohiro Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 2 +- kernel/irq/handle.c | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 29aec6e10020..9dba554c802c 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -377,7 +377,7 @@ do { \ #endif /* CONFIG_LOCK_STAT */ -#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_GENERIC_HARDIRQS) +#ifdef CONFIG_GENERIC_HARDIRQS extern void early_init_irq_lock_class(void); #else static inline void early_init_irq_lock_class(void) diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index f1a23069c20a..6492400cb50d 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -422,11 +422,8 @@ out: } #endif - -#ifdef CONFIG_TRACE_IRQFLAGS void early_init_irq_lock_class(void) { -#ifndef CONFIG_SPARSE_IRQ struct irq_desc *desc; int i; @@ -436,9 +433,7 @@ void early_init_irq_lock_class(void) lockdep_set_class(&desc->lock, &irq_desc_lock_class); } -#endif } -#endif #ifdef CONFIG_SPARSE_IRQ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) -- cgit v1.2.3 From 40aa4a30d0fd075fb934de4ee8163056827052ab Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 16 Dec 2008 10:15:12 +0000 Subject: ASoC: Add WM8350 AudioPlus codec driver The WM8350 is an integrated audio and power management subsystem which provides a single-chip solution for portable audio and multimedia systems. The integrated audio CODEC provides all the necessary functions for high-quality stereo recording and playback. Programmable on-chip amplifiers allow for the direct connection of headphones and microphones with a minimum of external components. A programmable low-noise bias voltage is available to feed one or more electret microphones. Additional audio features include programmable high-pass filter in the ADC input path. This driver was originally written by Liam Girdwood with further updates from me. Signed-off-by: Mark Brown --- include/linux/mfd/wm8350/audio.h | 38 +- sound/soc/codecs/Kconfig | 4 + sound/soc/codecs/Makefile | 2 + sound/soc/codecs/wm8350.c | 1583 ++++++++++++++++++++++++++++++++++++++ sound/soc/codecs/wm8350.h | 20 + 5 files changed, 1643 insertions(+), 4 deletions(-) create mode 100644 sound/soc/codecs/wm8350.c create mode 100644 sound/soc/codecs/wm8350.h (limited to 'include') diff --git a/include/linux/mfd/wm8350/audio.h b/include/linux/mfd/wm8350/audio.h index 217bb22ebb8e..af95a1d2f3a1 100644 --- a/include/linux/mfd/wm8350/audio.h +++ b/include/linux/mfd/wm8350/audio.h @@ -1,7 +1,7 @@ /* * audio.h -- Audio Driver for Wolfson WM8350 PMIC * - * Copyright 2007 Wolfson Microelectronics PLC + * Copyright 2007, 2008 Wolfson Microelectronics PLC * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -70,9 +70,9 @@ #define WM8350_CODEC_ISEL_0_5 3 /* x0.5 */ #define WM8350_VMID_OFF 0 -#define WM8350_VMID_500K 1 -#define WM8350_VMID_100K 2 -#define WM8350_VMID_10K 3 +#define WM8350_VMID_300K 1 +#define WM8350_VMID_50K 2 +#define WM8350_VMID_5K 3 /* * R40 (0x28) - Clock Control 1 @@ -591,8 +591,38 @@ #define WM8350_IRQ_CODEC_MICSCD 41 #define WM8350_IRQ_CODEC_MICD 42 +/* + * WM8350 Platform data. + * + * This must be initialised per platform for best audio performance. + * Please see WM8350 datasheet for information. + */ +struct wm8350_audio_platform_data { + int vmid_discharge_msecs; /* VMID --> OFF discharge time */ + int drain_msecs; /* OFF drain time */ + int cap_discharge_msecs; /* Cap ON (from OFF) discharge time */ + int vmid_charge_msecs; /* vmid power up time */ + u32 vmid_s_curve:2; /* vmid enable s curve speed */ + u32 dis_out4:2; /* out4 discharge speed */ + u32 dis_out3:2; /* out3 discharge speed */ + u32 dis_out2:2; /* out2 discharge speed */ + u32 dis_out1:2; /* out1 discharge speed */ + u32 vroi_out4:1; /* out4 tie off */ + u32 vroi_out3:1; /* out3 tie off */ + u32 vroi_out2:1; /* out2 tie off */ + u32 vroi_out1:1; /* out1 tie off */ + u32 vroi_enable:1; /* enable tie off */ + u32 codec_current_on:2; /* current level ON */ + u32 codec_current_standby:2; /* current level STANDBY */ + u32 codec_current_charge:2; /* codec current @ vmid charge */ +}; + +struct snd_soc_codec; + struct wm8350_codec { struct platform_device *pdev; + struct snd_soc_codec *codec; + struct wm8350_audio_platform_data *platform_data; }; #endif diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index bf68052d6924..c41289b5f586 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -13,6 +13,7 @@ config SND_SOC_ALL_CODECS select SND_SOC_TWL4030 if TWL4030_CORE select SND_SOC_UDA134X select SND_SOC_UDA1380 if I2C + select SND_SOC_WM8350 if MFD_WM8350 select SND_SOC_WM8510 if (I2C || SPI_MASTER) select SND_SOC_WM8580 if I2C select SND_SOC_WM8728 if (I2C || SPI_MASTER) @@ -100,6 +101,9 @@ config SND_SOC_UDA134X config SND_SOC_UDA1380 tristate +config SND_SOC_WM8350 + tristate + config SND_SOC_WM8510 tristate diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile index 9a20fddd09c7..c4ddc9aa2bbd 100644 --- a/sound/soc/codecs/Makefile +++ b/sound/soc/codecs/Makefile @@ -12,6 +12,7 @@ snd-soc-tlv320aic3x-objs := tlv320aic3x.o snd-soc-twl4030-objs := twl4030.o snd-soc-uda134x-objs := uda134x.o snd-soc-uda1380-objs := uda1380.o +snd-soc-wm8350-objs := wm8350.o snd-soc-wm8510-objs := wm8510.o snd-soc-wm8580-objs := wm8580.o snd-soc-wm8728-objs := wm8728.o @@ -39,6 +40,7 @@ obj-$(CONFIG_SND_SOC_TLV320AIC3X) += snd-soc-tlv320aic3x.o obj-$(CONFIG_SND_SOC_TWL4030) += snd-soc-twl4030.o obj-$(CONFIG_SND_SOC_UDA134X) += snd-soc-uda134x.o obj-$(CONFIG_SND_SOC_UDA1380) += snd-soc-uda1380.o +obj-$(CONFIG_SND_SOC_WM8350) += snd-soc-wm8350.o obj-$(CONFIG_SND_SOC_WM8510) += snd-soc-wm8510.o obj-$(CONFIG_SND_SOC_WM8580) += snd-soc-wm8580.o obj-$(CONFIG_SND_SOC_WM8728) += snd-soc-wm8728.o diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c new file mode 100644 index 000000000000..4bbfb5a5894b --- /dev/null +++ b/sound/soc/codecs/wm8350.c @@ -0,0 +1,1583 @@ +/* + * wm8350.c -- WM8350 ALSA SoC audio driver + * + * Copyright (C) 2007, 2008 Wolfson Microelectronics PLC. + * + * Author: Liam Girdwood + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "wm8350.h" + +#define WM8350_OUTn_0dB 0x39 + +#define WM8350_RAMP_NONE 0 +#define WM8350_RAMP_UP 1 +#define WM8350_RAMP_DOWN 2 + +/* We only include the analogue supplies here; the digital supplies + * need to be available well before this driver can be probed. + */ +static const char *supply_names[] = { + "AVDD", + "HPVDD", +}; + +struct wm8350_output { + u16 active; + u16 left_vol; + u16 right_vol; + u16 ramp; + u16 mute; +}; + +struct wm8350_data { + struct snd_soc_codec codec; + struct wm8350_output out1; + struct wm8350_output out2; + struct regulator_bulk_data supplies[ARRAY_SIZE(supply_names)]; +}; + +static unsigned int wm8350_codec_cache_read(struct snd_soc_codec *codec, + unsigned int reg) +{ + struct wm8350 *wm8350 = codec->control_data; + return wm8350->reg_cache[reg]; +} + +static unsigned int wm8350_codec_read(struct snd_soc_codec *codec, + unsigned int reg) +{ + struct wm8350 *wm8350 = codec->control_data; + return wm8350_reg_read(wm8350, reg); +} + +static int wm8350_codec_write(struct snd_soc_codec *codec, unsigned int reg, + unsigned int value) +{ + struct wm8350 *wm8350 = codec->control_data; + return wm8350_reg_write(wm8350, reg, value); +} + +/* + * Ramp OUT1 PGA volume to minimise pops at stream startup and shutdown. + */ +static inline int wm8350_out1_ramp_step(struct snd_soc_codec *codec) +{ + struct wm8350_data *wm8350_data = codec->private_data; + struct wm8350_output *out1 = &wm8350_data->out1; + struct wm8350 *wm8350 = codec->control_data; + int left_complete = 0, right_complete = 0; + u16 reg, val; + + /* left channel */ + reg = wm8350_reg_read(wm8350, WM8350_LOUT1_VOLUME); + val = (reg & WM8350_OUT1L_VOL_MASK) >> WM8350_OUT1L_VOL_SHIFT; + + if (out1->ramp == WM8350_RAMP_UP) { + /* ramp step up */ + if (val < out1->left_vol) { + val++; + reg &= ~WM8350_OUT1L_VOL_MASK; + wm8350_reg_write(wm8350, WM8350_LOUT1_VOLUME, + reg | (val << WM8350_OUT1L_VOL_SHIFT)); + } else + left_complete = 1; + } else if (out1->ramp == WM8350_RAMP_DOWN) { + /* ramp step down */ + if (val > 0) { + val--; + reg &= ~WM8350_OUT1L_VOL_MASK; + wm8350_reg_write(wm8350, WM8350_LOUT1_VOLUME, + reg | (val << WM8350_OUT1L_VOL_SHIFT)); + } else + left_complete = 1; + } else + return 1; + + /* right channel */ + reg = wm8350_reg_read(wm8350, WM8350_ROUT1_VOLUME); + val = (reg & WM8350_OUT1R_VOL_MASK) >> WM8350_OUT1R_VOL_SHIFT; + if (out1->ramp == WM8350_RAMP_UP) { + /* ramp step up */ + if (val < out1->right_vol) { + val++; + reg &= ~WM8350_OUT1R_VOL_MASK; + wm8350_reg_write(wm8350, WM8350_ROUT1_VOLUME, + reg | (val << WM8350_OUT1R_VOL_SHIFT)); + } else + right_complete = 1; + } else if (out1->ramp == WM8350_RAMP_DOWN) { + /* ramp step down */ + if (val > 0) { + val--; + reg &= ~WM8350_OUT1R_VOL_MASK; + wm8350_reg_write(wm8350, WM8350_ROUT1_VOLUME, + reg | (val << WM8350_OUT1R_VOL_SHIFT)); + } else + right_complete = 1; + } + + /* only hit the update bit if either volume has changed this step */ + if (!left_complete || !right_complete) + wm8350_set_bits(wm8350, WM8350_LOUT1_VOLUME, WM8350_OUT1_VU); + + return left_complete & right_complete; +} + +/* + * Ramp OUT2 PGA volume to minimise pops at stream startup and shutdown. + */ +static inline int wm8350_out2_ramp_step(struct snd_soc_codec *codec) +{ + struct wm8350_data *wm8350_data = codec->private_data; + struct wm8350_output *out2 = &wm8350_data->out2; + struct wm8350 *wm8350 = codec->control_data; + int left_complete = 0, right_complete = 0; + u16 reg, val; + + /* left channel */ + reg = wm8350_reg_read(wm8350, WM8350_LOUT2_VOLUME); + val = (reg & WM8350_OUT2L_VOL_MASK) >> WM8350_OUT1L_VOL_SHIFT; + if (out2->ramp == WM8350_RAMP_UP) { + /* ramp step up */ + if (val < out2->left_vol) { + val++; + reg &= ~WM8350_OUT2L_VOL_MASK; + wm8350_reg_write(wm8350, WM8350_LOUT2_VOLUME, + reg | (val << WM8350_OUT1L_VOL_SHIFT)); + } else + left_complete = 1; + } else if (out2->ramp == WM8350_RAMP_DOWN) { + /* ramp step down */ + if (val > 0) { + val--; + reg &= ~WM8350_OUT2L_VOL_MASK; + wm8350_reg_write(wm8350, WM8350_LOUT2_VOLUME, + reg | (val << WM8350_OUT1L_VOL_SHIFT)); + } else + left_complete = 1; + } else + return 1; + + /* right channel */ + reg = wm8350_reg_read(wm8350, WM8350_ROUT2_VOLUME); + val = (reg & WM8350_OUT2R_VOL_MASK) >> WM8350_OUT1R_VOL_SHIFT; + if (out2->ramp == WM8350_RAMP_UP) { + /* ramp step up */ + if (val < out2->right_vol) { + val++; + reg &= ~WM8350_OUT2R_VOL_MASK; + wm8350_reg_write(wm8350, WM8350_ROUT2_VOLUME, + reg | (val << WM8350_OUT1R_VOL_SHIFT)); + } else + right_complete = 1; + } else if (out2->ramp == WM8350_RAMP_DOWN) { + /* ramp step down */ + if (val > 0) { + val--; + reg &= ~WM8350_OUT2R_VOL_MASK; + wm8350_reg_write(wm8350, WM8350_ROUT2_VOLUME, + reg | (val << WM8350_OUT1R_VOL_SHIFT)); + } else + right_complete = 1; + } + + /* only hit the update bit if either volume has changed this step */ + if (!left_complete || !right_complete) + wm8350_set_bits(wm8350, WM8350_LOUT2_VOLUME, WM8350_OUT2_VU); + + return left_complete & right_complete; +} + +/* + * This work ramps both output PGAs at stream start/stop time to + * minimise pop associated with DAPM power switching. + * It's best to enable Zero Cross when ramping occurs to minimise any + * zipper noises. + */ +static void wm8350_pga_work(struct work_struct *work) +{ + struct snd_soc_codec *codec = + container_of(work, struct snd_soc_codec, delayed_work.work); + struct wm8350_data *wm8350_data = codec->private_data; + struct wm8350_output *out1 = &wm8350_data->out1, + *out2 = &wm8350_data->out2; + int i, out1_complete, out2_complete; + + /* do we need to ramp at all ? */ + if (out1->ramp == WM8350_RAMP_NONE && out2->ramp == WM8350_RAMP_NONE) + return; + + /* PGA volumes have 6 bits of resolution to ramp */ + for (i = 0; i <= 63; i++) { + out1_complete = 1, out2_complete = 1; + if (out1->ramp != WM8350_RAMP_NONE) + out1_complete = wm8350_out1_ramp_step(codec); + if (out2->ramp != WM8350_RAMP_NONE) + out2_complete = wm8350_out2_ramp_step(codec); + + /* ramp finished ? */ + if (out1_complete && out2_complete) + break; + + /* we need to delay longer on the up ramp */ + if (out1->ramp == WM8350_RAMP_UP || + out2->ramp == WM8350_RAMP_UP) { + /* delay is longer over 0dB as increases are larger */ + if (i >= WM8350_OUTn_0dB) + schedule_timeout_interruptible(msecs_to_jiffies + (2)); + else + schedule_timeout_interruptible(msecs_to_jiffies + (1)); + } else + udelay(50); /* doesn't matter if we delay longer */ + } + + out1->ramp = WM8350_RAMP_NONE; + out2->ramp = WM8350_RAMP_NONE; +} + +/* + * WM8350 Controls + */ + +static int pga_event(struct snd_soc_dapm_widget *w, + struct snd_kcontrol *kcontrol, int event) +{ + struct snd_soc_codec *codec = w->codec; + struct wm8350_data *wm8350_data = codec->private_data; + struct wm8350_output *out; + + switch (w->shift) { + case 0: + case 1: + out = &wm8350_data->out1; + break; + case 2: + case 3: + out = &wm8350_data->out2; + break; + + default: + BUG(); + return -1; + } + + switch (event) { + case SND_SOC_DAPM_POST_PMU: + out->ramp = WM8350_RAMP_UP; + out->active = 1; + + if (!delayed_work_pending(&codec->delayed_work)) + schedule_delayed_work(&codec->delayed_work, + msecs_to_jiffies(1)); + break; + + case SND_SOC_DAPM_PRE_PMD: + out->ramp = WM8350_RAMP_DOWN; + out->active = 0; + + if (!delayed_work_pending(&codec->delayed_work)) + schedule_delayed_work(&codec->delayed_work, + msecs_to_jiffies(1)); + break; + } + + return 0; +} + +static int wm8350_put_volsw_2r_vu(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8350_data *wm8350_priv = codec->private_data; + struct wm8350_output *out = NULL; + struct soc_mixer_control *mc = + (struct soc_mixer_control *)kcontrol->private_value; + int ret; + unsigned int reg = mc->reg; + u16 val; + + /* For OUT1 and OUT2 we shadow the values and only actually write + * them out when active in order to ensure the amplifier comes on + * as quietly as possible. */ + switch (reg) { + case WM8350_LOUT1_VOLUME: + out = &wm8350_priv->out1; + break; + case WM8350_LOUT2_VOLUME: + out = &wm8350_priv->out2; + break; + default: + break; + } + + if (out) { + out->left_vol = ucontrol->value.integer.value[0]; + out->right_vol = ucontrol->value.integer.value[1]; + if (!out->active) + return 1; + } + + ret = snd_soc_put_volsw_2r(kcontrol, ucontrol); + if (ret < 0) + return ret; + + /* now hit the volume update bits (always bit 8) */ + val = wm8350_codec_read(codec, reg); + wm8350_codec_write(codec, reg, val | WM8350_OUT1_VU); + return 1; +} + +static int wm8350_get_volsw_2r(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); + struct wm8350_data *wm8350_priv = codec->private_data; + struct wm8350_output *out1 = &wm8350_priv->out1; + struct wm8350_output *out2 = &wm8350_priv->out2; + struct soc_mixer_control *mc = + (struct soc_mixer_control *)kcontrol->private_value; + unsigned int reg = mc->reg; + + /* If these are cached registers use the cache */ + switch (reg) { + case WM8350_LOUT1_VOLUME: + ucontrol->value.integer.value[0] = out1->left_vol; + ucontrol->value.integer.value[1] = out1->right_vol; + return 0; + + case WM8350_LOUT2_VOLUME: + ucontrol->value.integer.value[0] = out2->left_vol; + ucontrol->value.integer.value[1] = out2->right_vol; + return 0; + + default: + break; + } + + return snd_soc_get_volsw_2r(kcontrol, ucontrol); +} + +/* double control with volume update */ +#define SOC_WM8350_DOUBLE_R_TLV(xname, reg_left, reg_right, xshift, xmax, \ + xinvert, tlv_array) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = (xname), \ + .access = SNDRV_CTL_ELEM_ACCESS_TLV_READ | \ + SNDRV_CTL_ELEM_ACCESS_READWRITE | \ + SNDRV_CTL_ELEM_ACCESS_VOLATILE, \ + .tlv.p = (tlv_array), \ + .info = snd_soc_info_volsw_2r, \ + .get = wm8350_get_volsw_2r, .put = wm8350_put_volsw_2r_vu, \ + .private_value = (unsigned long)&(struct soc_mixer_control) \ + {.reg = reg_left, .rreg = reg_right, .shift = xshift, \ + .rshift = xshift, .max = xmax, .invert = xinvert}, } + +static const char *wm8350_deemp[] = { "None", "32kHz", "44.1kHz", "48kHz" }; +static const char *wm8350_pol[] = { "Normal", "Inv R", "Inv L", "Inv L & R" }; +static const char *wm8350_dacmutem[] = { "Normal", "Soft" }; +static const char *wm8350_dacmutes[] = { "Fast", "Slow" }; +static const char *wm8350_dacfilter[] = { "Normal", "Sloping" }; +static const char *wm8350_adcfilter[] = { "None", "High Pass" }; +static const char *wm8350_adchp[] = { "44.1kHz", "8kHz", "16kHz", "32kHz" }; +static const char *wm8350_lr[] = { "Left", "Right" }; + +static const struct soc_enum wm8350_enum[] = { + SOC_ENUM_SINGLE(WM8350_DAC_CONTROL, 4, 4, wm8350_deemp), + SOC_ENUM_SINGLE(WM8350_DAC_CONTROL, 0, 4, wm8350_pol), + SOC_ENUM_SINGLE(WM8350_DAC_MUTE_VOLUME, 14, 2, wm8350_dacmutem), + SOC_ENUM_SINGLE(WM8350_DAC_MUTE_VOLUME, 13, 2, wm8350_dacmutes), + SOC_ENUM_SINGLE(WM8350_DAC_MUTE_VOLUME, 12, 2, wm8350_dacfilter), + SOC_ENUM_SINGLE(WM8350_ADC_CONTROL, 15, 2, wm8350_adcfilter), + SOC_ENUM_SINGLE(WM8350_ADC_CONTROL, 8, 4, wm8350_adchp), + SOC_ENUM_SINGLE(WM8350_ADC_CONTROL, 0, 4, wm8350_pol), + SOC_ENUM_SINGLE(WM8350_INPUT_MIXER_VOLUME, 15, 2, wm8350_lr), +}; + +static DECLARE_TLV_DB_LINEAR(pre_amp_tlv, -1200, 3525); +static DECLARE_TLV_DB_LINEAR(out_pga_tlv, -5700, 600); +static DECLARE_TLV_DB_SCALE(dac_pcm_tlv, -7163, 36, 1); +static DECLARE_TLV_DB_SCALE(adc_pcm_tlv, -12700, 50, 1); +static DECLARE_TLV_DB_SCALE(out_mix_tlv, -1500, 300, 1); + +static const unsigned int capture_sd_tlv[] = { + TLV_DB_RANGE_HEAD(2), + 0, 12, TLV_DB_SCALE_ITEM(-3600, 300, 1), + 13, 15, TLV_DB_SCALE_ITEM(0, 0, 0), +}; + +static const struct snd_kcontrol_new wm8350_snd_controls[] = { + SOC_ENUM("Playback Deemphasis", wm8350_enum[0]), + SOC_ENUM("Playback DAC Inversion", wm8350_enum[1]), + SOC_WM8350_DOUBLE_R_TLV("Playback PCM Volume", + WM8350_DAC_DIGITAL_VOLUME_L, + WM8350_DAC_DIGITAL_VOLUME_R, + 0, 255, 0, dac_pcm_tlv), + SOC_ENUM("Playback PCM Mute Function", wm8350_enum[2]), + SOC_ENUM("Playback PCM Mute Speed", wm8350_enum[3]), + SOC_ENUM("Playback PCM Filter", wm8350_enum[4]), + SOC_ENUM("Capture PCM Filter", wm8350_enum[5]), + SOC_ENUM("Capture PCM HP Filter", wm8350_enum[6]), + SOC_ENUM("Capture ADC Inversion", wm8350_enum[7]), + SOC_WM8350_DOUBLE_R_TLV("Capture PCM Volume", + WM8350_ADC_DIGITAL_VOLUME_L, + WM8350_ADC_DIGITAL_VOLUME_R, + 0, 255, 0, adc_pcm_tlv), + SOC_DOUBLE_TLV("Capture Sidetone Volume", + WM8350_ADC_DIVIDER, + 8, 4, 15, 1, capture_sd_tlv), + SOC_WM8350_DOUBLE_R_TLV("Capture Volume", + WM8350_LEFT_INPUT_VOLUME, + WM8350_RIGHT_INPUT_VOLUME, + 2, 63, 0, pre_amp_tlv), + SOC_DOUBLE_R("Capture ZC Switch", + WM8350_LEFT_INPUT_VOLUME, + WM8350_RIGHT_INPUT_VOLUME, 13, 1, 0), + SOC_SINGLE_TLV("Left Input Left Sidetone Volume", + WM8350_OUTPUT_LEFT_MIXER_VOLUME, 1, 7, 0, out_mix_tlv), + SOC_SINGLE_TLV("Left Input Right Sidetone Volume", + WM8350_OUTPUT_LEFT_MIXER_VOLUME, + 5, 7, 0, out_mix_tlv), + SOC_SINGLE_TLV("Left Input Bypass Volume", + WM8350_OUTPUT_LEFT_MIXER_VOLUME, + 9, 7, 0, out_mix_tlv), + SOC_SINGLE_TLV("Right Input Left Sidetone Volume", + WM8350_OUTPUT_RIGHT_MIXER_VOLUME, + 1, 7, 0, out_mix_tlv), + SOC_SINGLE_TLV("Right Input Right Sidetone Volume", + WM8350_OUTPUT_RIGHT_MIXER_VOLUME, + 5, 7, 0, out_mix_tlv), + SOC_SINGLE_TLV("Right Input Bypass Volume", + WM8350_OUTPUT_RIGHT_MIXER_VOLUME, + 13, 7, 0, out_mix_tlv), + SOC_SINGLE("Left Input Mixer +20dB Switch", + WM8350_INPUT_MIXER_VOLUME_L, 0, 1, 0), + SOC_SINGLE("Right Input Mixer +20dB Switch", + WM8350_INPUT_MIXER_VOLUME_R, 0, 1, 0), + SOC_SINGLE_TLV("Out4 Capture Volume", + WM8350_INPUT_MIXER_VOLUME, + 1, 7, 0, out_mix_tlv), + SOC_WM8350_DOUBLE_R_TLV("Out1 Playback Volume", + WM8350_LOUT1_VOLUME, + WM8350_ROUT1_VOLUME, + 2, 63, 0, out_pga_tlv), + SOC_DOUBLE_R("Out1 Playback ZC Switch", + WM8350_LOUT1_VOLUME, + WM8350_ROUT1_VOLUME, 13, 1, 0), + SOC_WM8350_DOUBLE_R_TLV("Out2 Playback Volume", + WM8350_LOUT2_VOLUME, + WM8350_ROUT2_VOLUME, + 2, 63, 0, out_pga_tlv), + SOC_DOUBLE_R("Out2 Playback ZC Switch", WM8350_LOUT2_VOLUME, + WM8350_ROUT2_VOLUME, 13, 1, 0), + SOC_SINGLE("Out2 Right Invert Switch", WM8350_ROUT2_VOLUME, 10, 1, 0), + SOC_SINGLE_TLV("Out2 Beep Volume", WM8350_BEEP_VOLUME, + 5, 7, 0, out_mix_tlv), + + SOC_DOUBLE_R("Out1 Playback Switch", + WM8350_LOUT1_VOLUME, + WM8350_ROUT1_VOLUME, + 14, 1, 1), + SOC_DOUBLE_R("Out2 Playback Switch", + WM8350_LOUT2_VOLUME, + WM8350_ROUT2_VOLUME, + 14, 1, 1), +}; + +/* + * DAPM Controls + */ + +/* Left Playback Mixer */ +static const struct snd_kcontrol_new wm8350_left_play_mixer_controls[] = { + SOC_DAPM_SINGLE("Playback Switch", + WM8350_LEFT_MIXER_CONTROL, 11, 1, 0), + SOC_DAPM_SINGLE("Left Bypass Switch", + WM8350_LEFT_MIXER_CONTROL, 2, 1, 0), + SOC_DAPM_SINGLE("Right Playback Switch", + WM8350_LEFT_MIXER_CONTROL, 12, 1, 0), + SOC_DAPM_SINGLE("Left Sidetone Switch", + WM8350_LEFT_MIXER_CONTROL, 0, 1, 0), + SOC_DAPM_SINGLE("Right Sidetone Switch", + WM8350_LEFT_MIXER_CONTROL, 1, 1, 0), +}; + +/* Right Playback Mixer */ +static const struct snd_kcontrol_new wm8350_right_play_mixer_controls[] = { + SOC_DAPM_SINGLE("Playback Switch", + WM8350_RIGHT_MIXER_CONTROL, 12, 1, 0), + SOC_DAPM_SINGLE("Right Bypass Switch", + WM8350_RIGHT_MIXER_CONTROL, 3, 1, 0), + SOC_DAPM_SINGLE("Left Playback Switch", + WM8350_RIGHT_MIXER_CONTROL, 11, 1, 0), + SOC_DAPM_SINGLE("Left Sidetone Switch", + WM8350_RIGHT_MIXER_CONTROL, 0, 1, 0), + SOC_DAPM_SINGLE("Right Sidetone Switch", + WM8350_RIGHT_MIXER_CONTROL, 1, 1, 0), +}; + +/* Out4 Mixer */ +static const struct snd_kcontrol_new wm8350_out4_mixer_controls[] = { + SOC_DAPM_SINGLE("Right Playback Switch", + WM8350_OUT4_MIXER_CONTROL, 12, 1, 0), + SOC_DAPM_SINGLE("Left Playback Switch", + WM8350_OUT4_MIXER_CONTROL, 11, 1, 0), + SOC_DAPM_SINGLE("Right Capture Switch", + WM8350_OUT4_MIXER_CONTROL, 9, 1, 0), + SOC_DAPM_SINGLE("Out3 Playback Switch", + WM8350_OUT4_MIXER_CONTROL, 2, 1, 0), + SOC_DAPM_SINGLE("Right Mixer Switch", + WM8350_OUT4_MIXER_CONTROL, 1, 1, 0), + SOC_DAPM_SINGLE("Left Mixer Switch", + WM8350_OUT4_MIXER_CONTROL, 0, 1, 0), +}; + +/* Out3 Mixer */ +static const struct snd_kcontrol_new wm8350_out3_mixer_controls[] = { + SOC_DAPM_SINGLE("Left Playback Switch", + WM8350_OUT3_MIXER_CONTROL, 11, 1, 0), + SOC_DAPM_SINGLE("Left Capture Switch", + WM8350_OUT3_MIXER_CONTROL, 8, 1, 0), + SOC_DAPM_SINGLE("Out4 Playback Switch", + WM8350_OUT3_MIXER_CONTROL, 3, 1, 0), + SOC_DAPM_SINGLE("Left Mixer Switch", + WM8350_OUT3_MIXER_CONTROL, 0, 1, 0), +}; + +/* Left Input Mixer */ +static const struct snd_kcontrol_new wm8350_left_capt_mixer_controls[] = { + SOC_DAPM_SINGLE_TLV("L2 Capture Volume", + WM8350_INPUT_MIXER_VOLUME_L, 1, 7, 0, out_mix_tlv), + SOC_DAPM_SINGLE_TLV("L3 Capture Volume", + WM8350_INPUT_MIXER_VOLUME_L, 9, 7, 0, out_mix_tlv), + SOC_DAPM_SINGLE("PGA Capture Switch", + WM8350_LEFT_INPUT_VOLUME, 14, 1, 0), +}; + +/* Right Input Mixer */ +static const struct snd_kcontrol_new wm8350_right_capt_mixer_controls[] = { + SOC_DAPM_SINGLE_TLV("L2 Capture Volume", + WM8350_INPUT_MIXER_VOLUME_R, 5, 7, 0, out_mix_tlv), + SOC_DAPM_SINGLE_TLV("L3 Capture Volume", + WM8350_INPUT_MIXER_VOLUME_R, 13, 7, 0, out_mix_tlv), + SOC_DAPM_SINGLE("PGA Capture Switch", + WM8350_RIGHT_INPUT_VOLUME, 14, 1, 0), +}; + +/* Left Mic Mixer */ +static const struct snd_kcontrol_new wm8350_left_mic_mixer_controls[] = { + SOC_DAPM_SINGLE("INN Capture Switch", WM8350_INPUT_CONTROL, 1, 1, 0), + SOC_DAPM_SINGLE("INP Capture Switch", WM8350_INPUT_CONTROL, 0, 1, 0), + SOC_DAPM_SINGLE("IN2 Capture Switch", WM8350_INPUT_CONTROL, 2, 1, 0), +}; + +/* Right Mic Mixer */ +static const struct snd_kcontrol_new wm8350_right_mic_mixer_controls[] = { + SOC_DAPM_SINGLE("INN Capture Switch", WM8350_INPUT_CONTROL, 9, 1, 0), + SOC_DAPM_SINGLE("INP Capture Switch", WM8350_INPUT_CONTROL, 8, 1, 0), + SOC_DAPM_SINGLE("IN2 Capture Switch", WM8350_INPUT_CONTROL, 10, 1, 0), +}; + +/* Beep Switch */ +static const struct snd_kcontrol_new wm8350_beep_switch_controls = +SOC_DAPM_SINGLE("Switch", WM8350_BEEP_VOLUME, 15, 1, 1); + +/* Out4 Capture Mux */ +static const struct snd_kcontrol_new wm8350_out4_capture_controls = +SOC_DAPM_ENUM("Route", wm8350_enum[8]); + +static const struct snd_soc_dapm_widget wm8350_dapm_widgets[] = { + + SND_SOC_DAPM_PGA("IN3R PGA", WM8350_POWER_MGMT_2, 11, 0, NULL, 0), + SND_SOC_DAPM_PGA("IN3L PGA", WM8350_POWER_MGMT_2, 10, 0, NULL, 0), + SND_SOC_DAPM_PGA_E("Right Out2 PGA", WM8350_POWER_MGMT_3, 3, 0, NULL, + 0, pga_event, + SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD), + SND_SOC_DAPM_PGA_E("Left Out2 PGA", WM8350_POWER_MGMT_3, 2, 0, NULL, 0, + pga_event, + SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD), + SND_SOC_DAPM_PGA_E("Right Out1 PGA", WM8350_POWER_MGMT_3, 1, 0, NULL, + 0, pga_event, + SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD), + SND_SOC_DAPM_PGA_E("Left Out1 PGA", WM8350_POWER_MGMT_3, 0, 0, NULL, 0, + pga_event, + SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD), + + SND_SOC_DAPM_MIXER("Right Capture Mixer", WM8350_POWER_MGMT_2, + 7, 0, &wm8350_right_capt_mixer_controls[0], + ARRAY_SIZE(wm8350_right_capt_mixer_controls)), + + SND_SOC_DAPM_MIXER("Left Capture Mixer", WM8350_POWER_MGMT_2, + 6, 0, &wm8350_left_capt_mixer_controls[0], + ARRAY_SIZE(wm8350_left_capt_mixer_controls)), + + SND_SOC_DAPM_MIXER("Out4 Mixer", WM8350_POWER_MGMT_2, 5, 0, + &wm8350_out4_mixer_controls[0], + ARRAY_SIZE(wm8350_out4_mixer_controls)), + + SND_SOC_DAPM_MIXER("Out3 Mixer", WM8350_POWER_MGMT_2, 4, 0, + &wm8350_out3_mixer_controls[0], + ARRAY_SIZE(wm8350_out3_mixer_controls)), + + SND_SOC_DAPM_MIXER("Right Playback Mixer", WM8350_POWER_MGMT_2, 1, 0, + &wm8350_right_play_mixer_controls[0], + ARRAY_SIZE(wm8350_right_play_mixer_controls)), + + SND_SOC_DAPM_MIXER("Left Playback Mixer", WM8350_POWER_MGMT_2, 0, 0, + &wm8350_left_play_mixer_controls[0], + ARRAY_SIZE(wm8350_left_play_mixer_controls)), + + SND_SOC_DAPM_MIXER("Left Mic Mixer", WM8350_POWER_MGMT_2, 8, 0, + &wm8350_left_mic_mixer_controls[0], + ARRAY_SIZE(wm8350_left_mic_mixer_controls)), + + SND_SOC_DAPM_MIXER("Right Mic Mixer", WM8350_POWER_MGMT_2, 9, 0, + &wm8350_right_mic_mixer_controls[0], + ARRAY_SIZE(wm8350_right_mic_mixer_controls)), + + /* virtual mixer for Beep and Out2R */ + SND_SOC_DAPM_MIXER("Out2 Mixer", SND_SOC_NOPM, 0, 0, NULL, 0), + + SND_SOC_DAPM_SWITCH("Beep", WM8350_POWER_MGMT_3, 7, 0, + &wm8350_beep_switch_controls), + + SND_SOC_DAPM_ADC("Right ADC", "Right Capture", + WM8350_POWER_MGMT_4, 3, 0), + SND_SOC_DAPM_ADC("Left ADC", "Left Capture", + WM8350_POWER_MGMT_4, 2, 0), + SND_SOC_DAPM_DAC("Right DAC", "Right Playback", + WM8350_POWER_MGMT_4, 5, 0), + SND_SOC_DAPM_DAC("Left DAC", "Left Playback", + WM8350_POWER_MGMT_4, 4, 0), + + SND_SOC_DAPM_MICBIAS("Mic Bias", WM8350_POWER_MGMT_1, 4, 0), + + SND_SOC_DAPM_MUX("Out4 Capture Channel", SND_SOC_NOPM, 0, 0, + &wm8350_out4_capture_controls), + + SND_SOC_DAPM_OUTPUT("OUT1R"), + SND_SOC_DAPM_OUTPUT("OUT1L"), + SND_SOC_DAPM_OUTPUT("OUT2R"), + SND_SOC_DAPM_OUTPUT("OUT2L"), + SND_SOC_DAPM_OUTPUT("OUT3"), + SND_SOC_DAPM_OUTPUT("OUT4"), + + SND_SOC_DAPM_INPUT("IN1RN"), + SND_SOC_DAPM_INPUT("IN1RP"), + SND_SOC_DAPM_INPUT("IN2R"), + SND_SOC_DAPM_INPUT("IN1LP"), + SND_SOC_DAPM_INPUT("IN1LN"), + SND_SOC_DAPM_INPUT("IN2L"), + SND_SOC_DAPM_INPUT("IN3R"), + SND_SOC_DAPM_INPUT("IN3L"), +}; + +static const struct snd_soc_dapm_route audio_map[] = { + + /* left playback mixer */ + {"Left Playback Mixer", "Playback Switch", "Left DAC"}, + {"Left Playback Mixer", "Left Bypass Switch", "IN3L PGA"}, + {"Left Playback Mixer", "Right Playback Switch", "Right DAC"}, + {"Left Playback Mixer", "Left Sidetone Switch", "Left Mic Mixer"}, + {"Left Playback Mixer", "Right Sidetone Switch", "Right Mic Mixer"}, + + /* right playback mixer */ + {"Right Playback Mixer", "Playback Switch", "Right DAC"}, + {"Right Playback Mixer", "Right Bypass Switch", "IN3R PGA"}, + {"Right Playback Mixer", "Left Playback Switch", "Left DAC"}, + {"Right Playback Mixer", "Left Sidetone Switch", "Left Mic Mixer"}, + {"Right Playback Mixer", "Right Sidetone Switch", "Right Mic Mixer"}, + + /* out4 playback mixer */ + {"Out4 Mixer", "Right Playback Switch", "Right DAC"}, + {"Out4 Mixer", "Left Playback Switch", "Left DAC"}, + {"Out4 Mixer", "Right Capture Switch", "Right Capture Mixer"}, + {"Out4 Mixer", "Out3 Playback Switch", "Out3 Mixer"}, + {"Out4 Mixer", "Right Mixer Switch", "Right Playback Mixer"}, + {"Out4 Mixer", "Left Mixer Switch", "Left Playback Mixer"}, + {"OUT4", NULL, "Out4 Mixer"}, + + /* out3 playback mixer */ + {"Out3 Mixer", "Left Playback Switch", "Left DAC"}, + {"Out3 Mixer", "Left Capture Switch", "Left Capture Mixer"}, + {"Out3 Mixer", "Left Mixer Switch", "Left Playback Mixer"}, + {"Out3 Mixer", "Out4 Playback Switch", "Out4 Mixer"}, + {"OUT3", NULL, "Out3 Mixer"}, + + /* out2 */ + {"Right Out2 PGA", NULL, "Right Playback Mixer"}, + {"Left Out2 PGA", NULL, "Left Playback Mixer"}, + {"OUT2L", NULL, "Left Out2 PGA"}, + {"OUT2R", NULL, "Right Out2 PGA"}, + + /* out1 */ + {"Right Out1 PGA", NULL, "Right Playback Mixer"}, + {"Left Out1 PGA", NULL, "Left Playback Mixer"}, + {"OUT1L", NULL, "Left Out1 PGA"}, + {"OUT1R", NULL, "Right Out1 PGA"}, + + /* ADCs */ + {"Left ADC", NULL, "Left Capture Mixer"}, + {"Right ADC", NULL, "Right Capture Mixer"}, + + /* Left capture mixer */ + {"Left Capture Mixer", "L2 Capture Volume", "IN2L"}, + {"Left Capture Mixer", "L3 Capture Volume", "IN3L PGA"}, + {"Left Capture Mixer", "PGA Capture Switch", "Left Mic Mixer"}, + {"Left Capture Mixer", NULL, "Out4 Capture Channel"}, + + /* Right capture mixer */ + {"Right Capture Mixer", "L2 Capture Volume", "IN2R"}, + {"Right Capture Mixer", "L3 Capture Volume", "IN3R PGA"}, + {"Right Capture Mixer", "PGA Capture Switch", "Right Mic Mixer"}, + {"Right Capture Mixer", NULL, "Out4 Capture Channel"}, + + /* L3 Inputs */ + {"IN3L PGA", NULL, "IN3L"}, + {"IN3R PGA", NULL, "IN3R"}, + + /* Left Mic mixer */ + {"Left Mic Mixer", "INN Capture Switch", "IN1LN"}, + {"Left Mic Mixer", "INP Capture Switch", "IN1LP"}, + {"Left Mic Mixer", "IN2 Capture Switch", "IN2L"}, + + /* Right Mic mixer */ + {"Right Mic Mixer", "INN Capture Switch", "IN1RN"}, + {"Right Mic Mixer", "INP Capture Switch", "IN1RP"}, + {"Right Mic Mixer", "IN2 Capture Switch", "IN2R"}, + + /* out 4 capture */ + {"Out4 Capture Channel", NULL, "Out4 Mixer"}, + + /* Beep */ + {"Beep", NULL, "IN3R PGA"}, +}; + +static int wm8350_add_controls(struct snd_soc_codec *codec) +{ + int err, i; + + for (i = 0; i < ARRAY_SIZE(wm8350_snd_controls); i++) { + err = snd_ctl_add(codec->card, + snd_soc_cnew(&wm8350_snd_controls[i], + codec, NULL)); + if (err < 0) + return err; + } + + return 0; +} + +static int wm8350_add_widgets(struct snd_soc_codec *codec) +{ + int ret; + + ret = snd_soc_dapm_new_controls(codec, + wm8350_dapm_widgets, + ARRAY_SIZE(wm8350_dapm_widgets)); + if (ret != 0) { + dev_err(codec->dev, "dapm control register failed\n"); + return ret; + } + + /* set up audio paths */ + ret = snd_soc_dapm_add_routes(codec, audio_map, ARRAY_SIZE(audio_map)); + if (ret != 0) { + dev_err(codec->dev, "DAPM route register failed\n"); + return ret; + } + + return snd_soc_dapm_new_widgets(codec); +} + +static int wm8350_set_dai_sysclk(struct snd_soc_dai *codec_dai, + int clk_id, unsigned int freq, int dir) +{ + struct snd_soc_codec *codec = codec_dai->codec; + struct wm8350 *wm8350 = codec->control_data; + u16 fll_4; + + switch (clk_id) { + case WM8350_MCLK_SEL_MCLK: + wm8350_clear_bits(wm8350, WM8350_CLOCK_CONTROL_1, + WM8350_MCLK_SEL); + break; + case WM8350_MCLK_SEL_PLL_MCLK: + case WM8350_MCLK_SEL_PLL_DAC: + case WM8350_MCLK_SEL_PLL_ADC: + case WM8350_MCLK_SEL_PLL_32K: + wm8350_set_bits(wm8350, WM8350_CLOCK_CONTROL_1, + WM8350_MCLK_SEL); + fll_4 = wm8350_codec_read(codec, WM8350_FLL_CONTROL_4) & + ~WM8350_FLL_CLK_SRC_MASK; + wm8350_codec_write(codec, WM8350_FLL_CONTROL_4, fll_4 | clk_id); + break; + } + + /* MCLK direction */ + if (dir == WM8350_MCLK_DIR_OUT) + wm8350_set_bits(wm8350, WM8350_CLOCK_CONTROL_2, + WM8350_MCLK_DIR); + else + wm8350_clear_bits(wm8350, WM8350_CLOCK_CONTROL_2, + WM8350_MCLK_DIR); + + return 0; +} + +static int wm8350_set_clkdiv(struct snd_soc_dai *codec_dai, int div_id, int div) +{ + struct snd_soc_codec *codec = codec_dai->codec; + u16 val; + + switch (div_id) { + case WM8350_ADC_CLKDIV: + val = wm8350_codec_read(codec, WM8350_ADC_DIVIDER) & + ~WM8350_ADC_CLKDIV_MASK; + wm8350_codec_write(codec, WM8350_ADC_DIVIDER, val | div); + break; + case WM8350_DAC_CLKDIV: + val = wm8350_codec_read(codec, WM8350_DAC_CLOCK_CONTROL) & + ~WM8350_DAC_CLKDIV_MASK; + wm8350_codec_write(codec, WM8350_DAC_CLOCK_CONTROL, val | div); + break; + case WM8350_BCLK_CLKDIV: + val = wm8350_codec_read(codec, WM8350_CLOCK_CONTROL_1) & + ~WM8350_BCLK_DIV_MASK; + wm8350_codec_write(codec, WM8350_CLOCK_CONTROL_1, val | div); + break; + case WM8350_OPCLK_CLKDIV: + val = wm8350_codec_read(codec, WM8350_CLOCK_CONTROL_1) & + ~WM8350_OPCLK_DIV_MASK; + wm8350_codec_write(codec, WM8350_CLOCK_CONTROL_1, val | div); + break; + case WM8350_SYS_CLKDIV: + val = wm8350_codec_read(codec, WM8350_CLOCK_CONTROL_1) & + ~WM8350_MCLK_DIV_MASK; + wm8350_codec_write(codec, WM8350_CLOCK_CONTROL_1, val | div); + break; + case WM8350_DACLR_CLKDIV: + val = wm8350_codec_read(codec, WM8350_DAC_LR_RATE) & + ~WM8350_DACLRC_RATE_MASK; + wm8350_codec_write(codec, WM8350_DAC_LR_RATE, val | div); + break; + case WM8350_ADCLR_CLKDIV: + val = wm8350_codec_read(codec, WM8350_ADC_LR_RATE) & + ~WM8350_ADCLRC_RATE_MASK; + wm8350_codec_write(codec, WM8350_ADC_LR_RATE, val | div); + break; + default: + return -EINVAL; + } + + return 0; +} + +static int wm8350_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) +{ + struct snd_soc_codec *codec = codec_dai->codec; + u16 iface = wm8350_codec_read(codec, WM8350_AI_FORMATING) & + ~(WM8350_AIF_BCLK_INV | WM8350_AIF_LRCLK_INV | WM8350_AIF_FMT_MASK); + u16 master = wm8350_codec_read(codec, WM8350_AI_DAC_CONTROL) & + ~WM8350_BCLK_MSTR; + u16 dac_lrc = wm8350_codec_read(codec, WM8350_DAC_LR_RATE) & + ~WM8350_DACLRC_ENA; + u16 adc_lrc = wm8350_codec_read(codec, WM8350_ADC_LR_RATE) & + ~WM8350_ADCLRC_ENA; + + /* set master/slave audio interface */ + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { + case SND_SOC_DAIFMT_CBM_CFM: + master |= WM8350_BCLK_MSTR; + dac_lrc |= WM8350_DACLRC_ENA; + adc_lrc |= WM8350_ADCLRC_ENA; + break; + case SND_SOC_DAIFMT_CBS_CFS: + break; + default: + return -EINVAL; + } + + /* interface format */ + switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { + case SND_SOC_DAIFMT_I2S: + iface |= 0x2 << 8; + break; + case SND_SOC_DAIFMT_RIGHT_J: + break; + case SND_SOC_DAIFMT_LEFT_J: + iface |= 0x1 << 8; + break; + case SND_SOC_DAIFMT_DSP_A: + iface |= 0x3 << 8; + break; + case SND_SOC_DAIFMT_DSP_B: + iface |= 0x3 << 8; /* lg not sure which mode */ + break; + default: + return -EINVAL; + } + + /* clock inversion */ + switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_NF: + break; + case SND_SOC_DAIFMT_IB_IF: + iface |= WM8350_AIF_LRCLK_INV | WM8350_AIF_BCLK_INV; + break; + case SND_SOC_DAIFMT_IB_NF: + iface |= WM8350_AIF_BCLK_INV; + break; + case SND_SOC_DAIFMT_NB_IF: + iface |= WM8350_AIF_LRCLK_INV; + break; + default: + return -EINVAL; + } + + wm8350_codec_write(codec, WM8350_AI_FORMATING, iface); + wm8350_codec_write(codec, WM8350_AI_DAC_CONTROL, master); + wm8350_codec_write(codec, WM8350_DAC_LR_RATE, dac_lrc); + wm8350_codec_write(codec, WM8350_ADC_LR_RATE, adc_lrc); + return 0; +} + +static int wm8350_pcm_trigger(struct snd_pcm_substream *substream, + int cmd, struct snd_soc_dai *codec_dai) +{ + struct snd_soc_codec *codec = codec_dai->codec; + int master = wm8350_codec_cache_read(codec, WM8350_AI_DAC_CONTROL) & + WM8350_BCLK_MSTR; + int enabled = 0; + + /* Check that the DACs or ADCs are enabled since they are + * required for LRC in master mode. The DACs or ADCs need a + * valid audio path i.e. pin -> ADC or DAC -> pin before + * the LRC will be enabled in master mode. */ + if (!master && cmd != SNDRV_PCM_TRIGGER_START) + return 0; + + if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) { + enabled = wm8350_codec_cache_read(codec, WM8350_POWER_MGMT_4) & + (WM8350_ADCR_ENA | WM8350_ADCL_ENA); + } else { + enabled = wm8350_codec_cache_read(codec, WM8350_POWER_MGMT_4) & + (WM8350_DACR_ENA | WM8350_DACL_ENA); + } + + if (!enabled) { + dev_err(codec->dev, + "%s: invalid audio path - no clocks available\n", + __func__); + return -EINVAL; + } + return 0; +} + +static int wm8350_pcm_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *codec_dai) +{ + struct snd_soc_codec *codec = codec_dai->codec; + u16 iface = wm8350_codec_read(codec, WM8350_AI_FORMATING) & + ~WM8350_AIF_WL_MASK; + + /* bit size */ + switch (params_format(params)) { + case SNDRV_PCM_FORMAT_S16_LE: + break; + case SNDRV_PCM_FORMAT_S20_3LE: + iface |= 0x1 << 10; + break; + case SNDRV_PCM_FORMAT_S24_LE: + iface |= 0x2 << 10; + break; + case SNDRV_PCM_FORMAT_S32_LE: + iface |= 0x3 << 10; + break; + } + + wm8350_codec_write(codec, WM8350_AI_FORMATING, iface); + return 0; +} + +static int wm8350_mute(struct snd_soc_dai *dai, int mute) +{ + struct snd_soc_codec *codec = dai->codec; + struct wm8350 *wm8350 = codec->control_data; + + if (mute) + wm8350_set_bits(wm8350, WM8350_DAC_MUTE, WM8350_DAC_MUTE_ENA); + else + wm8350_clear_bits(wm8350, WM8350_DAC_MUTE, WM8350_DAC_MUTE_ENA); + return 0; +} + +/* FLL divisors */ +struct _fll_div { + int div; /* FLL_OUTDIV */ + int n; + int k; + int ratio; /* FLL_FRATIO */ +}; + +/* The size in bits of the fll divide multiplied by 10 + * to allow rounding later */ +#define FIXED_FLL_SIZE ((1 << 16) * 10) + +static inline int fll_factors(struct _fll_div *fll_div, unsigned int input, + unsigned int output) +{ + u64 Kpart; + unsigned int t1, t2, K, Nmod; + + if (output >= 2815250 && output <= 3125000) + fll_div->div = 0x4; + else if (output >= 5625000 && output <= 6250000) + fll_div->div = 0x3; + else if (output >= 11250000 && output <= 12500000) + fll_div->div = 0x2; + else if (output >= 22500000 && output <= 25000000) + fll_div->div = 0x1; + else { + printk(KERN_ERR "wm8350: fll freq %d out of range\n", output); + return -EINVAL; + } + + if (input > 48000) + fll_div->ratio = 1; + else + fll_div->ratio = 8; + + t1 = output * (1 << (fll_div->div + 1)); + t2 = input * fll_div->ratio; + + fll_div->n = t1 / t2; + Nmod = t1 % t2; + + if (Nmod) { + Kpart = FIXED_FLL_SIZE * (long long)Nmod; + do_div(Kpart, t2); + K = Kpart & 0xFFFFFFFF; + + /* Check if we need to round */ + if ((K % 10) >= 5) + K += 5; + + /* Move down to proper range now rounding is done */ + K /= 10; + fll_div->k = K; + } else + fll_div->k = 0; + + return 0; +} + +static int wm8350_set_fll(struct snd_soc_dai *codec_dai, + int pll_id, unsigned int freq_in, + unsigned int freq_out) +{ + struct snd_soc_codec *codec = codec_dai->codec; + struct wm8350 *wm8350 = codec->control_data; + struct _fll_div fll_div; + int ret = 0; + u16 fll_1, fll_4; + + /* power down FLL - we need to do this for reconfiguration */ + wm8350_clear_bits(wm8350, WM8350_POWER_MGMT_4, + WM8350_FLL_ENA | WM8350_FLL_OSC_ENA); + + if (freq_out == 0 || freq_in == 0) + return ret; + + ret = fll_factors(&fll_div, freq_in, freq_out); + if (ret < 0) + return ret; + dev_dbg(wm8350->dev, + "FLL in %d FLL out %d N 0x%x K 0x%x div %d ratio %d", + freq_in, freq_out, fll_div.n, fll_div.k, fll_div.div, + fll_div.ratio); + + /* set up N.K & dividers */ + fll_1 = wm8350_codec_read(codec, WM8350_FLL_CONTROL_1) & + ~(WM8350_FLL_OUTDIV_MASK | WM8350_FLL_RSP_RATE_MASK | 0xc000); + wm8350_codec_write(codec, WM8350_FLL_CONTROL_1, + fll_1 | (fll_div.div << 8) | 0x50); + wm8350_codec_write(codec, WM8350_FLL_CONTROL_2, + (fll_div.ratio << 11) | (fll_div. + n & WM8350_FLL_N_MASK)); + wm8350_codec_write(codec, WM8350_FLL_CONTROL_3, fll_div.k); + fll_4 = wm8350_codec_read(codec, WM8350_FLL_CONTROL_4) & + ~(WM8350_FLL_FRAC | WM8350_FLL_SLOW_LOCK_REF); + wm8350_codec_write(codec, WM8350_FLL_CONTROL_4, + fll_4 | (fll_div.k ? WM8350_FLL_FRAC : 0) | + (fll_div.ratio == 8 ? WM8350_FLL_SLOW_LOCK_REF : 0)); + + /* power FLL on */ + wm8350_set_bits(wm8350, WM8350_POWER_MGMT_4, WM8350_FLL_OSC_ENA); + wm8350_set_bits(wm8350, WM8350_POWER_MGMT_4, WM8350_FLL_ENA); + + return 0; +} + +static int wm8350_set_bias_level(struct snd_soc_codec *codec, + enum snd_soc_bias_level level) +{ + struct wm8350 *wm8350 = codec->control_data; + struct wm8350_data *priv = codec->private_data; + struct wm8350_audio_platform_data *platform = + wm8350->codec.platform_data; + u16 pm1; + int ret; + + switch (level) { + case SND_SOC_BIAS_ON: + pm1 = wm8350_reg_read(wm8350, WM8350_POWER_MGMT_1) & + ~(WM8350_VMID_MASK | WM8350_CODEC_ISEL_MASK); + wm8350_reg_write(wm8350, WM8350_POWER_MGMT_1, + pm1 | WM8350_VMID_50K | + platform->codec_current_on << 14); + break; + + case SND_SOC_BIAS_PREPARE: + pm1 = wm8350_reg_read(wm8350, WM8350_POWER_MGMT_1); + pm1 &= ~WM8350_VMID_MASK; + wm8350_reg_write(wm8350, WM8350_POWER_MGMT_1, + pm1 | WM8350_VMID_50K); + break; + + case SND_SOC_BIAS_STANDBY: + if (codec->bias_level == SND_SOC_BIAS_OFF) { + ret = regulator_bulk_enable(ARRAY_SIZE(priv->supplies), + priv->supplies); + if (ret != 0) + return ret; + + /* Enable the system clock */ + wm8350_set_bits(wm8350, WM8350_POWER_MGMT_4, + WM8350_SYSCLK_ENA); + + /* mute DAC & outputs */ + wm8350_set_bits(wm8350, WM8350_DAC_MUTE, + WM8350_DAC_MUTE_ENA); + + /* discharge cap memory */ + wm8350_reg_write(wm8350, WM8350_ANTI_POP_CONTROL, + platform->dis_out1 | + (platform->dis_out2 << 2) | + (platform->dis_out3 << 4) | + (platform->dis_out4 << 6)); + + /* wait for discharge */ + schedule_timeout_interruptible(msecs_to_jiffies + (platform-> + cap_discharge_msecs)); + + /* enable antipop */ + wm8350_reg_write(wm8350, WM8350_ANTI_POP_CONTROL, + (platform->vmid_s_curve << 8)); + + /* ramp up vmid */ + wm8350_reg_write(wm8350, WM8350_POWER_MGMT_1, + (platform-> + codec_current_charge << 14) | + WM8350_VMID_5K | WM8350_VMIDEN | + WM8350_VBUFEN); + + /* wait for vmid */ + schedule_timeout_interruptible(msecs_to_jiffies + (platform-> + vmid_charge_msecs)); + + /* turn on vmid 300k */ + pm1 = wm8350_reg_read(wm8350, WM8350_POWER_MGMT_1) & + ~(WM8350_VMID_MASK | WM8350_CODEC_ISEL_MASK); + pm1 |= WM8350_VMID_300K | + (platform->codec_current_standby << 14); + wm8350_reg_write(wm8350, WM8350_POWER_MGMT_1, + pm1); + + + /* enable analogue bias */ + pm1 |= WM8350_BIASEN; + wm8350_reg_write(wm8350, WM8350_POWER_MGMT_1, pm1); + + /* disable antipop */ + wm8350_reg_write(wm8350, WM8350_ANTI_POP_CONTROL, 0); + + } else { + /* turn on vmid 300k and reduce current */ + pm1 = wm8350_reg_read(wm8350, WM8350_POWER_MGMT_1) & + ~(WM8350_VMID_MASK | WM8350_CODEC_ISEL_MASK); + wm8350_reg_write(wm8350, WM8350_POWER_MGMT_1, + pm1 | WM8350_VMID_300K | + (platform-> + codec_current_standby << 14)); + + } + break; + + case SND_SOC_BIAS_OFF: + + /* mute DAC & enable outputs */ + wm8350_set_bits(wm8350, WM8350_DAC_MUTE, WM8350_DAC_MUTE_ENA); + + wm8350_set_bits(wm8350, WM8350_POWER_MGMT_3, + WM8350_OUT1L_ENA | WM8350_OUT1R_ENA | + WM8350_OUT2L_ENA | WM8350_OUT2R_ENA); + + /* enable anti pop S curve */ + wm8350_reg_write(wm8350, WM8350_ANTI_POP_CONTROL, + (platform->vmid_s_curve << 8)); + + /* turn off vmid */ + pm1 = wm8350_reg_read(wm8350, WM8350_POWER_MGMT_1) & + ~WM8350_VMIDEN; + wm8350_reg_write(wm8350, WM8350_POWER_MGMT_1, pm1); + + /* wait */ + schedule_timeout_interruptible(msecs_to_jiffies + (platform-> + vmid_discharge_msecs)); + + wm8350_reg_write(wm8350, WM8350_ANTI_POP_CONTROL, + (platform->vmid_s_curve << 8) | + platform->dis_out1 | + (platform->dis_out2 << 2) | + (platform->dis_out3 << 4) | + (platform->dis_out4 << 6)); + + /* turn off VBuf and drain */ + pm1 = wm8350_reg_read(wm8350, WM8350_POWER_MGMT_1) & + ~(WM8350_VBUFEN | WM8350_VMID_MASK); + wm8350_reg_write(wm8350, WM8350_POWER_MGMT_1, + pm1 | WM8350_OUTPUT_DRAIN_EN); + + /* wait */ + schedule_timeout_interruptible(msecs_to_jiffies + (platform->drain_msecs)); + + pm1 &= ~WM8350_BIASEN; + wm8350_reg_write(wm8350, WM8350_POWER_MGMT_1, pm1); + + /* disable anti-pop */ + wm8350_reg_write(wm8350, WM8350_ANTI_POP_CONTROL, 0); + + wm8350_clear_bits(wm8350, WM8350_LOUT1_VOLUME, + WM8350_OUT1L_ENA); + wm8350_clear_bits(wm8350, WM8350_ROUT1_VOLUME, + WM8350_OUT1R_ENA); + wm8350_clear_bits(wm8350, WM8350_LOUT2_VOLUME, + WM8350_OUT2L_ENA); + wm8350_clear_bits(wm8350, WM8350_ROUT2_VOLUME, + WM8350_OUT2R_ENA); + + /* disable clock gen */ + wm8350_clear_bits(wm8350, WM8350_POWER_MGMT_4, + WM8350_SYSCLK_ENA); + + regulator_bulk_disable(ARRAY_SIZE(priv->supplies), + priv->supplies); + break; + } + codec->bias_level = level; + return 0; +} + +static int wm8350_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec = socdev->codec; + + wm8350_set_bias_level(codec, SND_SOC_BIAS_OFF); + return 0; +} + +static int wm8350_resume(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec = socdev->codec; + + wm8350_set_bias_level(codec, SND_SOC_BIAS_STANDBY); + + if (codec->suspend_bias_level == SND_SOC_BIAS_ON) + wm8350_set_bias_level(codec, SND_SOC_BIAS_ON); + + return 0; +} + +static struct snd_soc_codec *wm8350_codec; + +static int wm8350_probe(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec; + struct wm8350 *wm8350; + struct wm8350_data *priv; + int ret; + struct wm8350_output *out1; + struct wm8350_output *out2; + + BUG_ON(!wm8350_codec); + + socdev->codec = wm8350_codec; + codec = socdev->codec; + wm8350 = codec->control_data; + priv = codec->private_data; + + /* Enable the codec */ + wm8350_set_bits(wm8350, WM8350_POWER_MGMT_5, WM8350_CODEC_ENA); + + /* Enable robust clocking mode in ADC */ + wm8350_codec_write(codec, WM8350_SECURITY, 0xa7); + wm8350_codec_write(codec, 0xde, 0x13); + wm8350_codec_write(codec, WM8350_SECURITY, 0); + + /* read OUT1 & OUT2 volumes */ + out1 = &priv->out1; + out2 = &priv->out2; + out1->left_vol = (wm8350_reg_read(wm8350, WM8350_LOUT1_VOLUME) & + WM8350_OUT1L_VOL_MASK) >> WM8350_OUT1L_VOL_SHIFT; + out1->right_vol = (wm8350_reg_read(wm8350, WM8350_ROUT1_VOLUME) & + WM8350_OUT1R_VOL_MASK) >> WM8350_OUT1R_VOL_SHIFT; + out2->left_vol = (wm8350_reg_read(wm8350, WM8350_LOUT2_VOLUME) & + WM8350_OUT2L_VOL_MASK) >> WM8350_OUT1L_VOL_SHIFT; + out2->right_vol = (wm8350_reg_read(wm8350, WM8350_ROUT2_VOLUME) & + WM8350_OUT2R_VOL_MASK) >> WM8350_OUT1R_VOL_SHIFT; + wm8350_reg_write(wm8350, WM8350_LOUT1_VOLUME, 0); + wm8350_reg_write(wm8350, WM8350_ROUT1_VOLUME, 0); + wm8350_reg_write(wm8350, WM8350_LOUT2_VOLUME, 0); + wm8350_reg_write(wm8350, WM8350_ROUT2_VOLUME, 0); + + /* Latch VU bits & mute */ + wm8350_set_bits(wm8350, WM8350_LOUT1_VOLUME, + WM8350_OUT1_VU | WM8350_OUT1L_MUTE); + wm8350_set_bits(wm8350, WM8350_LOUT2_VOLUME, + WM8350_OUT2_VU | WM8350_OUT2L_MUTE); + wm8350_set_bits(wm8350, WM8350_ROUT1_VOLUME, + WM8350_OUT1_VU | WM8350_OUT1R_MUTE); + wm8350_set_bits(wm8350, WM8350_ROUT2_VOLUME, + WM8350_OUT2_VU | WM8350_OUT2R_MUTE); + + ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1); + if (ret < 0) { + dev_err(&pdev->dev, "failed to create pcms\n"); + return ret; + } + + wm8350_add_controls(codec); + wm8350_add_widgets(codec); + + wm8350_set_bias_level(codec, SND_SOC_BIAS_STANDBY); + + ret = snd_soc_init_card(socdev); + if (ret < 0) { + dev_err(&pdev->dev, "failed to register card\n"); + goto card_err; + } + + return 0; + +card_err: + snd_soc_free_pcms(socdev); + snd_soc_dapm_free(socdev); + return ret; +} + +static int wm8350_remove(struct platform_device *pdev) +{ + struct snd_soc_device *socdev = platform_get_drvdata(pdev); + struct snd_soc_codec *codec = socdev->codec; + struct wm8350 *wm8350 = codec->control_data; + int ret; + + /* cancel any work waiting to be queued. */ + ret = cancel_delayed_work(&codec->delayed_work); + + /* if there was any work waiting then we run it now and + * wait for its completion */ + if (ret) { + schedule_delayed_work(&codec->delayed_work, 0); + flush_scheduled_work(); + } + + wm8350_set_bias_level(codec, SND_SOC_BIAS_OFF); + + wm8350_clear_bits(wm8350, WM8350_POWER_MGMT_5, WM8350_CODEC_ENA); + + return 0; +} + +#define WM8350_RATES (SNDRV_PCM_RATE_8000_96000) + +#define WM8350_FORMATS (SNDRV_PCM_FMTBIT_S16_LE |\ + SNDRV_PCM_FMTBIT_S20_3LE |\ + SNDRV_PCM_FMTBIT_S24_LE) + +struct snd_soc_dai wm8350_dai = { + .name = "WM8350", + .playback = { + .stream_name = "Playback", + .channels_min = 1, + .channels_max = 2, + .rates = WM8350_RATES, + .formats = WM8350_FORMATS, + }, + .capture = { + .stream_name = "Capture", + .channels_min = 1, + .channels_max = 2, + .rates = WM8350_RATES, + .formats = WM8350_FORMATS, + }, + .ops = { + .hw_params = wm8350_pcm_hw_params, + .digital_mute = wm8350_mute, + .trigger = wm8350_pcm_trigger, + .set_fmt = wm8350_set_dai_fmt, + .set_sysclk = wm8350_set_dai_sysclk, + .set_pll = wm8350_set_fll, + .set_clkdiv = wm8350_set_clkdiv, + }, +}; +EXPORT_SYMBOL_GPL(wm8350_dai); + +struct snd_soc_codec_device soc_codec_dev_wm8350 = { + .probe = wm8350_probe, + .remove = wm8350_remove, + .suspend = wm8350_suspend, + .resume = wm8350_resume, +}; +EXPORT_SYMBOL_GPL(soc_codec_dev_wm8350); + +static int wm8350_codec_probe(struct platform_device *pdev) +{ + struct wm8350 *wm8350 = platform_get_drvdata(pdev); + struct wm8350_data *priv; + struct snd_soc_codec *codec; + int ret, i; + + if (wm8350->codec.platform_data == NULL) { + dev_err(&pdev->dev, "No audio platform data supplied\n"); + return -EINVAL; + } + + priv = kzalloc(sizeof(struct wm8350_data), GFP_KERNEL); + if (priv == NULL) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(supply_names); i++) + priv->supplies[i].supply = supply_names[i]; + + ret = regulator_bulk_get(wm8350->dev, ARRAY_SIZE(priv->supplies), + priv->supplies); + if (ret != 0) + goto err_priv; + + codec = &priv->codec; + wm8350->codec.codec = codec; + + wm8350_dai.dev = &pdev->dev; + + mutex_init(&codec->mutex); + INIT_LIST_HEAD(&codec->dapm_widgets); + INIT_LIST_HEAD(&codec->dapm_paths); + codec->dev = &pdev->dev; + codec->name = "WM8350"; + codec->owner = THIS_MODULE; + codec->read = wm8350_codec_read; + codec->write = wm8350_codec_write; + codec->bias_level = SND_SOC_BIAS_OFF; + codec->set_bias_level = wm8350_set_bias_level; + codec->dai = &wm8350_dai; + codec->num_dai = 1; + codec->reg_cache_size = WM8350_MAX_REGISTER; + codec->private_data = priv; + codec->control_data = wm8350; + + /* Put the codec into reset if it wasn't already */ + wm8350_clear_bits(wm8350, WM8350_POWER_MGMT_5, WM8350_CODEC_ENA); + + INIT_DELAYED_WORK(&codec->delayed_work, wm8350_pga_work); + ret = snd_soc_register_codec(codec); + if (ret != 0) + goto err_supply; + + wm8350_codec = codec; + + ret = snd_soc_register_dai(&wm8350_dai); + if (ret != 0) + goto err_codec; + return 0; + +err_codec: + snd_soc_unregister_codec(codec); +err_supply: + regulator_bulk_free(ARRAY_SIZE(priv->supplies), priv->supplies); +err_priv: + kfree(priv); + wm8350_codec = NULL; + return ret; +} + +static int wm8350_codec_remove(struct platform_device *pdev) +{ + struct wm8350 *wm8350 = platform_get_drvdata(pdev); + struct snd_soc_codec *codec = wm8350->codec.codec; + struct wm8350_data *priv = codec->private_data; + + snd_soc_unregister_dai(&wm8350_dai); + snd_soc_unregister_codec(codec); + regulator_bulk_free(ARRAY_SIZE(priv->supplies), priv->supplies); + kfree(priv); + wm8350_codec = NULL; + return 0; +} + +static struct platform_driver wm8350_codec_driver = { + .driver = { + .name = "wm8350-codec", + .owner = THIS_MODULE, + }, + .probe = wm8350_codec_probe, + .remove = __devexit_p(wm8350_codec_remove), +}; + +static __init int wm8350_init(void) +{ + return platform_driver_register(&wm8350_codec_driver); +} +module_init(wm8350_init); + +static __exit void wm8350_exit(void) +{ + platform_driver_unregister(&wm8350_codec_driver); +} +module_exit(wm8350_exit); + +MODULE_DESCRIPTION("ASoC WM8350 driver"); +MODULE_AUTHOR("Liam Girdwood"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:wm8350-codec"); diff --git a/sound/soc/codecs/wm8350.h b/sound/soc/codecs/wm8350.h new file mode 100644 index 000000000000..cc2887aa6c38 --- /dev/null +++ b/sound/soc/codecs/wm8350.h @@ -0,0 +1,20 @@ +/* + * wm8350.h - WM8903 audio codec interface + * + * Copyright 2008 Wolfson Microelectronics PLC. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef _WM8350_H +#define _WM8350_H + +#include + +extern struct snd_soc_dai wm8350_dai; +extern struct snd_soc_codec_device soc_codec_dev_wm8350; + +#endif -- cgit v1.2.3 From 64db4cfff99c04cd5f550357edcc8780f96b54a2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 18 Dec 2008 21:55:32 +0100 Subject: "Tree RCU": scalable classic RCU implementation This patch fixes a long-standing performance bug in classic RCU that results in massive internal-to-RCU lock contention on systems with more than a few hundred CPUs. Although this patch creates a separate flavor of RCU for ease of review and patch maintenance, it is intended to replace classic RCU. This patch still handles stress better than does mainline, so I am still calling it ready for inclusion. This patch is against the -tip tree. Nevertheless, experience on an actual 1000+ CPU machine would still be most welcome. Most of the changes noted below were found while creating an rcutiny (which should permit ejecting the current rcuclassic) and while doing detailed line-by-line documentation. Updates from v9 (http://lkml.org/lkml/2008/12/2/334): o Fixes from remainder of line-by-line code walkthrough, including comment spelling, initialization, undesirable narrowing due to type conversion, removing redundant memory barriers, removing redundant local-variable initialization, and removing redundant local variables. I do not believe that any of these fixes address the CPU-hotplug issues that Andi Kleen was seeing, but please do give it a whirl in case the machine is smarter than I am. A writeup from the walkthrough may be found at the following URL, in case you are suffering from terminal insomnia or masochism: http://www.kernel.org/pub/linux/kernel/people/paulmck/tmp/rcutree-walkthrough.2008.12.16a.pdf o Made rcutree tracing use seq_file, as suggested some time ago by Lai Jiangshan. o Added a .csv variant of the rcudata debugfs trace file, to allow people having thousands of CPUs to drop the data into a spreadsheet. Tested with oocalc and gnumeric. Updated documentation to suit. Updates from v8 (http://lkml.org/lkml/2008/11/15/139): o Fix a theoretical race between grace-period initialization and force_quiescent_state() that could occur if more than three jiffies were required to carry out the grace-period initialization. Which it might, if you had enough CPUs. o Apply Ingo's printk-standardization patch. o Substitute local variables for repeated accesses to global variables. o Fix comment misspellings and redundant (but harmless) increments of ->n_rcu_pending (this latter after having explicitly added it). o Apply checkpatch fixes. Updates from v7 (http://lkml.org/lkml/2008/10/10/291): o Fixed a number of problems noted by Gautham Shenoy, including the cpu-stall-detection bug that he was having difficulty convincing me was real. ;-) o Changed cpu-stall detection to wait for ten seconds rather than three in order to reduce false positive, as suggested by Ingo Molnar. o Produced a design document (http://lwn.net/Articles/305782/). The act of writing this document uncovered a number of both theoretical and "here and now" bugs as noted below. o Fix dynticks_nesting accounting confusion, simplify WARN_ON() condition, fix kerneldoc comments, and add memory barriers in dynticks interface functions. o Add more data to tracing. o Remove unused "rcu_barrier" field from rcu_data structure. o Count calls to rcu_pending() from scheduling-clock interrupt to use as a surrogate timebase should jiffies stop counting. o Fix a theoretical race between force_quiescent_state() and grace-period initialization. Yes, initialization does have to go on for some jiffies for this race to occur, but given enough CPUs... Updates from v6 (http://lkml.org/lkml/2008/9/23/448): o Fix a number of checkpatch.pl complaints. o Apply review comments from Ingo Molnar and Lai Jiangshan on the stall-detection code. o Fix several bugs in !CONFIG_SMP builds. o Fix a misspelled config-parameter name so that RCU now announces at boot time if stall detection is configured. o Run tests on numerous combinations of configurations parameters, which after the fixes above, now build and run correctly. Updates from v5 (http://lkml.org/lkml/2008/9/15/92, bad subject line): o Fix a compiler error in the !CONFIG_FANOUT_EXACT case (blew a changeset some time ago, and finally got around to retesting this option). o Fix some tracing bugs in rcupreempt that caused incorrect totals to be printed. o I now test with a more brutal random-selection online/offline script (attached). Probably more brutal than it needs to be on the people reading it as well, but so it goes. o A number of optimizations and usability improvements: o Make rcu_pending() ignore the grace-period timeout when there is no grace period in progress. o Make force_quiescent_state() avoid going for a global lock in the case where there is no grace period in progress. o Rearrange struct fields to improve struct layout. o Make call_rcu() initiate a grace period if RCU was idle, rather than waiting for the next scheduling clock interrupt. o Invoke rcu_irq_enter() and rcu_irq_exit() only when idle, as suggested by Andi Kleen. I still don't completely trust this change, and might back it out. o Make CONFIG_RCU_TRACE be the single config variable manipulated for all forms of RCU, instead of the prior confusion. o Document tracing files and formats for both rcupreempt and rcutree. Updates from v4 for those missing v5 given its bad subject line: o Separated dynticks interface so that NMIs and irqs call separate functions, greatly simplifying it. In particular, this code no longer requires a proof of correctness. ;-) o Separated dynticks state out into its own per-CPU structure, avoiding the duplicated accounting. o The case where a dynticks-idle CPU runs an irq handler that invokes call_rcu() is now correctly handled, forcing that CPU out of dynticks-idle mode. o Review comments have been applied (thank you all!!!). For but one example, fixed the dynticks-ordering issue that Manfred pointed out, saving me much debugging. ;-) o Adjusted rcuclassic and rcupreempt to handle dynticks changes. Attached is an updated patch to Classic RCU that applies a hierarchy, greatly reducing the contention on the top-level lock for large machines. This passes 10-hour concurrent rcutorture and online-offline testing on 128-CPU ppc64 without dynticks enabled, and exposes some timekeeping bugs in presence of dynticks (exciting working on a system where "sleep 1" hangs until interrupted...), which were fixed in the 2.6.27 kernel. It is getting more reliable than mainline by some measures, so the next version will be against -tip for inclusion. See also Manfred Spraul's recent patches (or his earlier work from 2004 at http://marc.info/?l=linux-kernel&m=108546384711797&w=2). We will converge onto a common patch in the fullness of time, but are currently exploring different regions of the design space. That said, I have already gratefully stolen quite a few of Manfred's ideas. This patch provides CONFIG_RCU_FANOUT, which controls the bushiness of the RCU hierarchy. Defaults to 32 on 32-bit machines and 64 on 64-bit machines. If CONFIG_NR_CPUS is less than CONFIG_RCU_FANOUT, there is no hierarchy. By default, the RCU initialization code will adjust CONFIG_RCU_FANOUT to balance the hierarchy, so strongly NUMA architectures may choose to set CONFIG_RCU_FANOUT_EXACT to disable this balancing, allowing the hierarchy to be exactly aligned to the underlying hardware. Up to two levels of hierarchy are permitted (in addition to the root node), allowing up to 16,384 CPUs on 32-bit systems and up to 262,144 CPUs on 64-bit systems. I just know that I am going to regret saying this, but this seems more than sufficient for the foreseeable future. (Some architectures might wish to set CONFIG_RCU_FANOUT=4, which would limit such architectures to 64 CPUs. If this becomes a real problem, additional levels can be added, but I doubt that it will make a significant difference on real hardware.) In the common case, a given CPU will manipulate its private rcu_data structure and the rcu_node structure that it shares with its immediate neighbors. This can reduce both lock and memory contention by multiple orders of magnitude, which should eliminate the need for the strange manipulations that are reported to be required when running Linux on very large systems. Some shortcomings: o More bugs will probably surface as a result of an ongoing line-by-line code inspection. Patches will be provided as required. o There are probably hangs, rcutorture failures, &c. Seems quite stable on a 128-CPU machine, but that is kind of small compared to 4096 CPUs. However, seems to do better than mainline. Patches will be provided as required. o The memory footprint of this version is several KB larger than rcuclassic. A separate UP-only rcutiny patch will be provided, which will reduce the memory footprint significantly, even compared to the old rcuclassic. One such patch passes light testing, and has a memory footprint smaller even than rcuclassic. Initial reaction from various embedded guys was "it is not worth it", so am putting it aside. Credits: o Manfred Spraul for ideas, review comments, and bugs spotted, as well as some good friendly competition. ;-) o Josh Triplett, Ingo Molnar, Peter Zijlstra, Mathieu Desnoyers, Lai Jiangshan, Andi Kleen, Andy Whitcroft, and Andrew Morton for reviews and comments. o Thomas Gleixner for much-needed help with some timer issues (see patches below). o Jon M. Tollefson, Tim Pepper, Andrew Theurer, Jose R. Santos, Andy Whitcroft, Darrick Wong, Nishanth Aravamudan, Anton Blanchard, Dave Kleikamp, and Nathan Lynch for keeping machines alive despite my heavy abuse^Wtesting. Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- Documentation/RCU/00-INDEX | 2 + Documentation/RCU/trace.txt | 413 +++++++++ arch/powerpc/platforms/pseries/rtasd.c | 4 + include/linux/hardirq.h | 14 +- include/linux/rcupdate.h | 10 +- include/linux/rcutree.h | 329 +++++++ init/Kconfig | 18 +- kernel/Kconfig.preempt | 62 +- kernel/Makefile | 6 +- kernel/rcupreempt.c | 10 + kernel/rcupreempt_trace.c | 10 +- kernel/rcutree.c | 1535 ++++++++++++++++++++++++++++++++ kernel/rcutree_trace.c | 271 ++++++ kernel/softirq.c | 5 +- lib/Kconfig.debug | 13 + 15 files changed, 2671 insertions(+), 31 deletions(-) create mode 100644 Documentation/RCU/trace.txt create mode 100644 include/linux/rcutree.h create mode 100644 kernel/rcutree.c create mode 100644 kernel/rcutree_trace.c (limited to 'include') diff --git a/Documentation/RCU/00-INDEX b/Documentation/RCU/00-INDEX index 461481dfb7c3..7dc0695a8f90 100644 --- a/Documentation/RCU/00-INDEX +++ b/Documentation/RCU/00-INDEX @@ -16,6 +16,8 @@ RTFP.txt - List of RCU papers (bibliography) going back to 1980. torture.txt - RCU Torture Test Operation (CONFIG_RCU_TORTURE_TEST) +trace.txt + - CONFIG_RCU_TRACE debugfs files and formats UP.txt - RCU on Uniprocessor Systems whatisRCU.txt diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt new file mode 100644 index 000000000000..068848240a8b --- /dev/null +++ b/Documentation/RCU/trace.txt @@ -0,0 +1,413 @@ +CONFIG_RCU_TRACE debugfs Files and Formats + + +The rcupreempt and rcutree implementations of RCU provide debugfs trace +output that summarizes counters and state. This information is useful for +debugging RCU itself, and can sometimes also help to debug abuses of RCU. +Note that the rcuclassic implementation of RCU does not provide debugfs +trace output. + +The following sections describe the debugfs files and formats for +preemptable RCU (rcupreempt) and hierarchical RCU (rcutree). + + +Preemptable RCU debugfs Files and Formats + +This implementation of RCU provides three debugfs files under the +top-level directory RCU: rcu/rcuctrs (which displays the per-CPU +counters used by preemptable RCU) rcu/rcugp (which displays grace-period +counters), and rcu/rcustats (which internal counters for debugging RCU). + +The output of "cat rcu/rcuctrs" looks as follows: + +CPU last cur F M + 0 5 -5 0 0 + 1 -1 0 0 0 + 2 0 1 0 0 + 3 0 1 0 0 + 4 0 1 0 0 + 5 0 1 0 0 + 6 0 2 0 0 + 7 0 -1 0 0 + 8 0 1 0 0 +ggp = 26226, state = waitzero + +The per-CPU fields are as follows: + +o "CPU" gives the CPU number. Offline CPUs are not displayed. + +o "last" gives the value of the counter that is being decremented + for the current grace period phase. In the example above, + the counters sum to 4, indicating that there are still four + RCU read-side critical sections still running that started + before the last counter flip. + +o "cur" gives the value of the counter that is currently being + both incremented (by rcu_read_lock()) and decremented (by + rcu_read_unlock()). In the example above, the counters sum to + 1, indicating that there is only one RCU read-side critical section + still running that started after the last counter flip. + +o "F" indicates whether RCU is waiting for this CPU to acknowledge + a counter flip. In the above example, RCU is not waiting on any, + which is consistent with the state being "waitzero" rather than + "waitack". + +o "M" indicates whether RCU is waiting for this CPU to execute a + memory barrier. In the above example, RCU is not waiting on any, + which is consistent with the state being "waitzero" rather than + "waitmb". + +o "ggp" is the global grace-period counter. + +o "state" is the RCU state, which can be one of the following: + + o "idle": there is no grace period in progress. + + o "waitack": RCU just incremented the global grace-period + counter, which has the effect of reversing the roles of + the "last" and "cur" counters above, and is waiting for + all the CPUs to acknowledge the flip. Once the flip has + been acknowledged, CPUs will no longer be incrementing + what are now the "last" counters, so that their sum will + decrease monotonically down to zero. + + o "waitzero": RCU is waiting for the sum of the "last" counters + to decrease to zero. + + o "waitmb": RCU is waiting for each CPU to execute a memory + barrier, which ensures that instructions from a given CPU's + last RCU read-side critical section cannot be reordered + with instructions following the memory-barrier instruction. + +The output of "cat rcu/rcugp" looks as follows: + +oldggp=48870 newggp=48873 + +Note that reading from this file provokes a synchronize_rcu(). The +"oldggp" value is that of "ggp" from rcu/rcuctrs above, taken before +executing the synchronize_rcu(), and the "newggp" value is also the +"ggp" value, but taken after the synchronize_rcu() command returns. + + +The output of "cat rcu/rcugp" looks as follows: + +na=1337955 nl=40 wa=1337915 wl=44 da=1337871 dl=0 dr=1337871 di=1337871 +1=50989 e1=6138 i1=49722 ie1=82 g1=49640 a1=315203 ae1=265563 a2=49640 +z1=1401244 ze1=1351605 z2=49639 m1=5661253 me1=5611614 m2=49639 + +These are counters tracking internal preemptable-RCU events, however, +some of them may be useful for debugging algorithms using RCU. In +particular, the "nl", "wl", and "dl" values track the number of RCU +callbacks in various states. The fields are as follows: + +o "na" is the total number of RCU callbacks that have been enqueued + since boot. + +o "nl" is the number of RCU callbacks waiting for the previous + grace period to end so that they can start waiting on the next + grace period. + +o "wa" is the total number of RCU callbacks that have started waiting + for a grace period since boot. "na" should be roughly equal to + "nl" plus "wa". + +o "wl" is the number of RCU callbacks currently waiting for their + grace period to end. + +o "da" is the total number of RCU callbacks whose grace periods + have completed since boot. "wa" should be roughly equal to + "wl" plus "da". + +o "dr" is the total number of RCU callbacks that have been removed + from the list of callbacks ready to invoke. "dr" should be roughly + equal to "da". + +o "di" is the total number of RCU callbacks that have been invoked + since boot. "di" should be roughly equal to "da", though some + early versions of preemptable RCU had a bug so that only the + last CPU's count of invocations was displayed, rather than the + sum of all CPU's counts. + +o "1" is the number of calls to rcu_try_flip(). This should be + roughly equal to the sum of "e1", "i1", "a1", "z1", and "m1" + described below. In other words, the number of times that + the state machine is visited should be equal to the sum of the + number of times that each state is visited plus the number of + times that the state-machine lock acquisition failed. + +o "e1" is the number of times that rcu_try_flip() was unable to + acquire the fliplock. + +o "i1" is the number of calls to rcu_try_flip_idle(). + +o "ie1" is the number of times rcu_try_flip_idle() exited early + due to the calling CPU having no work for RCU. + +o "g1" is the number of times that rcu_try_flip_idle() decided + to start a new grace period. "i1" should be roughly equal to + "ie1" plus "g1". + +o "a1" is the number of calls to rcu_try_flip_waitack(). + +o "ae1" is the number of times that rcu_try_flip_waitack() found + that at least one CPU had not yet acknowledge the new grace period + (AKA "counter flip"). + +o "a2" is the number of time rcu_try_flip_waitack() found that + all CPUs had acknowledged. "a1" should be roughly equal to + "ae1" plus "a2". (This particular output was collected on + a 128-CPU machine, hence the smaller-than-usual fraction of + calls to rcu_try_flip_waitack() finding all CPUs having already + acknowledged.) + +o "z1" is the number of calls to rcu_try_flip_waitzero(). + +o "ze1" is the number of times that rcu_try_flip_waitzero() found + that not all of the old RCU read-side critical sections had + completed. + +o "z2" is the number of times that rcu_try_flip_waitzero() finds + the sum of the counters equal to zero, in other words, that + all of the old RCU read-side critical sections had completed. + The value of "z1" should be roughly equal to "ze1" plus + "z2". + +o "m1" is the number of calls to rcu_try_flip_waitmb(). + +o "me1" is the number of times that rcu_try_flip_waitmb() finds + that at least one CPU has not yet executed a memory barrier. + +o "m2" is the number of times that rcu_try_flip_waitmb() finds that + all CPUs have executed a memory barrier. + + +Hierarchical RCU debugfs Files and Formats + +This implementation of RCU provides three debugfs files under the +top-level directory RCU: rcu/rcudata (which displays fields in struct +rcu_data), rcu/rcugp (which displays grace-period counters), and +rcu/rcuhier (which displays the struct rcu_node hierarchy). + +The output of "cat rcu/rcudata" looks as follows: + +rcu: + 0 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=1 rp=3c2a dt=23301/73 dn=2 df=1882 of=0 ri=2126 ql=2 b=10 + 1 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=3 rp=39a6 dt=78073/1 dn=2 df=1402 of=0 ri=1875 ql=46 b=10 + 2 c=4010 g=4010 pq=1 pqc=4010 qp=0 rpfq=-5 rp=1d12 dt=16646/0 dn=2 df=3140 of=0 ri=2080 ql=0 b=10 + 3 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=2b50 dt=21159/1 dn=2 df=2230 of=0 ri=1923 ql=72 b=10 + 4 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1644 dt=5783/1 dn=2 df=3348 of=0 ri=2805 ql=7 b=10 + 5 c=4012 g=4013 pq=0 pqc=4011 qp=1 rpfq=3 rp=1aac dt=5879/1 dn=2 df=3140 of=0 ri=2066 ql=10 b=10 + 6 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=ed8 dt=5847/1 dn=2 df=3797 of=0 ri=1266 ql=10 b=10 + 7 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1fa2 dt=6199/1 dn=2 df=2795 of=0 ri=2162 ql=28 b=10 +rcu_bh: + 0 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-145 rp=21d6 dt=23301/73 dn=2 df=0 of=0 ri=0 ql=0 b=10 + 1 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-170 rp=20ce dt=78073/1 dn=2 df=26 of=0 ri=5 ql=0 b=10 + 2 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-83 rp=fbd dt=16646/0 dn=2 df=28 of=0 ri=4 ql=0 b=10 + 3 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-105 rp=178c dt=21159/1 dn=2 df=28 of=0 ri=2 ql=0 b=10 + 4 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-30 rp=b54 dt=5783/1 dn=2 df=32 of=0 ri=0 ql=0 b=10 + 5 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-29 rp=df5 dt=5879/1 dn=2 df=30 of=0 ri=3 ql=0 b=10 + 6 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-28 rp=788 dt=5847/1 dn=2 df=32 of=0 ri=0 ql=0 b=10 + 7 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-53 rp=1098 dt=6199/1 dn=2 df=30 of=0 ri=3 ql=0 b=10 + +The first section lists the rcu_data structures for rcu, the second for +rcu_bh. Each section has one line per CPU, or eight for this 8-CPU system. +The fields are as follows: + +o The number at the beginning of each line is the CPU number. + CPUs numbers followed by an exclamation mark are offline, + but have been online at least once since boot. There will be + no output for CPUs that have never been online, which can be + a good thing in the surprisingly common case where NR_CPUS is + substantially larger than the number of actual CPUs. + +o "c" is the count of grace periods that this CPU believes have + completed. CPUs in dynticks idle mode may lag quite a ways + behind, for example, CPU 4 under "rcu" above, which has slept + through the past 25 RCU grace periods. It is not unusual to + see CPUs lagging by thousands of grace periods. + +o "g" is the count of grace periods that this CPU believes have + started. Again, CPUs in dynticks idle mode may lag behind. + If the "c" and "g" values are equal, this CPU has already + reported a quiescent state for the last RCU grace period that + it is aware of, otherwise, the CPU believes that it owes RCU a + quiescent state. + +o "pq" indicates that this CPU has passed through a quiescent state + for the current grace period. It is possible for "pq" to be + "1" and "c" different than "g", which indicates that although + the CPU has passed through a quiescent state, either (1) this + CPU has not yet reported that fact, (2) some other CPU has not + yet reported for this grace period, or (3) both. + +o "pqc" indicates which grace period the last-observed quiescent + state for this CPU corresponds to. This is important for handling + the race between CPU 0 reporting an extended dynticks-idle + quiescent state for CPU 1 and CPU 1 suddenly waking up and + reporting its own quiescent state. If CPU 1 was the last CPU + for the current grace period, then the CPU that loses this race + will attempt to incorrectly mark CPU 1 as having checked in for + the next grace period! + +o "qp" indicates that RCU still expects a quiescent state from + this CPU. + +o "rpfq" is the number of rcu_pending() calls on this CPU required + to induce this CPU to invoke force_quiescent_state(). + +o "rp" is low-order four hex digits of the count of how many times + rcu_pending() has been invoked on this CPU. + +o "dt" is the current value of the dyntick counter that is incremented + when entering or leaving dynticks idle state, either by the + scheduler or by irq. The number after the "/" is the interrupt + nesting depth when in dyntick-idle state, or one greater than + the interrupt-nesting depth otherwise. + + This field is displayed only for CONFIG_NO_HZ kernels. + +o "dn" is the current value of the dyntick counter that is incremented + when entering or leaving dynticks idle state via NMI. If both + the "dt" and "dn" values are even, then this CPU is in dynticks + idle mode and may be ignored by RCU. If either of these two + counters is odd, then RCU must be alert to the possibility of + an RCU read-side critical section running on this CPU. + + This field is displayed only for CONFIG_NO_HZ kernels. + +o "df" is the number of times that some other CPU has forced a + quiescent state on behalf of this CPU due to this CPU being in + dynticks-idle state. + + This field is displayed only for CONFIG_NO_HZ kernels. + +o "of" is the number of times that some other CPU has forced a + quiescent state on behalf of this CPU due to this CPU being + offline. In a perfect world, this might neve happen, but it + turns out that offlining and onlining a CPU can take several grace + periods, and so there is likely to be an extended period of time + when RCU believes that the CPU is online when it really is not. + Please note that erring in the other direction (RCU believing a + CPU is offline when it is really alive and kicking) is a fatal + error, so it makes sense to err conservatively. + +o "ri" is the number of times that RCU has seen fit to send a + reschedule IPI to this CPU in order to get it to report a + quiescent state. + +o "ql" is the number of RCU callbacks currently residing on + this CPU. This is the total number of callbacks, regardless + of what state they are in (new, waiting for grace period to + start, waiting for grace period to end, ready to invoke). + +o "b" is the batch limit for this CPU. If more than this number + of RCU callbacks is ready to invoke, then the remainder will + be deferred. + + +The output of "cat rcu/rcugp" looks as follows: + +rcu: completed=33062 gpnum=33063 +rcu_bh: completed=464 gpnum=464 + +Again, this output is for both "rcu" and "rcu_bh". The fields are +taken from the rcu_state structure, and are as follows: + +o "completed" is the number of grace periods that have completed. + It is comparable to the "c" field from rcu/rcudata in that a + CPU whose "c" field matches the value of "completed" is aware + that the corresponding RCU grace period has completed. + +o "gpnum" is the number of grace periods that have started. It is + comparable to the "g" field from rcu/rcudata in that a CPU + whose "g" field matches the value of "gpnum" is aware that the + corresponding RCU grace period has started. + + If these two fields are equal (as they are for "rcu_bh" above), + then there is no grace period in progress, in other words, RCU + is idle. On the other hand, if the two fields differ (as they + do for "rcu" above), then an RCU grace period is in progress. + + +The output of "cat rcu/rcuhier" looks as follows, with very long lines: + +c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 +1/1 0:127 ^0 +3/3 0:35 ^0 0/0 36:71 ^1 0/0 72:107 ^2 0/0 108:127 ^3 +3/3f 0:5 ^0 2/3 6:11 ^1 0/0 12:17 ^2 0/0 18:23 ^3 0/0 24:29 ^4 0/0 30:35 ^5 0/0 36:41 ^0 0/0 42:47 ^1 0/0 48:53 ^2 0/0 54:59 ^3 0/0 60:65 ^4 0/0 66:71 ^5 0/0 72:77 ^0 0/0 78:83 ^1 0/0 84:89 ^2 0/0 90:95 ^3 0/0 96:101 ^4 0/0 102:107 ^5 0/0 108:113 ^0 0/0 114:119 ^1 0/0 120:125 ^2 0/0 126:127 ^3 +rcu_bh: +c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 +0/1 0:127 ^0 +0/3 0:35 ^0 0/0 36:71 ^1 0/0 72:107 ^2 0/0 108:127 ^3 +0/3f 0:5 ^0 0/3 6:11 ^1 0/0 12:17 ^2 0/0 18:23 ^3 0/0 24:29 ^4 0/0 30:35 ^5 0/0 36:41 ^0 0/0 42:47 ^1 0/0 48:53 ^2 0/0 54:59 ^3 0/0 60:65 ^4 0/0 66:71 ^5 0/0 72:77 ^0 0/0 78:83 ^1 0/0 84:89 ^2 0/0 90:95 ^3 0/0 96:101 ^4 0/0 102:107 ^5 0/0 108:113 ^0 0/0 114:119 ^1 0/0 120:125 ^2 0/0 126:127 ^3 + +This is once again split into "rcu" and "rcu_bh" portions. The fields are +as follows: + +o "c" is exactly the same as "completed" under rcu/rcugp. + +o "g" is exactly the same as "gpnum" under rcu/rcugp. + +o "s" is the "signaled" state that drives force_quiescent_state()'s + state machine. + +o "jfq" is the number of jiffies remaining for this grace period + before force_quiescent_state() is invoked to help push things + along. Note that CPUs in dyntick-idle mode thoughout the grace + period will not report on their own, but rather must be check by + some other CPU via force_quiescent_state(). + +o "j" is the low-order four hex digits of the jiffies counter. + Yes, Paul did run into a number of problems that turned out to + be due to the jiffies counter no longer counting. Why do you ask? + +o "nfqs" is the number of calls to force_quiescent_state() since + boot. + +o "nfqsng" is the number of useless calls to force_quiescent_state(), + where there wasn't actually a grace period active. This can + happen due to races. The number in parentheses is the difference + between "nfqs" and "nfqsng", or the number of times that + force_quiescent_state() actually did some real work. + +o "fqlh" is the number of calls to force_quiescent_state() that + exited immediately (without even being counted in nfqs above) + due to contention on ->fqslock. + +o Each element of the form "1/1 0:127 ^0" represents one struct + rcu_node. Each line represents one level of the hierarchy, from + root to leaves. It is best to think of the rcu_data structures + as forming yet another level after the leaves. Note that there + might be either one, two, or three levels of rcu_node structures, + depending on the relationship between CONFIG_RCU_FANOUT and + CONFIG_NR_CPUS. + + o The numbers separated by the "/" are the qsmask followed + by the qsmaskinit. The qsmask will have one bit + set for each entity in the next lower level that + has not yet checked in for the current grace period. + The qsmaskinit will have one bit for each entity that is + currently expected to check in during each grace period. + The value of qsmaskinit is assigned to that of qsmask + at the beginning of each grace period. + + For example, for "rcu", the qsmask of the first entry + of the lowest level is 0x14, meaning that we are still + waiting for CPUs 2 and 4 to check in for the current + grace period. + + o The numbers separated by the ":" are the range of CPUs + served by this struct rcu_node. This can be helpful + in working out how the hierarchy is wired together. + + For example, the first entry at the lowest level shows + "0:5", indicating that it covers CPUs 0 through 5. + + o The number after the "^" indicates the bit in the + next higher level rcu_node structure that this + rcu_node structure corresponds to. + + For example, the first entry at the lowest level shows + "^0", indicating that it corresponds to bit zero in + the first entry at the middle level. diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c index f4e55be2eea9..afad9f5ac0ac 100644 --- a/arch/powerpc/platforms/pseries/rtasd.c +++ b/arch/powerpc/platforms/pseries/rtasd.c @@ -208,6 +208,7 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal) break; case ERR_TYPE_KERNEL_PANIC: default: + WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ spin_unlock_irqrestore(&rtasd_log_lock, s); return; } @@ -227,6 +228,7 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal) /* Check to see if we need to or have stopped logging */ if (fatal || !logging_enabled) { logging_enabled = 0; + WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ spin_unlock_irqrestore(&rtasd_log_lock, s); return; } @@ -249,11 +251,13 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal) else rtas_log_start += 1; + WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ spin_unlock_irqrestore(&rtasd_log_lock, s); wake_up_interruptible(&rtas_log_wait); break; case ERR_TYPE_KERNEL_PANIC: default: + WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ spin_unlock_irqrestore(&rtasd_log_lock, s); return; } diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 181006cc94a0..9b70b9231693 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -118,13 +118,17 @@ static inline void account_system_vtime(struct task_struct *tsk) } #endif -#if defined(CONFIG_PREEMPT_RCU) && defined(CONFIG_NO_HZ) +#if defined(CONFIG_NO_HZ) && !defined(CONFIG_CLASSIC_RCU) extern void rcu_irq_enter(void); extern void rcu_irq_exit(void); +extern void rcu_nmi_enter(void); +extern void rcu_nmi_exit(void); #else # define rcu_irq_enter() do { } while (0) # define rcu_irq_exit() do { } while (0) -#endif /* CONFIG_PREEMPT_RCU */ +# define rcu_nmi_enter() do { } while (0) +# define rcu_nmi_exit() do { } while (0) +#endif /* #if defined(CONFIG_NO_HZ) && !defined(CONFIG_CLASSIC_RCU) */ /* * It is safe to do non-atomic ops on ->hardirq_context, @@ -134,7 +138,6 @@ extern void rcu_irq_exit(void); */ #define __irq_enter() \ do { \ - rcu_irq_enter(); \ account_system_vtime(current); \ add_preempt_count(HARDIRQ_OFFSET); \ trace_hardirq_enter(); \ @@ -153,7 +156,6 @@ extern void irq_enter(void); trace_hardirq_exit(); \ account_system_vtime(current); \ sub_preempt_count(HARDIRQ_OFFSET); \ - rcu_irq_exit(); \ } while (0) /* @@ -161,7 +163,7 @@ extern void irq_enter(void); */ extern void irq_exit(void); -#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0) -#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) +#define nmi_enter() do { lockdep_off(); rcu_nmi_enter(); __irq_enter(); } while (0) +#define nmi_exit() do { __irq_exit(); rcu_nmi_exit(); lockdep_on(); } while (0) #endif /* LINUX_HARDIRQ_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 86f1f5e43e33..bfd289aff576 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -52,11 +52,15 @@ struct rcu_head { void (*func)(struct rcu_head *head); }; -#ifdef CONFIG_CLASSIC_RCU +#if defined(CONFIG_CLASSIC_RCU) #include -#else /* #ifdef CONFIG_CLASSIC_RCU */ +#elif defined(CONFIG_TREE_RCU) +#include +#elif defined(CONFIG_PREEMPT_RCU) #include -#endif /* #else #ifdef CONFIG_CLASSIC_RCU */ +#else +#error "Unknown RCU implementation specified to kernel configuration" +#endif /* #else #if defined(CONFIG_CLASSIC_RCU) */ #define RCU_HEAD_INIT { .next = NULL, .func = NULL } #define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h new file mode 100644 index 000000000000..d4368b7975c3 --- /dev/null +++ b/include/linux/rcutree.h @@ -0,0 +1,329 @@ +/* + * Read-Copy Update mechanism for mutual exclusion (tree-based version) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2008 + * + * Author: Dipankar Sarma + * Paul E. McKenney Hierarchical algorithm + * + * Based on the original work by Paul McKenney + * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU + */ + +#ifndef __LINUX_RCUTREE_H +#define __LINUX_RCUTREE_H + +#include +#include +#include +#include +#include +#include + +/* + * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. + * In theory, it should be possible to add more levels straightforwardly. + * In practice, this has not been tested, so there is probably some + * bug somewhere. + */ +#define MAX_RCU_LVLS 3 +#define RCU_FANOUT (CONFIG_RCU_FANOUT) +#define RCU_FANOUT_SQ (RCU_FANOUT * RCU_FANOUT) +#define RCU_FANOUT_CUBE (RCU_FANOUT_SQ * RCU_FANOUT) + +#if NR_CPUS <= RCU_FANOUT +# define NUM_RCU_LVLS 1 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 (NR_CPUS) +# define NUM_RCU_LVL_2 0 +# define NUM_RCU_LVL_3 0 +#elif NR_CPUS <= RCU_FANOUT_SQ +# define NUM_RCU_LVLS 2 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT - 1) / RCU_FANOUT) +# define NUM_RCU_LVL_2 (NR_CPUS) +# define NUM_RCU_LVL_3 0 +#elif NR_CPUS <= RCU_FANOUT_CUBE +# define NUM_RCU_LVLS 3 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT_SQ - 1) / RCU_FANOUT_SQ) +# define NUM_RCU_LVL_2 (((NR_CPUS) + (RCU_FANOUT) - 1) / (RCU_FANOUT)) +# define NUM_RCU_LVL_3 NR_CPUS +#else +# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" +#endif /* #if (NR_CPUS) <= RCU_FANOUT */ + +#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3) +#define NUM_RCU_NODES (RCU_SUM - NR_CPUS) + +/* + * Dynticks per-CPU state. + */ +struct rcu_dynticks { + int dynticks_nesting; /* Track nesting level, sort of. */ + int dynticks; /* Even value for dynticks-idle, else odd. */ + int dynticks_nmi; /* Even value for either dynticks-idle or */ + /* not in nmi handler, else odd. So this */ + /* remains even for nmi from irq handler. */ +}; + +/* + * Definition for node within the RCU grace-period-detection hierarchy. + */ +struct rcu_node { + spinlock_t lock; + unsigned long qsmask; /* CPUs or groups that need to switch in */ + /* order for current grace period to proceed.*/ + unsigned long qsmaskinit; + /* Per-GP initialization for qsmask. */ + unsigned long grpmask; /* Mask to apply to parent qsmask. */ + int grplo; /* lowest-numbered CPU or group here. */ + int grphi; /* highest-numbered CPU or group here. */ + u8 grpnum; /* CPU/group number for next level up. */ + u8 level; /* root is at level 0. */ + struct rcu_node *parent; +} ____cacheline_internodealigned_in_smp; + +/* Index values for nxttail array in struct rcu_data. */ +#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ +#define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */ +#define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */ +#define RCU_NEXT_TAIL 3 +#define RCU_NEXT_SIZE 4 + +/* Per-CPU data for read-copy update. */ +struct rcu_data { + /* 1) quiescent-state and grace-period handling : */ + long completed; /* Track rsp->completed gp number */ + /* in order to detect GP end. */ + long gpnum; /* Highest gp number that this CPU */ + /* is aware of having started. */ + long passed_quiesc_completed; + /* Value of completed at time of qs. */ + bool passed_quiesc; /* User-mode/idle loop etc. */ + bool qs_pending; /* Core waits for quiesc state. */ + bool beenonline; /* CPU online at least once. */ + struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ + unsigned long grpmask; /* Mask to apply to leaf qsmask. */ + + /* 2) batch handling */ + /* + * If nxtlist is not NULL, it is partitioned as follows. + * Any of the partitions might be empty, in which case the + * pointer to that partition will be equal to the pointer for + * the following partition. When the list is empty, all of + * the nxttail elements point to nxtlist, which is NULL. + * + * [*nxttail[RCU_NEXT_READY_TAIL], NULL = *nxttail[RCU_NEXT_TAIL]): + * Entries that might have arrived after current GP ended + * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]): + * Entries known to have arrived before current GP ended + * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]): + * Entries that batch # <= ->completed - 1: waiting for current GP + * [nxtlist, *nxttail[RCU_DONE_TAIL]): + * Entries that batch # <= ->completed + * The grace period for these entries has completed, and + * the other grace-period-completed entries may be moved + * here temporarily in rcu_process_callbacks(). + */ + struct rcu_head *nxtlist; + struct rcu_head **nxttail[RCU_NEXT_SIZE]; + long qlen; /* # of queued callbacks */ + long blimit; /* Upper limit on a processed batch */ + +#ifdef CONFIG_NO_HZ + /* 3) dynticks interface. */ + struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ + int dynticks_snap; /* Per-GP tracking for dynticks. */ + int dynticks_nmi_snap; /* Per-GP tracking for dynticks_nmi. */ +#endif /* #ifdef CONFIG_NO_HZ */ + + /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ +#ifdef CONFIG_NO_HZ + unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ +#endif /* #ifdef CONFIG_NO_HZ */ + unsigned long offline_fqs; /* Kicked due to being offline. */ + unsigned long resched_ipi; /* Sent a resched IPI. */ + + /* 5) state to allow this CPU to force_quiescent_state on others */ + long n_rcu_pending; /* rcu_pending() calls since boot. */ + long n_rcu_pending_force_qs; /* when to force quiescent states. */ + + int cpu; +}; + +/* Values for signaled field in struct rcu_state. */ +#define RCU_GP_INIT 0 /* Grace period being initialized. */ +#define RCU_SAVE_DYNTICK 1 /* Need to scan dyntick state. */ +#define RCU_FORCE_QS 2 /* Need to force quiescent state. */ +#ifdef CONFIG_NO_HZ +#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK +#else /* #ifdef CONFIG_NO_HZ */ +#define RCU_SIGNAL_INIT RCU_FORCE_QS +#endif /* #else #ifdef CONFIG_NO_HZ */ + +#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR +#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rsp->jiffies_stall */ +#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rsp->jiffies_stall */ +#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ + /* to take at least one */ + /* scheduling clock irq */ + /* before ratting on them. */ + +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ + +/* + * RCU global state, including node hierarchy. This hierarchy is + * represented in "heap" form in a dense array. The root (first level) + * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second + * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]), + * and the third level in ->node[m+1] and following (->node[m+1] referenced + * by ->level[2]). The number of levels is determined by the number of + * CPUs and by CONFIG_RCU_FANOUT. Small systems will have a "hierarchy" + * consisting of a single rcu_node. + */ +struct rcu_state { + struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */ + struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ + u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ + u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ + struct rcu_data *rda[NR_CPUS]; /* array of rdp pointers. */ + + /* The following fields are guarded by the root rcu_node's lock. */ + + u8 signaled ____cacheline_internodealigned_in_smp; + /* Force QS state. */ + long gpnum; /* Current gp number. */ + long completed; /* # of last completed gp. */ + spinlock_t onofflock; /* exclude on/offline and */ + /* starting new GP. */ + spinlock_t fqslock; /* Only one task forcing */ + /* quiescent states. */ + unsigned long jiffies_force_qs; /* Time at which to invoke */ + /* force_quiescent_state(). */ + unsigned long n_force_qs; /* Number of calls to */ + /* force_quiescent_state(). */ + unsigned long n_force_qs_lh; /* ~Number of calls leaving */ + /* due to lock unavailable. */ + unsigned long n_force_qs_ngp; /* Number of calls leaving */ + /* due to no GP active. */ +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR + unsigned long gp_start; /* Time at which GP started, */ + /* but in jiffies. */ + unsigned long jiffies_stall; /* Time at which to check */ + /* for CPU stalls. */ +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ +#ifdef CONFIG_NO_HZ + long dynticks_completed; /* Value of completed @ snap. */ +#endif /* #ifdef CONFIG_NO_HZ */ +}; + +extern struct rcu_state rcu_state; +DECLARE_PER_CPU(struct rcu_data, rcu_data); + +extern struct rcu_state rcu_bh_state; +DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); + +/* + * Increment the quiescent state counter. + * The counter is a bit degenerated: We do not need to know + * how many quiescent states passed, just if there was at least + * one since the start of the grace period. Thus just a flag. + */ +static inline void rcu_qsctr_inc(int cpu) +{ + struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + rdp->passed_quiesc = 1; + rdp->passed_quiesc_completed = rdp->completed; +} +static inline void rcu_bh_qsctr_inc(int cpu) +{ + struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); + rdp->passed_quiesc = 1; + rdp->passed_quiesc_completed = rdp->completed; +} + +extern int rcu_pending(int cpu); +extern int rcu_needs_cpu(int cpu); + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +extern struct lockdep_map rcu_lock_map; +# define rcu_read_acquire() \ + lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) +# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_) +#else +# define rcu_read_acquire() do { } while (0) +# define rcu_read_release() do { } while (0) +#endif + +static inline void __rcu_read_lock(void) +{ + preempt_disable(); + __acquire(RCU); + rcu_read_acquire(); +} +static inline void __rcu_read_unlock(void) +{ + rcu_read_release(); + __release(RCU); + preempt_enable(); +} +static inline void __rcu_read_lock_bh(void) +{ + local_bh_disable(); + __acquire(RCU_BH); + rcu_read_acquire(); +} +static inline void __rcu_read_unlock_bh(void) +{ + rcu_read_release(); + __release(RCU_BH); + local_bh_enable(); +} + +#define __synchronize_sched() synchronize_rcu() + +#define call_rcu_sched(head, func) call_rcu(head, func) + +static inline void rcu_init_sched(void) +{ +} + +extern void __rcu_init(void); +extern void rcu_check_callbacks(int cpu, int user); +extern void rcu_restart_cpu(int cpu); + +extern long rcu_batches_completed(void); +extern long rcu_batches_completed_bh(void); + +#ifdef CONFIG_NO_HZ +void rcu_enter_nohz(void); +void rcu_exit_nohz(void); +#else /* CONFIG_NO_HZ */ +static inline void rcu_enter_nohz(void) +{ +} +static inline void rcu_exit_nohz(void) +{ +} +#endif /* CONFIG_NO_HZ */ + +#endif /* __LINUX_RCUTREE_H */ diff --git a/init/Kconfig b/init/Kconfig index f763762d544a..9dd7958a71f0 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -928,10 +928,16 @@ source "block/Kconfig" config PREEMPT_NOTIFIERS bool -config CLASSIC_RCU - def_bool !PREEMPT_RCU +config TREE_RCU_TRACE + def_bool RCU_TRACE && TREE_RCU + select DEBUG_FS help - This option selects the classic RCU implementation that is - designed for best read-side performance on non-realtime - systems. Classic RCU is the default. Note that the - PREEMPT_RCU symbol is used to select/deselect this option. + This option provides tracing for the TREE_RCU implementation, + permitting Makefile to trivially select kernel/rcutree_trace.c. + +config PREEMPT_RCU_TRACE + def_bool RCU_TRACE && PREEMPT_RCU + select DEBUG_FS + help + This option provides tracing for the PREEMPT_RCU implementation, + permitting Makefile to trivially select kernel/rcupreempt_trace.c. diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index 9fdba03dc1fc..463f29743ea0 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -52,10 +52,29 @@ config PREEMPT endchoice +choice + prompt "RCU Implementation" + default CLASSIC_RCU + +config CLASSIC_RCU + bool "Classic RCU" + help + This option selects the classic RCU implementation that is + designed for best read-side performance on non-realtime + systems. + + Select this option if you are unsure. + +config TREE_RCU + bool "Tree-based hierarchical RCU" + help + This option selects the RCU implementation that is + designed for very large SMP system with hundreds or + thousands of CPUs. + config PREEMPT_RCU bool "Preemptible RCU" depends on PREEMPT - default n help This option reduces the latency of the kernel by making certain RCU sections preemptible. Normally RCU code is non-preemptible, if @@ -64,16 +83,47 @@ config PREEMPT_RCU now-naive assumptions about each RCU read-side critical section remaining on a given CPU through its execution. - Say N if you are unsure. +endchoice config RCU_TRACE - bool "Enable tracing for RCU - currently stats in debugfs" - depends on PREEMPT_RCU - select DEBUG_FS - default y + bool "Enable tracing for RCU" + depends on TREE_RCU || PREEMPT_RCU help This option provides tracing in RCU which presents stats in debugfs for debugging RCU implementation. Say Y here if you want to enable RCU tracing Say N if you are unsure. + +config RCU_FANOUT + int "Tree-based hierarchical RCU fanout value" + range 2 64 if 64BIT + range 2 32 if !64BIT + depends on TREE_RCU + default 64 if 64BIT + default 32 if !64BIT + help + This option controls the fanout of hierarchical implementations + of RCU, allowing RCU to work efficiently on machines with + large numbers of CPUs. This value must be at least the cube + root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit + systems and up to 262,144 for 64-bit systems. + + Select a specific number if testing RCU itself. + Take the default if unsure. + +config RCU_FANOUT_EXACT + bool "Disable tree-based hierarchical RCU auto-balancing" + depends on TREE_RCU + default n + help + This option forces use of the exact RCU_FANOUT value specified, + regardless of imbalances in the hierarchy. This is useful for + testing RCU itself, and might one day be useful on systems with + strong NUMA behavior. + + Without RCU_FANOUT_EXACT, the code will balance the hierarchy. + + Say n if unsure. + + diff --git a/kernel/Makefile b/kernel/Makefile index 19fad003b19d..b4fdbbff5ec0 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -74,10 +74,10 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_CLASSIC_RCU) += rcuclassic.o +obj-$(CONFIG_TREE_RCU) += rcutree.o obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o -ifeq ($(CONFIG_PREEMPT_RCU),y) -obj-$(CONFIG_RCU_TRACE) += rcupreempt_trace.o -endif +obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o +obj-$(CONFIG_PREEMPT_RCU_TRACE) += rcupreempt_trace.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index 59236e8b9daa..04982659875a 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -551,6 +551,16 @@ void rcu_irq_exit(void) } } +void rcu_nmi_enter(void) +{ + rcu_irq_enter(); +} + +void rcu_nmi_exit(void) +{ + rcu_irq_exit(); +} + static void dyntick_save_progress_counter(int cpu) { struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); diff --git a/kernel/rcupreempt_trace.c b/kernel/rcupreempt_trace.c index 35c2d3360ecf..7c2665cac172 100644 --- a/kernel/rcupreempt_trace.c +++ b/kernel/rcupreempt_trace.c @@ -149,12 +149,12 @@ static void rcupreempt_trace_sum(struct rcupreempt_trace *sp) sp->done_length += cp->done_length; sp->done_add += cp->done_add; sp->done_remove += cp->done_remove; - atomic_set(&sp->done_invoked, atomic_read(&cp->done_invoked)); + atomic_add(atomic_read(&cp->done_invoked), &sp->done_invoked); sp->rcu_check_callbacks += cp->rcu_check_callbacks; - atomic_set(&sp->rcu_try_flip_1, - atomic_read(&cp->rcu_try_flip_1)); - atomic_set(&sp->rcu_try_flip_e1, - atomic_read(&cp->rcu_try_flip_e1)); + atomic_add(atomic_read(&cp->rcu_try_flip_1), + &sp->rcu_try_flip_1); + atomic_add(atomic_read(&cp->rcu_try_flip_e1), + &sp->rcu_try_flip_e1); sp->rcu_try_flip_i1 += cp->rcu_try_flip_i1; sp->rcu_try_flip_ie1 += cp->rcu_try_flip_ie1; sp->rcu_try_flip_g1 += cp->rcu_try_flip_g1; diff --git a/kernel/rcutree.c b/kernel/rcutree.c new file mode 100644 index 000000000000..a342b032112c --- /dev/null +++ b/kernel/rcutree.c @@ -0,0 +1,1535 @@ +/* + * Read-Copy Update mechanism for mutual exclusion + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2008 + * + * Authors: Dipankar Sarma + * Manfred Spraul + * Paul E. McKenney Hierarchical version + * + * Based on the original work by Paul McKenney + * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static struct lock_class_key rcu_lock_key; +struct lockdep_map rcu_lock_map = + STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key); +EXPORT_SYMBOL_GPL(rcu_lock_map); +#endif + +/* Data structures. */ + +#define RCU_STATE_INITIALIZER(name) { \ + .level = { &name.node[0] }, \ + .levelcnt = { \ + NUM_RCU_LVL_0, /* root of hierarchy. */ \ + NUM_RCU_LVL_1, \ + NUM_RCU_LVL_2, \ + NUM_RCU_LVL_3, /* == MAX_RCU_LVLS */ \ + }, \ + .signaled = RCU_SIGNAL_INIT, \ + .gpnum = -300, \ + .completed = -300, \ + .onofflock = __SPIN_LOCK_UNLOCKED(&name.onofflock), \ + .fqslock = __SPIN_LOCK_UNLOCKED(&name.fqslock), \ + .n_force_qs = 0, \ + .n_force_qs_ngp = 0, \ +} + +struct rcu_state rcu_state = RCU_STATE_INITIALIZER(rcu_state); +DEFINE_PER_CPU(struct rcu_data, rcu_data); + +struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); +DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); + +#ifdef CONFIG_NO_HZ +DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks); +#endif /* #ifdef CONFIG_NO_HZ */ + +static int blimit = 10; /* Maximum callbacks per softirq. */ +static int qhimark = 10000; /* If this many pending, ignore blimit. */ +static int qlowmark = 100; /* Once only this many pending, use blimit. */ + +static void force_quiescent_state(struct rcu_state *rsp, int relaxed); + +/* + * Return the number of RCU batches processed thus far for debug & stats. + */ +long rcu_batches_completed(void) +{ + return rcu_state.completed; +} +EXPORT_SYMBOL_GPL(rcu_batches_completed); + +/* + * Return the number of RCU BH batches processed thus far for debug & stats. + */ +long rcu_batches_completed_bh(void) +{ + return rcu_bh_state.completed; +} +EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); + +/* + * Does the CPU have callbacks ready to be invoked? + */ +static int +cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp) +{ + return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]; +} + +/* + * Does the current CPU require a yet-as-unscheduled grace period? + */ +static int +cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) +{ + /* ACCESS_ONCE() because we are accessing outside of lock. */ + return *rdp->nxttail[RCU_DONE_TAIL] && + ACCESS_ONCE(rsp->completed) == ACCESS_ONCE(rsp->gpnum); +} + +/* + * Return the root node of the specified rcu_state structure. + */ +static struct rcu_node *rcu_get_root(struct rcu_state *rsp) +{ + return &rsp->node[0]; +} + +#ifdef CONFIG_SMP + +/* + * If the specified CPU is offline, tell the caller that it is in + * a quiescent state. Otherwise, whack it with a reschedule IPI. + * Grace periods can end up waiting on an offline CPU when that + * CPU is in the process of coming online -- it will be added to the + * rcu_node bitmasks before it actually makes it online. The same thing + * can happen while a CPU is in the process of coming online. Because this + * race is quite rare, we check for it after detecting that the grace + * period has been delayed rather than checking each and every CPU + * each and every time we start a new grace period. + */ +static int rcu_implicit_offline_qs(struct rcu_data *rdp) +{ + /* + * If the CPU is offline, it is in a quiescent state. We can + * trust its state not to change because interrupts are disabled. + */ + if (cpu_is_offline(rdp->cpu)) { + rdp->offline_fqs++; + return 1; + } + + /* The CPU is online, so send it a reschedule IPI. */ + if (rdp->cpu != smp_processor_id()) + smp_send_reschedule(rdp->cpu); + else + set_need_resched(); + rdp->resched_ipi++; + return 0; +} + +#endif /* #ifdef CONFIG_SMP */ + +#ifdef CONFIG_NO_HZ +static DEFINE_RATELIMIT_STATE(rcu_rs, 10 * HZ, 5); + +/** + * rcu_enter_nohz - inform RCU that current CPU is entering nohz + * + * Enter nohz mode, in other words, -leave- the mode in which RCU + * read-side critical sections can occur. (Though RCU read-side + * critical sections can occur in irq handlers in nohz mode, a possibility + * handled by rcu_irq_enter() and rcu_irq_exit()). + */ +void rcu_enter_nohz(void) +{ + unsigned long flags; + struct rcu_dynticks *rdtp; + + smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ + local_irq_save(flags); + rdtp = &__get_cpu_var(rcu_dynticks); + rdtp->dynticks++; + rdtp->dynticks_nesting--; + WARN_ON_RATELIMIT(rdtp->dynticks & 0x1, &rcu_rs); + local_irq_restore(flags); +} + +/* + * rcu_exit_nohz - inform RCU that current CPU is leaving nohz + * + * Exit nohz mode, in other words, -enter- the mode in which RCU + * read-side critical sections normally occur. + */ +void rcu_exit_nohz(void) +{ + unsigned long flags; + struct rcu_dynticks *rdtp; + + local_irq_save(flags); + rdtp = &__get_cpu_var(rcu_dynticks); + rdtp->dynticks++; + rdtp->dynticks_nesting++; + WARN_ON_RATELIMIT(!(rdtp->dynticks & 0x1), &rcu_rs); + local_irq_restore(flags); + smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ +} + +/** + * rcu_nmi_enter - inform RCU of entry to NMI context + * + * If the CPU was idle with dynamic ticks active, and there is no + * irq handler running, this updates rdtp->dynticks_nmi to let the + * RCU grace-period handling know that the CPU is active. + */ +void rcu_nmi_enter(void) +{ + struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); + + if (rdtp->dynticks & 0x1) + return; + rdtp->dynticks_nmi++; + WARN_ON_RATELIMIT(!(rdtp->dynticks_nmi & 0x1), &rcu_rs); + smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ +} + +/** + * rcu_nmi_exit - inform RCU of exit from NMI context + * + * If the CPU was idle with dynamic ticks active, and there is no + * irq handler running, this updates rdtp->dynticks_nmi to let the + * RCU grace-period handling know that the CPU is no longer active. + */ +void rcu_nmi_exit(void) +{ + struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); + + if (rdtp->dynticks & 0x1) + return; + smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ + rdtp->dynticks_nmi++; + WARN_ON_RATELIMIT(rdtp->dynticks_nmi & 0x1, &rcu_rs); +} + +/** + * rcu_irq_enter - inform RCU of entry to hard irq context + * + * If the CPU was idle with dynamic ticks active, this updates the + * rdtp->dynticks to let the RCU handling know that the CPU is active. + */ +void rcu_irq_enter(void) +{ + struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); + + if (rdtp->dynticks_nesting++) + return; + rdtp->dynticks++; + WARN_ON_RATELIMIT(!(rdtp->dynticks & 0x1), &rcu_rs); + smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ +} + +/** + * rcu_irq_exit - inform RCU of exit from hard irq context + * + * If the CPU was idle with dynamic ticks active, update the rdp->dynticks + * to put let the RCU handling be aware that the CPU is going back to idle + * with no ticks. + */ +void rcu_irq_exit(void) +{ + struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); + + if (--rdtp->dynticks_nesting) + return; + smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ + rdtp->dynticks++; + WARN_ON_RATELIMIT(rdtp->dynticks & 0x1, &rcu_rs); + + /* If the interrupt queued a callback, get out of dyntick mode. */ + if (__get_cpu_var(rcu_data).nxtlist || + __get_cpu_var(rcu_bh_data).nxtlist) + set_need_resched(); +} + +/* + * Record the specified "completed" value, which is later used to validate + * dynticks counter manipulations. Specify "rsp->completed - 1" to + * unconditionally invalidate any future dynticks manipulations (which is + * useful at the beginning of a grace period). + */ +static void dyntick_record_completed(struct rcu_state *rsp, long comp) +{ + rsp->dynticks_completed = comp; +} + +#ifdef CONFIG_SMP + +/* + * Recall the previously recorded value of the completion for dynticks. + */ +static long dyntick_recall_completed(struct rcu_state *rsp) +{ + return rsp->dynticks_completed; +} + +/* + * Snapshot the specified CPU's dynticks counter so that we can later + * credit them with an implicit quiescent state. Return 1 if this CPU + * is already in a quiescent state courtesy of dynticks idle mode. + */ +static int dyntick_save_progress_counter(struct rcu_data *rdp) +{ + int ret; + int snap; + int snap_nmi; + + snap = rdp->dynticks->dynticks; + snap_nmi = rdp->dynticks->dynticks_nmi; + smp_mb(); /* Order sampling of snap with end of grace period. */ + rdp->dynticks_snap = snap; + rdp->dynticks_nmi_snap = snap_nmi; + ret = ((snap & 0x1) == 0) && ((snap_nmi & 0x1) == 0); + if (ret) + rdp->dynticks_fqs++; + return ret; +} + +/* + * Return true if the specified CPU has passed through a quiescent + * state by virtue of being in or having passed through an dynticks + * idle state since the last call to dyntick_save_progress_counter() + * for this same CPU. + */ +static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) +{ + long curr; + long curr_nmi; + long snap; + long snap_nmi; + + curr = rdp->dynticks->dynticks; + snap = rdp->dynticks_snap; + curr_nmi = rdp->dynticks->dynticks_nmi; + snap_nmi = rdp->dynticks_nmi_snap; + smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ + + /* + * If the CPU passed through or entered a dynticks idle phase with + * no active irq/NMI handlers, then we can safely pretend that the CPU + * already acknowledged the request to pass through a quiescent + * state. Either way, that CPU cannot possibly be in an RCU + * read-side critical section that started before the beginning + * of the current RCU grace period. + */ + if ((curr != snap || (curr & 0x1) == 0) && + (curr_nmi != snap_nmi || (curr_nmi & 0x1) == 0)) { + rdp->dynticks_fqs++; + return 1; + } + + /* Go check for the CPU being offline. */ + return rcu_implicit_offline_qs(rdp); +} + +#endif /* #ifdef CONFIG_SMP */ + +#else /* #ifdef CONFIG_NO_HZ */ + +static void dyntick_record_completed(struct rcu_state *rsp, long comp) +{ +} + +#ifdef CONFIG_SMP + +/* + * If there are no dynticks, then the only way that a CPU can passively + * be in a quiescent state is to be offline. Unlike dynticks idle, which + * is a point in time during the prior (already finished) grace period, + * an offline CPU is always in a quiescent state, and thus can be + * unconditionally applied. So just return the current value of completed. + */ +static long dyntick_recall_completed(struct rcu_state *rsp) +{ + return rsp->completed; +} + +static int dyntick_save_progress_counter(struct rcu_data *rdp) +{ + return 0; +} + +static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) +{ + return rcu_implicit_offline_qs(rdp); +} + +#endif /* #ifdef CONFIG_SMP */ + +#endif /* #else #ifdef CONFIG_NO_HZ */ + +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR + +static void record_gp_stall_check_time(struct rcu_state *rsp) +{ + rsp->gp_start = jiffies; + rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK; +} + +static void print_other_cpu_stall(struct rcu_state *rsp) +{ + int cpu; + long delta; + unsigned long flags; + struct rcu_node *rnp = rcu_get_root(rsp); + struct rcu_node *rnp_cur = rsp->level[NUM_RCU_LVLS - 1]; + struct rcu_node *rnp_end = &rsp->node[NUM_RCU_NODES]; + + /* Only let one CPU complain about others per time interval. */ + + spin_lock_irqsave(&rnp->lock, flags); + delta = jiffies - rsp->jiffies_stall; + if (delta < RCU_STALL_RAT_DELAY || rsp->gpnum == rsp->completed) { + spin_unlock_irqrestore(&rnp->lock, flags); + return; + } + rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK; + spin_unlock_irqrestore(&rnp->lock, flags); + + /* OK, time to rat on our buddy... */ + + printk(KERN_ERR "INFO: RCU detected CPU stalls:"); + for (; rnp_cur < rnp_end; rnp_cur++) { + if (rnp_cur->qsmask == 0) + continue; + for (cpu = 0; cpu <= rnp_cur->grphi - rnp_cur->grplo; cpu++) + if (rnp_cur->qsmask & (1UL << cpu)) + printk(" %d", rnp_cur->grplo + cpu); + } + printk(" (detected by %d, t=%ld jiffies)\n", + smp_processor_id(), (long)(jiffies - rsp->gp_start)); + force_quiescent_state(rsp, 0); /* Kick them all. */ +} + +static void print_cpu_stall(struct rcu_state *rsp) +{ + unsigned long flags; + struct rcu_node *rnp = rcu_get_root(rsp); + + printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu jiffies)\n", + smp_processor_id(), jiffies - rsp->gp_start); + dump_stack(); + spin_lock_irqsave(&rnp->lock, flags); + if ((long)(jiffies - rsp->jiffies_stall) >= 0) + rsp->jiffies_stall = + jiffies + RCU_SECONDS_TILL_STALL_RECHECK; + spin_unlock_irqrestore(&rnp->lock, flags); + set_need_resched(); /* kick ourselves to get things going. */ +} + +static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) +{ + long delta; + struct rcu_node *rnp; + + delta = jiffies - rsp->jiffies_stall; + rnp = rdp->mynode; + if ((rnp->qsmask & rdp->grpmask) && delta >= 0) { + + /* We haven't checked in, so go dump stack. */ + print_cpu_stall(rsp); + + } else if (rsp->gpnum != rsp->completed && + delta >= RCU_STALL_RAT_DELAY) { + + /* They had two time units to dump stack, so complain. */ + print_other_cpu_stall(rsp); + } +} + +#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ + +static void record_gp_stall_check_time(struct rcu_state *rsp) +{ +} + +static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) +{ +} + +#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ + +/* + * Update CPU-local rcu_data state to record the newly noticed grace period. + * This is used both when we started the grace period and when we notice + * that someone else started the grace period. + */ +static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp) +{ + rdp->qs_pending = 1; + rdp->passed_quiesc = 0; + rdp->gpnum = rsp->gpnum; + rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending + + RCU_JIFFIES_TILL_FORCE_QS; +} + +/* + * Did someone else start a new RCU grace period start since we last + * checked? Update local state appropriately if so. Must be called + * on the CPU corresponding to rdp. + */ +static int +check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp) +{ + unsigned long flags; + int ret = 0; + + local_irq_save(flags); + if (rdp->gpnum != rsp->gpnum) { + note_new_gpnum(rsp, rdp); + ret = 1; + } + local_irq_restore(flags); + return ret; +} + +/* + * Start a new RCU grace period if warranted, re-initializing the hierarchy + * in preparation for detecting the next grace period. The caller must hold + * the root node's ->lock, which is released before return. Hard irqs must + * be disabled. + */ +static void +rcu_start_gp(struct rcu_state *rsp, unsigned long flags) + __releases(rcu_get_root(rsp)->lock) +{ + struct rcu_data *rdp = rsp->rda[smp_processor_id()]; + struct rcu_node *rnp = rcu_get_root(rsp); + struct rcu_node *rnp_cur; + struct rcu_node *rnp_end; + + if (!cpu_needs_another_gp(rsp, rdp)) { + spin_unlock_irqrestore(&rnp->lock, flags); + return; + } + + /* Advance to a new grace period and initialize state. */ + rsp->gpnum++; + rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ + rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; + rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending + + RCU_JIFFIES_TILL_FORCE_QS; + record_gp_stall_check_time(rsp); + dyntick_record_completed(rsp, rsp->completed - 1); + note_new_gpnum(rsp, rdp); + + /* + * Because we are first, we know that all our callbacks will + * be covered by this upcoming grace period, even the ones + * that were registered arbitrarily recently. + */ + rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + + /* Special-case the common single-level case. */ + if (NUM_RCU_NODES == 1) { + rnp->qsmask = rnp->qsmaskinit; + spin_unlock_irqrestore(&rnp->lock, flags); + return; + } + + spin_unlock(&rnp->lock); /* leave irqs disabled. */ + + + /* Exclude any concurrent CPU-hotplug operations. */ + spin_lock(&rsp->onofflock); /* irqs already disabled. */ + + /* + * Set the quiescent-state-needed bits in all the non-leaf RCU + * nodes for all currently online CPUs. This operation relies + * on the layout of the hierarchy within the rsp->node[] array. + * Note that other CPUs will access only the leaves of the + * hierarchy, which still indicate that no grace period is in + * progress. In addition, we have excluded CPU-hotplug operations. + * + * We therefore do not need to hold any locks. Any required + * memory barriers will be supplied by the locks guarding the + * leaf rcu_nodes in the hierarchy. + */ + + rnp_end = rsp->level[NUM_RCU_LVLS - 1]; + for (rnp_cur = &rsp->node[0]; rnp_cur < rnp_end; rnp_cur++) + rnp_cur->qsmask = rnp_cur->qsmaskinit; + + /* + * Now set up the leaf nodes. Here we must be careful. First, + * we need to hold the lock in order to exclude other CPUs, which + * might be contending for the leaf nodes' locks. Second, as + * soon as we initialize a given leaf node, its CPUs might run + * up the rest of the hierarchy. We must therefore acquire locks + * for each node that we touch during this stage. (But we still + * are excluding CPU-hotplug operations.) + * + * Note that the grace period cannot complete until we finish + * the initialization process, as there will be at least one + * qsmask bit set in the root node until that time, namely the + * one corresponding to this CPU. + */ + rnp_end = &rsp->node[NUM_RCU_NODES]; + rnp_cur = rsp->level[NUM_RCU_LVLS - 1]; + for (; rnp_cur < rnp_end; rnp_cur++) { + spin_lock(&rnp_cur->lock); /* irqs already disabled. */ + rnp_cur->qsmask = rnp_cur->qsmaskinit; + spin_unlock(&rnp_cur->lock); /* irqs already disabled. */ + } + + rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ + spin_unlock_irqrestore(&rsp->onofflock, flags); +} + +/* + * Advance this CPU's callbacks, but only if the current grace period + * has ended. This may be called only from the CPU to whom the rdp + * belongs. + */ +static void +rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp) +{ + long completed_snap; + unsigned long flags; + + local_irq_save(flags); + completed_snap = ACCESS_ONCE(rsp->completed); /* outside of lock. */ + + /* Did another grace period end? */ + if (rdp->completed != completed_snap) { + + /* Advance callbacks. No harm if list empty. */ + rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL]; + rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL]; + rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + + /* Remember that we saw this grace-period completion. */ + rdp->completed = completed_snap; + } + local_irq_restore(flags); +} + +/* + * Similar to cpu_quiet(), for which it is a helper function. Allows + * a group of CPUs to be quieted at one go, though all the CPUs in the + * group must be represented by the same leaf rcu_node structure. + * That structure's lock must be held upon entry, and it is released + * before return. + */ +static void +cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp, + unsigned long flags) + __releases(rnp->lock) +{ + /* Walk up the rcu_node hierarchy. */ + for (;;) { + if (!(rnp->qsmask & mask)) { + + /* Our bit has already been cleared, so done. */ + spin_unlock_irqrestore(&rnp->lock, flags); + return; + } + rnp->qsmask &= ~mask; + if (rnp->qsmask != 0) { + + /* Other bits still set at this level, so done. */ + spin_unlock_irqrestore(&rnp->lock, flags); + return; + } + mask = rnp->grpmask; + if (rnp->parent == NULL) { + + /* No more levels. Exit loop holding root lock. */ + + break; + } + spin_unlock_irqrestore(&rnp->lock, flags); + rnp = rnp->parent; + spin_lock_irqsave(&rnp->lock, flags); + } + + /* + * Get here if we are the last CPU to pass through a quiescent + * state for this grace period. Clean up and let rcu_start_gp() + * start up the next grace period if one is needed. Note that + * we still hold rnp->lock, as required by rcu_start_gp(), which + * will release it. + */ + rsp->completed = rsp->gpnum; + rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]); + rcu_start_gp(rsp, flags); /* releases rnp->lock. */ +} + +/* + * Record a quiescent state for the specified CPU, which must either be + * the current CPU or an offline CPU. The lastcomp argument is used to + * make sure we are still in the grace period of interest. We don't want + * to end the current grace period based on quiescent states detected in + * an earlier grace period! + */ +static void +cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) +{ + unsigned long flags; + unsigned long mask; + struct rcu_node *rnp; + + rnp = rdp->mynode; + spin_lock_irqsave(&rnp->lock, flags); + if (lastcomp != ACCESS_ONCE(rsp->completed)) { + + /* + * Someone beat us to it for this grace period, so leave. + * The race with GP start is resolved by the fact that we + * hold the leaf rcu_node lock, so that the per-CPU bits + * cannot yet be initialized -- so we would simply find our + * CPU's bit already cleared in cpu_quiet_msk() if this race + * occurred. + */ + rdp->passed_quiesc = 0; /* try again later! */ + spin_unlock_irqrestore(&rnp->lock, flags); + return; + } + mask = rdp->grpmask; + if ((rnp->qsmask & mask) == 0) { + spin_unlock_irqrestore(&rnp->lock, flags); + } else { + rdp->qs_pending = 0; + + /* + * This GP can't end until cpu checks in, so all of our + * callbacks can be processed during the next GP. + */ + rdp = rsp->rda[smp_processor_id()]; + rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + + cpu_quiet_msk(mask, rsp, rnp, flags); /* releases rnp->lock */ + } +} + +/* + * Check to see if there is a new grace period of which this CPU + * is not yet aware, and if so, set up local rcu_data state for it. + * Otherwise, see if this CPU has just passed through its first + * quiescent state for this grace period, and record that fact if so. + */ +static void +rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) +{ + /* If there is now a new grace period, record and return. */ + if (check_for_new_grace_period(rsp, rdp)) + return; + + /* + * Does this CPU still need to do its part for current grace period? + * If no, return and let the other CPUs do their part as well. + */ + if (!rdp->qs_pending) + return; + + /* + * Was there a quiescent state since the beginning of the grace + * period? If no, then exit and wait for the next call. + */ + if (!rdp->passed_quiesc) + return; + + /* Tell RCU we are done (but cpu_quiet() will be the judge of that). */ + cpu_quiet(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed); +} + +#ifdef CONFIG_HOTPLUG_CPU + +/* + * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy + * and move all callbacks from the outgoing CPU to the current one. + */ +static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) +{ + int i; + unsigned long flags; + long lastcomp; + unsigned long mask; + struct rcu_data *rdp = rsp->rda[cpu]; + struct rcu_data *rdp_me; + struct rcu_node *rnp; + + /* Exclude any attempts to start a new grace period. */ + spin_lock_irqsave(&rsp->onofflock, flags); + + /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ + rnp = rdp->mynode; + mask = rdp->grpmask; /* rnp->grplo is constant. */ + do { + spin_lock(&rnp->lock); /* irqs already disabled. */ + rnp->qsmaskinit &= ~mask; + if (rnp->qsmaskinit != 0) { + spin_unlock(&rnp->lock); /* irqs already disabled. */ + break; + } + mask = rnp->grpmask; + spin_unlock(&rnp->lock); /* irqs already disabled. */ + rnp = rnp->parent; + } while (rnp != NULL); + lastcomp = rsp->completed; + + spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ + + /* Being offline is a quiescent state, so go record it. */ + cpu_quiet(cpu, rsp, rdp, lastcomp); + + /* + * Move callbacks from the outgoing CPU to the running CPU. + * Note that the outgoing CPU is now quiscent, so it is now + * (uncharacteristically) safe to access it rcu_data structure. + * Note also that we must carefully retain the order of the + * outgoing CPU's callbacks in order for rcu_barrier() to work + * correctly. Finally, note that we start all the callbacks + * afresh, even those that have passed through a grace period + * and are therefore ready to invoke. The theory is that hotplug + * events are rare, and that if they are frequent enough to + * indefinitely delay callbacks, you have far worse things to + * be worrying about. + */ + rdp_me = rsp->rda[smp_processor_id()]; + if (rdp->nxtlist != NULL) { + *rdp_me->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist; + rdp_me->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; + rdp->nxtlist = NULL; + for (i = 0; i < RCU_NEXT_SIZE; i++) + rdp->nxttail[i] = &rdp->nxtlist; + rdp_me->qlen += rdp->qlen; + rdp->qlen = 0; + } + local_irq_restore(flags); +} + +/* + * Remove the specified CPU from the RCU hierarchy and move any pending + * callbacks that it might have to the current CPU. This code assumes + * that at least one CPU in the system will remain running at all times. + * Any attempt to offline -all- CPUs is likely to strand RCU callbacks. + */ +static void rcu_offline_cpu(int cpu) +{ + __rcu_offline_cpu(cpu, &rcu_state); + __rcu_offline_cpu(cpu, &rcu_bh_state); +} + +#else /* #ifdef CONFIG_HOTPLUG_CPU */ + +static void rcu_offline_cpu(int cpu) +{ +} + +#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ + +/* + * Invoke any RCU callbacks that have made it to the end of their grace + * period. Thottle as specified by rdp->blimit. + */ +static void rcu_do_batch(struct rcu_data *rdp) +{ + unsigned long flags; + struct rcu_head *next, *list, **tail; + int count; + + /* If no callbacks are ready, just return.*/ + if (!cpu_has_callbacks_ready_to_invoke(rdp)) + return; + + /* + * Extract the list of ready callbacks, disabling to prevent + * races with call_rcu() from interrupt handlers. + */ + local_irq_save(flags); + list = rdp->nxtlist; + rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; + *rdp->nxttail[RCU_DONE_TAIL] = NULL; + tail = rdp->nxttail[RCU_DONE_TAIL]; + for (count = RCU_NEXT_SIZE - 1; count >= 0; count--) + if (rdp->nxttail[count] == rdp->nxttail[RCU_DONE_TAIL]) + rdp->nxttail[count] = &rdp->nxtlist; + local_irq_restore(flags); + + /* Invoke callbacks. */ + count = 0; + while (list) { + next = list->next; + prefetch(next); + list->func(list); + list = next; + if (++count >= rdp->blimit) + break; + } + + local_irq_save(flags); + + /* Update count, and requeue any remaining callbacks. */ + rdp->qlen -= count; + if (list != NULL) { + *tail = rdp->nxtlist; + rdp->nxtlist = list; + for (count = 0; count < RCU_NEXT_SIZE; count++) + if (&rdp->nxtlist == rdp->nxttail[count]) + rdp->nxttail[count] = tail; + else + break; + } + + /* Reinstate batch limit if we have worked down the excess. */ + if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) + rdp->blimit = blimit; + + local_irq_restore(flags); + + /* Re-raise the RCU softirq if there are callbacks remaining. */ + if (cpu_has_callbacks_ready_to_invoke(rdp)) + raise_softirq(RCU_SOFTIRQ); +} + +/* + * Check to see if this CPU is in a non-context-switch quiescent state + * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). + * Also schedule the RCU softirq handler. + * + * This function must be called with hardirqs disabled. It is normally + * invoked from the scheduling-clock interrupt. If rcu_pending returns + * false, there is no point in invoking rcu_check_callbacks(). + */ +void rcu_check_callbacks(int cpu, int user) +{ + if (user || + (idle_cpu(cpu) && !in_softirq() && + hardirq_count() <= (1 << HARDIRQ_SHIFT))) { + + /* + * Get here if this CPU took its interrupt from user + * mode or from the idle loop, and if this is not a + * nested interrupt. In this case, the CPU is in + * a quiescent state, so count it. + * + * No memory barrier is required here because both + * rcu_qsctr_inc() and rcu_bh_qsctr_inc() reference + * only CPU-local variables that other CPUs neither + * access nor modify, at least not while the corresponding + * CPU is online. + */ + + rcu_qsctr_inc(cpu); + rcu_bh_qsctr_inc(cpu); + + } else if (!in_softirq()) { + + /* + * Get here if this CPU did not take its interrupt from + * softirq, in other words, if it is not interrupting + * a rcu_bh read-side critical section. This is an _bh + * critical section, so count it. + */ + + rcu_bh_qsctr_inc(cpu); + } + raise_softirq(RCU_SOFTIRQ); +} + +#ifdef CONFIG_SMP + +/* + * Scan the leaf rcu_node structures, processing dyntick state for any that + * have not yet encountered a quiescent state, using the function specified. + * Returns 1 if the current grace period ends while scanning (possibly + * because we made it end). + */ +static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp, + int (*f)(struct rcu_data *)) +{ + unsigned long bit; + int cpu; + unsigned long flags; + unsigned long mask; + struct rcu_node *rnp_cur = rsp->level[NUM_RCU_LVLS - 1]; + struct rcu_node *rnp_end = &rsp->node[NUM_RCU_NODES]; + + for (; rnp_cur < rnp_end; rnp_cur++) { + mask = 0; + spin_lock_irqsave(&rnp_cur->lock, flags); + if (rsp->completed != lastcomp) { + spin_unlock_irqrestore(&rnp_cur->lock, flags); + return 1; + } + if (rnp_cur->qsmask == 0) { + spin_unlock_irqrestore(&rnp_cur->lock, flags); + continue; + } + cpu = rnp_cur->grplo; + bit = 1; + for (; cpu <= rnp_cur->grphi; cpu++, bit <<= 1) { + if ((rnp_cur->qsmask & bit) != 0 && f(rsp->rda[cpu])) + mask |= bit; + } + if (mask != 0 && rsp->completed == lastcomp) { + + /* cpu_quiet_msk() releases rnp_cur->lock. */ + cpu_quiet_msk(mask, rsp, rnp_cur, flags); + continue; + } + spin_unlock_irqrestore(&rnp_cur->lock, flags); + } + return 0; +} + +/* + * Force quiescent states on reluctant CPUs, and also detect which + * CPUs are in dyntick-idle mode. + */ +static void force_quiescent_state(struct rcu_state *rsp, int relaxed) +{ + unsigned long flags; + long lastcomp; + struct rcu_data *rdp = rsp->rda[smp_processor_id()]; + struct rcu_node *rnp = rcu_get_root(rsp); + u8 signaled; + + if (ACCESS_ONCE(rsp->completed) == ACCESS_ONCE(rsp->gpnum)) + return; /* No grace period in progress, nothing to force. */ + if (!spin_trylock_irqsave(&rsp->fqslock, flags)) { + rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ + return; /* Someone else is already on the job. */ + } + if (relaxed && + (long)(rsp->jiffies_force_qs - jiffies) >= 0 && + (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) >= 0) + goto unlock_ret; /* no emergency and done recently. */ + rsp->n_force_qs++; + spin_lock(&rnp->lock); + lastcomp = rsp->completed; + signaled = rsp->signaled; + rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; + rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending + + RCU_JIFFIES_TILL_FORCE_QS; + if (lastcomp == rsp->gpnum) { + rsp->n_force_qs_ngp++; + spin_unlock(&rnp->lock); + goto unlock_ret; /* no GP in progress, time updated. */ + } + spin_unlock(&rnp->lock); + switch (signaled) { + case RCU_GP_INIT: + + break; /* grace period still initializing, ignore. */ + + case RCU_SAVE_DYNTICK: + + if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) + break; /* So gcc recognizes the dead code. */ + + /* Record dyntick-idle state. */ + if (rcu_process_dyntick(rsp, lastcomp, + dyntick_save_progress_counter)) + goto unlock_ret; + + /* Update state, record completion counter. */ + spin_lock(&rnp->lock); + if (lastcomp == rsp->completed) { + rsp->signaled = RCU_FORCE_QS; + dyntick_record_completed(rsp, lastcomp); + } + spin_unlock(&rnp->lock); + break; + + case RCU_FORCE_QS: + + /* Check dyntick-idle state, send IPI to laggarts. */ + if (rcu_process_dyntick(rsp, dyntick_recall_completed(rsp), + rcu_implicit_dynticks_qs)) + goto unlock_ret; + + /* Leave state in case more forcing is required. */ + + break; + } +unlock_ret: + spin_unlock_irqrestore(&rsp->fqslock, flags); +} + +#else /* #ifdef CONFIG_SMP */ + +static void force_quiescent_state(struct rcu_state *rsp, int relaxed) +{ + set_need_resched(); +} + +#endif /* #else #ifdef CONFIG_SMP */ + +/* + * This does the RCU processing work from softirq context for the + * specified rcu_state and rcu_data structures. This may be called + * only from the CPU to whom the rdp belongs. + */ +static void +__rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) +{ + unsigned long flags; + + /* + * If an RCU GP has gone long enough, go check for dyntick + * idle CPUs and, if needed, send resched IPIs. + */ + if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 || + (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0) + force_quiescent_state(rsp, 1); + + /* + * Advance callbacks in response to end of earlier grace + * period that some other CPU ended. + */ + rcu_process_gp_end(rsp, rdp); + + /* Update RCU state based on any recent quiescent states. */ + rcu_check_quiescent_state(rsp, rdp); + + /* Does this CPU require a not-yet-started grace period? */ + if (cpu_needs_another_gp(rsp, rdp)) { + spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags); + rcu_start_gp(rsp, flags); /* releases above lock */ + } + + /* If there are callbacks ready, invoke them. */ + rcu_do_batch(rdp); +} + +/* + * Do softirq processing for the current CPU. + */ +static void rcu_process_callbacks(struct softirq_action *unused) +{ + /* + * Memory references from any prior RCU read-side critical sections + * executed by the interrupted code must be seen before any RCU + * grace-period manipulations below. + */ + smp_mb(); /* See above block comment. */ + + __rcu_process_callbacks(&rcu_state, &__get_cpu_var(rcu_data)); + __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); + + /* + * Memory references from any later RCU read-side critical sections + * executed by the interrupted code must be seen after any RCU + * grace-period manipulations above. + */ + smp_mb(); /* See above block comment. */ +} + +static void +__call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), + struct rcu_state *rsp) +{ + unsigned long flags; + struct rcu_data *rdp; + + head->func = func; + head->next = NULL; + + smp_mb(); /* Ensure RCU update seen before callback registry. */ + + /* + * Opportunistically note grace-period endings and beginnings. + * Note that we might see a beginning right after we see an + * end, but never vice versa, since this CPU has to pass through + * a quiescent state betweentimes. + */ + local_irq_save(flags); + rdp = rsp->rda[smp_processor_id()]; + rcu_process_gp_end(rsp, rdp); + check_for_new_grace_period(rsp, rdp); + + /* Add the callback to our list. */ + *rdp->nxttail[RCU_NEXT_TAIL] = head; + rdp->nxttail[RCU_NEXT_TAIL] = &head->next; + + /* Start a new grace period if one not already started. */ + if (ACCESS_ONCE(rsp->completed) == ACCESS_ONCE(rsp->gpnum)) { + unsigned long nestflag; + struct rcu_node *rnp_root = rcu_get_root(rsp); + + spin_lock_irqsave(&rnp_root->lock, nestflag); + rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */ + } + + /* Force the grace period if too many callbacks or too long waiting. */ + if (unlikely(++rdp->qlen > qhimark)) { + rdp->blimit = LONG_MAX; + force_quiescent_state(rsp, 0); + } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 || + (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0) + force_quiescent_state(rsp, 1); + local_irq_restore(flags); +} + +/* + * Queue an RCU callback for invocation after a grace period. + */ +void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +{ + __call_rcu(head, func, &rcu_state); +} +EXPORT_SYMBOL_GPL(call_rcu); + +/* + * Queue an RCU for invocation after a quicker grace period. + */ +void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +{ + __call_rcu(head, func, &rcu_bh_state); +} +EXPORT_SYMBOL_GPL(call_rcu_bh); + +/* + * Check to see if there is any immediate RCU-related work to be done + * by the current CPU, for the specified type of RCU, returning 1 if so. + * The checks are in order of increasing expense: checks that can be + * carried out against CPU-local state are performed first. However, + * we must check for CPU stalls first, else we might not get a chance. + */ +static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) +{ + rdp->n_rcu_pending++; + + /* Check for CPU stalls, if enabled. */ + check_cpu_stall(rsp, rdp); + + /* Is the RCU core waiting for a quiescent state from this CPU? */ + if (rdp->qs_pending) + return 1; + + /* Does this CPU have callbacks ready to invoke? */ + if (cpu_has_callbacks_ready_to_invoke(rdp)) + return 1; + + /* Has RCU gone idle with this CPU needing another grace period? */ + if (cpu_needs_another_gp(rsp, rdp)) + return 1; + + /* Has another RCU grace period completed? */ + if (ACCESS_ONCE(rsp->completed) != rdp->completed) /* outside of lock */ + return 1; + + /* Has a new RCU grace period started? */ + if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) /* outside of lock */ + return 1; + + /* Has an RCU GP gone long enough to send resched IPIs &c? */ + if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) && + ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 || + (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0)) + return 1; + + /* nothing to do */ + return 0; +} + +/* + * Check to see if there is any immediate RCU-related work to be done + * by the current CPU, returning 1 if so. This function is part of the + * RCU implementation; it is -not- an exported member of the RCU API. + */ +int rcu_pending(int cpu) +{ + return __rcu_pending(&rcu_state, &per_cpu(rcu_data, cpu)) || + __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)); +} + +/* + * Check to see if any future RCU-related work will need to be done + * by the current CPU, even if none need be done immediately, returning + * 1 if so. This function is part of the RCU implementation; it is -not- + * an exported member of the RCU API. + */ +int rcu_needs_cpu(int cpu) +{ + /* RCU callbacks either ready or pending? */ + return per_cpu(rcu_data, cpu).nxtlist || + per_cpu(rcu_bh_data, cpu).nxtlist; +} + +/* + * Initialize a CPU's per-CPU RCU data. We take this "scorched earth" + * approach so that we don't have to worry about how long the CPU has + * been gone, or whether it ever was online previously. We do trust the + * ->mynode field, as it is constant for a given struct rcu_data and + * initialized during early boot. + * + * Note that only one online or offline event can be happening at a given + * time. Note also that we can accept some slop in the rsp->completed + * access due to the fact that this CPU cannot possibly have any RCU + * callbacks in flight yet. + */ +static void +rcu_init_percpu_data(int cpu, struct rcu_state *rsp) +{ + unsigned long flags; + int i; + long lastcomp; + unsigned long mask; + struct rcu_data *rdp = rsp->rda[cpu]; + struct rcu_node *rnp = rcu_get_root(rsp); + + /* Set up local state, ensuring consistent view of global state. */ + spin_lock_irqsave(&rnp->lock, flags); + lastcomp = rsp->completed; + rdp->completed = lastcomp; + rdp->gpnum = lastcomp; + rdp->passed_quiesc = 0; /* We could be racing with new GP, */ + rdp->qs_pending = 1; /* so set up to respond to current GP. */ + rdp->beenonline = 1; /* We have now been online. */ + rdp->passed_quiesc_completed = lastcomp - 1; + rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); + rdp->nxtlist = NULL; + for (i = 0; i < RCU_NEXT_SIZE; i++) + rdp->nxttail[i] = &rdp->nxtlist; + rdp->qlen = 0; + rdp->blimit = blimit; +#ifdef CONFIG_NO_HZ + rdp->dynticks = &per_cpu(rcu_dynticks, cpu); +#endif /* #ifdef CONFIG_NO_HZ */ + rdp->cpu = cpu; + spin_unlock(&rnp->lock); /* irqs remain disabled. */ + + /* + * A new grace period might start here. If so, we won't be part + * of it, but that is OK, as we are currently in a quiescent state. + */ + + /* Exclude any attempts to start a new GP on large systems. */ + spin_lock(&rsp->onofflock); /* irqs already disabled. */ + + /* Add CPU to rcu_node bitmasks. */ + rnp = rdp->mynode; + mask = rdp->grpmask; + do { + /* Exclude any attempts to start a new GP on small systems. */ + spin_lock(&rnp->lock); /* irqs already disabled. */ + rnp->qsmaskinit |= mask; + mask = rnp->grpmask; + spin_unlock(&rnp->lock); /* irqs already disabled. */ + rnp = rnp->parent; + } while (rnp != NULL && !(rnp->qsmaskinit & mask)); + + spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ + + /* + * A new grace period might start here. If so, we will be part of + * it, and its gpnum will be greater than ours, so we will + * participate. It is also possible for the gpnum to have been + * incremented before this function was called, and the bitmasks + * to not be filled out until now, in which case we will also + * participate due to our gpnum being behind. + */ + + /* Since it is coming online, the CPU is in a quiescent state. */ + cpu_quiet(cpu, rsp, rdp, lastcomp); + local_irq_restore(flags); +} + +static void __cpuinit rcu_online_cpu(int cpu) +{ +#ifdef CONFIG_NO_HZ + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + + rdtp->dynticks_nesting = 1; + rdtp->dynticks |= 1; /* need consecutive #s even for hotplug. */ + rdtp->dynticks_nmi = (rdtp->dynticks_nmi + 1) & ~0x1; +#endif /* #ifdef CONFIG_NO_HZ */ + rcu_init_percpu_data(cpu, &rcu_state); + rcu_init_percpu_data(cpu, &rcu_bh_state); + open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); +} + +/* + * Handle CPU online/offline notifcation events. + */ +static int __cpuinit rcu_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + long cpu = (long)hcpu; + + switch (action) { + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + rcu_online_cpu(cpu); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: + rcu_offline_cpu(cpu); + break; + default: + break; + } + return NOTIFY_OK; +} + +/* + * Compute the per-level fanout, either using the exact fanout specified + * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT. + */ +#ifdef CONFIG_RCU_FANOUT_EXACT +static void __init rcu_init_levelspread(struct rcu_state *rsp) +{ + int i; + + for (i = NUM_RCU_LVLS - 1; i >= 0; i--) + rsp->levelspread[i] = CONFIG_RCU_FANOUT; +} +#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ +static void __init rcu_init_levelspread(struct rcu_state *rsp) +{ + int ccur; + int cprv; + int i; + + cprv = NR_CPUS; + for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { + ccur = rsp->levelcnt[i]; + rsp->levelspread[i] = (cprv + ccur - 1) / ccur; + cprv = ccur; + } +} +#endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */ + +/* + * Helper function for rcu_init() that initializes one rcu_state structure. + */ +static void __init rcu_init_one(struct rcu_state *rsp) +{ + int cpustride = 1; + int i; + int j; + struct rcu_node *rnp; + + /* Initialize the level-tracking arrays. */ + + for (i = 1; i < NUM_RCU_LVLS; i++) + rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1]; + rcu_init_levelspread(rsp); + + /* Initialize the elements themselves, starting from the leaves. */ + + for (i = NUM_RCU_LVLS - 1; i >= 0; i--) { + cpustride *= rsp->levelspread[i]; + rnp = rsp->level[i]; + for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { + spin_lock_init(&rnp->lock); + rnp->qsmask = 0; + rnp->qsmaskinit = 0; + rnp->grplo = j * cpustride; + rnp->grphi = (j + 1) * cpustride - 1; + if (rnp->grphi >= NR_CPUS) + rnp->grphi = NR_CPUS - 1; + if (i == 0) { + rnp->grpnum = 0; + rnp->grpmask = 0; + rnp->parent = NULL; + } else { + rnp->grpnum = j % rsp->levelspread[i - 1]; + rnp->grpmask = 1UL << rnp->grpnum; + rnp->parent = rsp->level[i - 1] + + j / rsp->levelspread[i - 1]; + } + rnp->level = i; + } + } +} + +/* + * Helper macro for __rcu_init(). To be used nowhere else! + * Assigns leaf node pointers into each CPU's rcu_data structure. + */ +#define RCU_DATA_PTR_INIT(rsp, rcu_data) \ +do { \ + rnp = (rsp)->level[NUM_RCU_LVLS - 1]; \ + j = 0; \ + for_each_possible_cpu(i) { \ + if (i > rnp[j].grphi) \ + j++; \ + per_cpu(rcu_data, i).mynode = &rnp[j]; \ + (rsp)->rda[i] = &per_cpu(rcu_data, i); \ + } \ +} while (0) + +static struct notifier_block __cpuinitdata rcu_nb = { + .notifier_call = rcu_cpu_notify, +}; + +void __init __rcu_init(void) +{ + int i; /* All used by RCU_DATA_PTR_INIT(). */ + int j; + struct rcu_node *rnp; + + printk(KERN_WARNING "Experimental hierarchical RCU implementation.\n"); +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR + printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ + rcu_init_one(&rcu_state); + RCU_DATA_PTR_INIT(&rcu_state, rcu_data); + rcu_init_one(&rcu_bh_state); + RCU_DATA_PTR_INIT(&rcu_bh_state, rcu_bh_data); + + for_each_online_cpu(i) + rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long)i); + /* Register notifier for non-boot CPUs */ + register_cpu_notifier(&rcu_nb); + printk(KERN_WARNING "Experimental hierarchical RCU init done.\n"); +} + +module_param(blimit, int, 0); +module_param(qhimark, int, 0); +module_param(qlowmark, int, 0); diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c new file mode 100644 index 000000000000..d6db3e837826 --- /dev/null +++ b/kernel/rcutree_trace.c @@ -0,0 +1,271 @@ +/* + * Read-Copy Update tracing for classic implementation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2008 + * + * Papers: http://www.rdrop.com/users/paulmck/RCU + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) +{ + if (!rdp->beenonline) + return; + seq_printf(m, "%3d%cc=%ld g=%ld pq=%d pqc=%ld qp=%d rpfq=%ld rp=%x", + rdp->cpu, + cpu_is_offline(rdp->cpu) ? '!' : ' ', + rdp->completed, rdp->gpnum, + rdp->passed_quiesc, rdp->passed_quiesc_completed, + rdp->qs_pending, + rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending, + (int)(rdp->n_rcu_pending & 0xffff)); +#ifdef CONFIG_NO_HZ + seq_printf(m, " dt=%d/%d dn=%d df=%lu", + rdp->dynticks->dynticks, + rdp->dynticks->dynticks_nesting, + rdp->dynticks->dynticks_nmi, + rdp->dynticks_fqs); +#endif /* #ifdef CONFIG_NO_HZ */ + seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); + seq_printf(m, " ql=%ld b=%ld\n", rdp->qlen, rdp->blimit); +} + +#define PRINT_RCU_DATA(name, func, m) \ + do { \ + int _p_r_d_i; \ + \ + for_each_possible_cpu(_p_r_d_i) \ + func(m, &per_cpu(name, _p_r_d_i)); \ + } while (0) + +static int show_rcudata(struct seq_file *m, void *unused) +{ + seq_puts(m, "rcu:\n"); + PRINT_RCU_DATA(rcu_data, print_one_rcu_data, m); + seq_puts(m, "rcu_bh:\n"); + PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data, m); + return 0; +} + +static int rcudata_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_rcudata, NULL); +} + +static struct file_operations rcudata_fops = { + .owner = THIS_MODULE, + .open = rcudata_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) +{ + if (!rdp->beenonline) + return; + seq_printf(m, "%d,%s,%ld,%ld,%d,%ld,%d,%ld,%ld", + rdp->cpu, + cpu_is_offline(rdp->cpu) ? "\"Y\"" : "\"N\"", + rdp->completed, rdp->gpnum, + rdp->passed_quiesc, rdp->passed_quiesc_completed, + rdp->qs_pending, + rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending, + rdp->n_rcu_pending); +#ifdef CONFIG_NO_HZ + seq_printf(m, ",%d,%d,%d,%lu", + rdp->dynticks->dynticks, + rdp->dynticks->dynticks_nesting, + rdp->dynticks->dynticks_nmi, + rdp->dynticks_fqs); +#endif /* #ifdef CONFIG_NO_HZ */ + seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); + seq_printf(m, ",%ld,%ld\n", rdp->qlen, rdp->blimit); +} + +static int show_rcudata_csv(struct seq_file *m, void *unused) +{ + seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",\"rpfq\",\"rp\","); +#ifdef CONFIG_NO_HZ + seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\","); +#endif /* #ifdef CONFIG_NO_HZ */ + seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\"\n"); + seq_puts(m, "\"rcu:\"\n"); + PRINT_RCU_DATA(rcu_data, print_one_rcu_data_csv, m); + seq_puts(m, "\"rcu_bh:\"\n"); + PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data_csv, m); + return 0; +} + +static int rcudata_csv_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_rcudata_csv, NULL); +} + +static struct file_operations rcudata_csv_fops = { + .owner = THIS_MODULE, + .open = rcudata_csv_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) +{ + int level = 0; + struct rcu_node *rnp; + + seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x " + "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n", + rsp->completed, rsp->gpnum, rsp->signaled, + (long)(rsp->jiffies_force_qs - jiffies), + (int)(jiffies & 0xffff), + rsp->n_force_qs, rsp->n_force_qs_ngp, + rsp->n_force_qs - rsp->n_force_qs_ngp, + rsp->n_force_qs_lh); + for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) { + if (rnp->level != level) { + seq_puts(m, "\n"); + level = rnp->level; + } + seq_printf(m, "%lx/%lx %d:%d ^%d ", + rnp->qsmask, rnp->qsmaskinit, + rnp->grplo, rnp->grphi, rnp->grpnum); + } + seq_puts(m, "\n"); +} + +static int show_rcuhier(struct seq_file *m, void *unused) +{ + seq_puts(m, "rcu:\n"); + print_one_rcu_state(m, &rcu_state); + seq_puts(m, "rcu_bh:\n"); + print_one_rcu_state(m, &rcu_bh_state); + return 0; +} + +static int rcuhier_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_rcuhier, NULL); +} + +static struct file_operations rcuhier_fops = { + .owner = THIS_MODULE, + .open = rcuhier_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int show_rcugp(struct seq_file *m, void *unused) +{ + seq_printf(m, "rcu: completed=%ld gpnum=%ld\n", + rcu_state.completed, rcu_state.gpnum); + seq_printf(m, "rcu_bh: completed=%ld gpnum=%ld\n", + rcu_bh_state.completed, rcu_bh_state.gpnum); + return 0; +} + +static int rcugp_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_rcugp, NULL); +} + +static struct file_operations rcugp_fops = { + .owner = THIS_MODULE, + .open = rcugp_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct dentry *rcudir, *datadir, *datadir_csv, *hierdir, *gpdir; +static int __init rcuclassic_trace_init(void) +{ + rcudir = debugfs_create_dir("rcu", NULL); + if (!rcudir) + goto out; + + datadir = debugfs_create_file("rcudata", 0444, rcudir, + NULL, &rcudata_fops); + if (!datadir) + goto free_out; + + datadir_csv = debugfs_create_file("rcudata.csv", 0444, rcudir, + NULL, &rcudata_csv_fops); + if (!datadir_csv) + goto free_out; + + gpdir = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops); + if (!gpdir) + goto free_out; + + hierdir = debugfs_create_file("rcuhier", 0444, rcudir, + NULL, &rcuhier_fops); + if (!hierdir) + goto free_out; + return 0; +free_out: + if (datadir) + debugfs_remove(datadir); + if (datadir_csv) + debugfs_remove(datadir_csv); + if (gpdir) + debugfs_remove(gpdir); + debugfs_remove(rcudir); +out: + return 1; +} + +static void __exit rcuclassic_trace_cleanup(void) +{ + debugfs_remove(datadir); + debugfs_remove(datadir_csv); + debugfs_remove(gpdir); + debugfs_remove(hierdir); + debugfs_remove(rcudir); +} + + +module_init(rcuclassic_trace_init); +module_exit(rcuclassic_trace_cleanup); + +MODULE_AUTHOR("Paul E. McKenney"); +MODULE_DESCRIPTION("Read-Copy Update tracing for hierarchical implementation"); +MODULE_LICENSE("GPL"); diff --git a/kernel/softirq.c b/kernel/softirq.c index e7c69a720d69..80d323e6f61a 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -269,6 +269,7 @@ void irq_enter(void) { int cpu = smp_processor_id(); + rcu_irq_enter(); if (idle_cpu(cpu) && !in_interrupt()) { __irq_enter(); tick_check_idle(cpu); @@ -295,9 +296,9 @@ void irq_exit(void) #ifdef CONFIG_NO_HZ /* Make sure that timer wheel updates are propagated */ - if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched()) - tick_nohz_stop_sched_tick(0); rcu_irq_exit(); + if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched()) + tick_nohz_stop_sched_tick(0); #endif preempt_enable_no_resched(); } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b0f239e443bc..465d822f3f5d 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -619,6 +619,19 @@ config RCU_CPU_STALL_DETECTOR Say N if you are unsure. +config RCU_CPU_STALL_DETECTOR + bool "Check for stalled CPUs delaying RCU grace periods" + depends on CLASSIC_RCU || TREE_RCU + default n + help + This option causes RCU to printk information on which + CPUs are delaying the current grace period, but only when + the grace period extends for excessive time periods. + + Say Y if you want RCU to perform such checks. + + Say N if you are unsure. + config KPROBES_SANITY_TEST bool "Kprobes sanity tests" depends on DEBUG_KERNEL -- cgit v1.2.3 From 3c8bb73ace6249bd089b70c941440441940e3365 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Thu, 18 Dec 2008 11:41:27 -0800 Subject: x86: PAT: store vm_pgoff for all linear_over_vma_region mappings - v3 Impact: Code transformation, new functions added should have no effect. Drivers use mmap followed by pgprot_* and remap_pfn_range or vm_insert_pfn, in order to export reserved memory to userspace. Currently, such mappings are not tracked and hence not kept consistent with other mappings (/dev/mem, pci resource, ioremap) for the sme memory, that may exist in the system. The following patchset adds x86 PAT attribute tracking and untracking for pfnmap related APIs. First three patches in the patchset are changing the generic mm code to fit in this tracking. Last four patches are x86 specific to make things work with x86 PAT code. The patchset aso introduces pgprot_writecombine interface, which gives writecombine mapping when enabled, falling back to pgprot_noncached otherwise. This patch: While working on x86 PAT, we faced some hurdles with trackking remap_pfn_range() regions, as we do not have any information to say whether that PFNMAP mapping is linear for the entire vma range or it is smaller granularity regions within the vma. A simple solution to this is to use vm_pgoff as an indicator for linear mapping over the vma region. Currently, remap_pfn_range only sets vm_pgoff for COW mappings. Below patch changes the logic and sets the vm_pgoff irrespective of COW. This will still not be enough for the case where pfn is zero (vma region mapped to physical address zero). But, for all the other cases, we can look at pfnmap VMAs and say whether the mappng is for the entire vma region or not. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- include/linux/mm.h | 9 +++++++++ mm/memory.c | 7 +++---- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index ffee2f743418..2be8d9b5e46f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -145,6 +145,15 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ +static inline int is_linear_pfn_mapping(struct vm_area_struct *vma) +{ + return ((vma->vm_flags & VM_PFNMAP) && vma->vm_pgoff); +} + +static inline int is_pfn_mapping(struct vm_area_struct *vma) +{ + return (vma->vm_flags & VM_PFNMAP); +} /* * vm_fault is filled by the the pagefault handler and passed to the vma's diff --git a/mm/memory.c b/mm/memory.c index 164951c47305..cef95c8c77fa 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1575,11 +1575,10 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, * behaviour that some programs depend on. We mark the "original" * un-COW'ed pages by matching them up with "vma->vm_pgoff". */ - if (is_cow_mapping(vma->vm_flags)) { - if (addr != vma->vm_start || end != vma->vm_end) - return -EINVAL; + if (addr == vma->vm_start && end == vma->vm_end) vma->vm_pgoff = pfn; - } + else if (is_cow_mapping(vma->vm_flags)) + return -EINVAL; vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; -- cgit v1.2.3 From e121e418441525b5636321fe03d16f0193ad218e Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Thu, 18 Dec 2008 11:41:28 -0800 Subject: x86: PAT: add follow_pfnmp_pte routine to help tracking pfnmap pages - v3 Impact: New currently unused interface. Add a generic interface to follow pfn in a pfnmap vma range. This is used by one of the subsequent x86 PAT related patch to keep track of memory types for vma regions across vma copy and free. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- include/linux/mm.h | 3 +++ mm/memory.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2be8d9b5e46f..a25024ff9c11 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1223,6 +1223,9 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address, #define FOLL_GET 0x04 /* do get_page on page */ #define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */ +int follow_pfnmap_pte(struct vm_area_struct *vma, + unsigned long address, pte_t *ret_ptep); + typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, diff --git a/mm/memory.c b/mm/memory.c index cef95c8c77fa..8ca6bbf34ad6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1111,6 +1111,49 @@ no_page_table: return page; } +int follow_pfnmap_pte(struct vm_area_struct *vma, unsigned long address, + pte_t *ret_ptep) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *ptep, pte; + spinlock_t *ptl; + struct page *page; + struct mm_struct *mm = vma->vm_mm; + + if (!is_pfn_mapping(vma)) + goto err; + + page = NULL; + pgd = pgd_offset(mm, address); + if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) + goto err; + + pud = pud_offset(pgd, address); + if (pud_none(*pud) || unlikely(pud_bad(*pud))) + goto err; + + pmd = pmd_offset(pud, address); + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) + goto err; + + ptep = pte_offset_map_lock(mm, pmd, address, &ptl); + + pte = *ptep; + if (!pte_present(pte)) + goto err_unlock; + + *ret_ptep = pte; + pte_unmap_unlock(ptep, ptl); + return 0; + +err_unlock: + pte_unmap_unlock(ptep, ptl); +err: + return -EINVAL; +} + /* Can we do the FOLL_ANON optimization? */ static inline int use_zero_page(struct vm_area_struct *vma) { -- cgit v1.2.3 From 2ab640379a0ab4cef746ced1d7e04a0941774bcb Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Thu, 18 Dec 2008 11:41:29 -0800 Subject: x86: PAT: hooks in generic vm code to help archs to track pfnmap regions - v3 Impact: Introduces new hooks, which are currently null. Introduce generic hooks in remap_pfn_range and vm_insert_pfn and corresponding copy and free routines with reserve and free tracking. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- include/linux/mm.h | 6 +++++ mm/memory.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 81 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index a25024ff9c11..87ecb40e11a0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -155,6 +155,12 @@ static inline int is_pfn_mapping(struct vm_area_struct *vma) return (vma->vm_flags & VM_PFNMAP); } +extern int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, + unsigned long pfn, unsigned long size); +extern int track_pfn_vma_copy(struct vm_area_struct *vma); +extern void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, + unsigned long size); + /* * vm_fault is filled by the the pagefault handler and passed to the vma's * ->fault function. The vma's ->fault is responsible for returning a bitmask diff --git a/mm/memory.c b/mm/memory.c index 8ca6bbf34ad6..1e8f0d347c0e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -99,6 +99,50 @@ int randomize_va_space __read_mostly = 2; #endif +#ifndef track_pfn_vma_new +/* + * Interface that can be used by architecture code to keep track of + * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn) + * + * track_pfn_vma_new is called when a _new_ pfn mapping is being established + * for physical range indicated by pfn and size. + */ +int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, + unsigned long pfn, unsigned long size) +{ + return 0; +} +#endif + +#ifndef track_pfn_vma_copy +/* + * Interface that can be used by architecture code to keep track of + * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn) + * + * track_pfn_vma_copy is called when vma that is covering the pfnmap gets + * copied through copy_page_range(). + */ +int track_pfn_vma_copy(struct vm_area_struct *vma) +{ + return 0; +} +#endif + +#ifndef untrack_pfn_vma +/* + * Interface that can be used by architecture code to keep track of + * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn) + * + * untrack_pfn_vma is called while unmapping a pfnmap for a region. + * untrack can be called for a specific region indicated by pfn and size or + * can be for the entire vma (in which case size can be zero). + */ +void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, + unsigned long size) +{ +} +#endif + static int __init disable_randmaps(char *s) { randomize_va_space = 0; @@ -669,6 +713,16 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, if (is_vm_hugetlb_page(vma)) return copy_hugetlb_page_range(dst_mm, src_mm, vma); + if (is_pfn_mapping(vma)) { + /* + * We do not free on error cases below as remove_vma + * gets called on error from higher level routine + */ + ret = track_pfn_vma_copy(vma); + if (ret) + return ret; + } + /* * We need to invalidate the secondary MMU mappings only when * there could be a permission downgrade on the ptes of the @@ -915,6 +969,9 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, if (vma->vm_flags & VM_ACCOUNT) *nr_accounted += (end - start) >> PAGE_SHIFT; + if (is_pfn_mapping(vma)) + untrack_pfn_vma(vma, 0, 0); + while (start != end) { if (!tlb_start_valid) { tlb_start = start; @@ -1473,6 +1530,7 @@ out: int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn) { + int ret; /* * Technically, architectures with pte_special can avoid all these * restrictions (same for remap_pfn_range). However we would like @@ -1487,7 +1545,15 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, if (addr < vma->vm_start || addr >= vma->vm_end) return -EFAULT; - return insert_pfn(vma, addr, pfn, vma->vm_page_prot); + if (track_pfn_vma_new(vma, vma->vm_page_prot, pfn, PAGE_SIZE)) + return -EINVAL; + + ret = insert_pfn(vma, addr, pfn, vma->vm_page_prot); + + if (ret) + untrack_pfn_vma(vma, pfn, PAGE_SIZE); + + return ret; } EXPORT_SYMBOL(vm_insert_pfn); @@ -1625,6 +1691,10 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; + err = track_pfn_vma_new(vma, prot, pfn, PAGE_ALIGN(size)); + if (err) + return -EINVAL; + BUG_ON(addr >= end); pfn -= addr >> PAGE_SHIFT; pgd = pgd_offset(mm, addr); @@ -1636,6 +1706,10 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, if (err) break; } while (pgd++, addr = next, addr != end); + + if (err) + untrack_pfn_vma(vma, pfn, PAGE_ALIGN(size)); + return err; } EXPORT_SYMBOL(remap_pfn_range); -- cgit v1.2.3 From 2520bd3123c00272f818a176c92d03c7d0a113d6 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Thu, 18 Dec 2008 11:41:32 -0800 Subject: x86: PAT: add pgprot_writecombine() interface for drivers - v3 Impact: New mm functionality. Add pgprot_writecombine. pgprot_writecombine will be aliased to pgprot_noncached when not supported by the architecture. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/pgtable.h | 3 +++ arch/x86/mm/pat.c | 8 ++++++++ include/asm-generic/pgtable.h | 4 ++++ 3 files changed, 15 insertions(+) (limited to 'include') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 6968d4f6be3e..579f8ceee948 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -168,6 +168,9 @@ #ifndef __ASSEMBLY__ +#define pgprot_writecombine pgprot_writecombine +extern pgprot_t pgprot_writecombine(pgprot_t prot); + /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 1069ffecf77d..d5254bae84f4 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -832,6 +832,14 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, } } +pgprot_t pgprot_writecombine(pgprot_t prot) +{ + if (pat_enabled) + return __pgprot(pgprot_val(prot) | _PAGE_CACHE_WC); + else + return pgprot_noncached(prot); +} + #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) /* get Nth element of the linked list */ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index ef87f889ef62..b84633801fb6 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -129,6 +129,10 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres #define move_pte(pte, prot, old_addr, new_addr) (pte) #endif +#ifndef pgprot_writecombine +#define pgprot_writecombine pgprot_noncached +#endif + /* * When walking page tables, get the address of the next boundary, * or the end address of the range if that comes earlier. Although no -- cgit v1.2.3 From c71dd42db2c6f1637b92502a214587431c1a6ad2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 19 Dec 2008 01:09:51 +0100 Subject: tracing: fix warnings in kernel/trace/trace_sched_switch.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit these warnings: kernel/trace/trace_sched_switch.c: In function ‘tracing_sched_register’: kernel/trace/trace_sched_switch.c:96: warning: passing argument 1 of ‘register_trace_sched_wakeup_new’ from incompatible pointer type kernel/trace/trace_sched_switch.c:112: warning: passing argument 1 of ‘unregister_trace_sched_wakeup_new’ from incompatible pointer type kernel/trace/trace_sched_switch.c: In function ‘tracing_sched_unregister’: kernel/trace/trace_sched_switch.c:121: warning: passing argument 1 of ‘unregister_trace_sched_wakeup_new’ from incompatible pointer type Trigger because sched_wakeup_new tracepoints need the same trace signature as sched_wakeup - which was changed recently. Fix it. Signed-off-by: Ingo Molnar --- include/trace/sched.h | 4 ++-- kernel/sched.c | 2 +- kernel/trace/trace_sched_switch.c | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/sched.h b/include/trace/sched.h index f4549d506b16..bc4c9eadc6ba 100644 --- a/include/trace/sched.h +++ b/include/trace/sched.h @@ -21,8 +21,8 @@ DECLARE_TRACE(sched_wakeup, TPARGS(rq, p)); DECLARE_TRACE(sched_wakeup_new, - TPPROTO(struct rq *rq, struct task_struct *p), - TPARGS(rq, p)); + TPPROTO(struct rq *rq, struct task_struct *p, int success), + TPARGS(rq, p, success)); DECLARE_TRACE(sched_switch, TPPROTO(struct rq *rq, struct task_struct *prev, diff --git a/kernel/sched.c b/kernel/sched.c index d377097572f9..ac5a70a87d1e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2457,7 +2457,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) p->sched_class->task_new(rq, p); inc_nr_running(rq); } - trace_sched_wakeup_new(rq, p); + trace_sched_wakeup_new(rq, p, 1); check_preempt_curr(rq, p, 0); #ifdef CONFIG_SMP if (p->sched_class->task_wake_up) diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 863390557b44..781d72ef873c 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -247,3 +247,4 @@ __init static int init_sched_switch_trace(void) return register_tracer(&sched_switch_trace); } device_initcall(init_sched_switch_trace); + -- cgit v1.2.3 From 078a55db075bf4dcc03115dc94875b3dd83f69d4 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 18 Dec 2008 16:57:52 -0800 Subject: sparseirq: add kernel-doc notation for new member in irq_desc, -v2 Signed-off-by: Yinghai Lu Acked-by: Randy Dunlap Signed-off-by: Ingo Molnar --- include/linux/irq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 36a015746788..7fa7f30fccb6 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -134,6 +134,9 @@ struct irq_2_iommu; /** * struct irq_desc - interrupt descriptor * @irq: interrupt number for this descriptor + * @timer_rand_state: pointer to timer rand state struct + * @kstat_irqs: irq stats per cpu + * @irq_2_iommu: iommu with this irq * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()] * @chip: low level interrupt hardware access * @msi_desc: MSI descriptor -- cgit v1.2.3 From e76f42761197dd6e9405e2eeb35932acfede115a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 13 Nov 2008 17:30:13 -0600 Subject: ACPI: fix 2.6.28 acpi.debug_level regression acpi_early_init() was changed to over-write the cmdline param, making it really inconvenient to set debug flags at boot-time. Also, This sets the default level to "info", which is what all the ACPI drivers use. So to enable messages from drivers, you only have to supply the "layer" (a.k.a. "component"). For non-"info" ACPI core and ACPI interpreter messages, you have to supply both level and layer masks, as before. Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown --- Documentation/kernel-parameters.txt | 11 +++++++---- drivers/acpi/bus.c | 8 -------- drivers/acpi/utilities/utglobal.c | 2 +- include/acpi/acoutput.h | 2 +- 4 files changed, 9 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e0f346d201ed..c9115c1b672c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -220,14 +220,17 @@ and is between 256 and 4096 characters. It is defined in the file Bits in debug_level correspond to a level in ACPI_DEBUG_PRINT statements, e.g., ACPI_DEBUG_PRINT((ACPI_DB_INFO, ... - See Documentation/acpi/debug.txt for more information - about debug layers and levels. + The debug_level mask defaults to "info". See + Documentation/acpi/debug.txt for more information about + debug layers and levels. + Enable processor driver info messages: + acpi.debug_layer=0x20000000 + Enable PCI/PCI interrupt routing info messages: + acpi.debug_layer=0x400000 Enable AML "Debug" output, i.e., stores to the Debug object while interpreting AML: acpi.debug_layer=0xffffffff acpi.debug_level=0x2 - Enable PCI/PCI interrupt routing info messages: - acpi.debug_layer=0x400000 acpi.debug_level=0x4 Enable all messages related to ACPI hardware: acpi.debug_layer=0x2 acpi.debug_level=0xffffffff diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 7edf6d913c13..765fd1c56cd6 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -688,14 +688,6 @@ void __init acpi_early_init(void) if (acpi_disabled) return; - /* - * ACPI CA initializes acpi_dbg_level to non-zero, which means - * we get debug output merely by turning on CONFIG_ACPI_DEBUG. - * Turn it off so we don't get output unless the user specifies - * acpi.debug_level. - */ - acpi_dbg_level = 0; - printk(KERN_INFO PREFIX "Core revision %08x\n", ACPI_CA_VERSION); /* enable workarounds, unless strict ACPI spec. compliance */ diff --git a/drivers/acpi/utilities/utglobal.c b/drivers/acpi/utilities/utglobal.c index 670551b95e56..17ed5ac840f7 100644 --- a/drivers/acpi/utilities/utglobal.c +++ b/drivers/acpi/utilities/utglobal.c @@ -64,7 +64,7 @@ u32 acpi_dbg_level = ACPI_DEBUG_DEFAULT; /* Debug switch - layer (component) mask */ -u32 acpi_dbg_layer = ACPI_COMPONENT_DEFAULT | ACPI_ALL_DRIVERS; +u32 acpi_dbg_layer = 0; u32 acpi_gbl_nesting_level = 0; /* Debugger globals */ diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h index 09d33c7740f0..db8852d8bcf7 100644 --- a/include/acpi/acoutput.h +++ b/include/acpi/acoutput.h @@ -172,7 +172,7 @@ /* Defaults for debug_level, debug and normal */ -#define ACPI_DEBUG_DEFAULT (ACPI_LV_INIT | ACPI_LV_DEBUG_OBJECT) +#define ACPI_DEBUG_DEFAULT (ACPI_LV_INFO) #define ACPI_NORMAL_DEFAULT (ACPI_LV_INIT | ACPI_LV_DEBUG_OBJECT) #define ACPI_DEBUG_ALL (ACPI_LV_AML_DISASSEMBLE | ACPI_LV_ALL_EXCEPTIONS | ACPI_LV_ALL) -- cgit v1.2.3 From abe1dfab60e1839d115930286cb421f5a5b193f3 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 26 Nov 2008 14:35:22 +0800 Subject: ACPI: don't cond_resched() when irqs_disabled() The ACPI interpreter usually runs with irqs enabled. However, during suspend/resume it runs with irqs disabled to evaluate _GTS/_BFS, as well as by irqrouter_resume() which evaluates _CRS, _PRS, _SRS. http://bugzilla.kernel.org/show_bug.cgi?id=12252 Signed-off-by: Wu Fengguang Signed-off-by: Len Brown --- include/acpi/platform/aclinux.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h index 029c8c06c151..0515e754449d 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h @@ -141,6 +141,10 @@ static inline void *acpi_os_acquire_object(acpi_cache_t * cache) /* * We need to show where it is safe to preempt execution of ACPICA */ -#define ACPI_PREEMPTION_POINT() cond_resched() +#define ACPI_PREEMPTION_POINT() \ + do { \ + if (!irqs_disabled()) \ + cond_resched(); \ + } while (0) #endif /* __ACLINUX_H__ */ -- cgit v1.2.3 From 420e7fabd9c6d907280ed6b3e40eef425c5d8d8d Mon Sep 17 00:00:00 2001 From: Henning Rogge Date: Thu, 11 Dec 2008 22:04:19 +0100 Subject: nl80211: Add signal strength and bandwith to nl80211station info This patch adds signal strength and transmission bitrate to the station_info of nl80211. Signed-off-by: Henning Rogge Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 31 ++++++++++++++++++++++++++ include/net/cfg80211.h | 40 ++++++++++++++++++++++++++++++++++ net/mac80211/cfg.c | 25 ++++++++++++++++++++- net/wireless/nl80211.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 152 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 04d4516f9c71..7501acfcfdc4 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -424,6 +424,32 @@ enum nl80211_sta_flags { NL80211_STA_FLAG_MAX = __NL80211_STA_FLAG_AFTER_LAST - 1 }; +/** + * enum nl80211_rate_info - bitrate information + * + * These attribute types are used with %NL80211_STA_INFO_TXRATE + * when getting information about the bitrate of a station. + * + * @__NL80211_RATE_INFO_INVALID: attribute number 0 is reserved + * @NL80211_RATE_INFO_BITRATE: total bitrate (u16, 100kbit/s) + * @NL80211_RATE_INFO_MCS: mcs index for 802.11n (u8) + * @NL80211_RATE_INFO_40_MHZ_WIDTH: 40 Mhz dualchannel bitrate + * @NL80211_RATE_INFO_SHORT_GI: 400ns guard interval + * @NL80211_RATE_INFO_MAX: highest rate_info number currently defined + * @__NL80211_RATE_INFO_AFTER_LAST: internal use + */ +enum nl80211_rate_info { + __NL80211_RATE_INFO_INVALID, + NL80211_RATE_INFO_BITRATE, + NL80211_RATE_INFO_MCS, + NL80211_RATE_INFO_40_MHZ_WIDTH, + NL80211_RATE_INFO_SHORT_GI, + + /* keep last */ + __NL80211_RATE_INFO_AFTER_LAST, + NL80211_RATE_INFO_MAX = __NL80211_RATE_INFO_AFTER_LAST - 1 +}; + /** * enum nl80211_sta_info - station information * @@ -436,6 +462,9 @@ enum nl80211_sta_flags { * @NL80211_STA_INFO_TX_BYTES: total transmitted bytes (u32, to this station) * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute + * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm) + * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute + * containing info as possible, see &enum nl80211_sta_info_txrate. */ enum nl80211_sta_info { __NL80211_STA_INFO_INVALID, @@ -445,6 +474,8 @@ enum nl80211_sta_info { NL80211_STA_INFO_LLID, NL80211_STA_INFO_PLID, NL80211_STA_INFO_PLINK_STATE, + NL80211_STA_INFO_SIGNAL, + NL80211_STA_INFO_TX_BITRATE, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a0c0bf19496c..65e03ac93109 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -169,6 +169,9 @@ struct station_parameters { * @STATION_INFO_LLID: @llid filled * @STATION_INFO_PLID: @plid filled * @STATION_INFO_PLINK_STATE: @plink_state filled + * @STATION_INFO_SIGNAL: @signal filled + * @STATION_INFO_TX_BITRATE: @tx_bitrate fields are filled + * (tx_bitrate, tx_bitrate_flags and tx_bitrate_mcs) */ enum station_info_flags { STATION_INFO_INACTIVE_TIME = 1<<0, @@ -177,6 +180,39 @@ enum station_info_flags { STATION_INFO_LLID = 1<<3, STATION_INFO_PLID = 1<<4, STATION_INFO_PLINK_STATE = 1<<5, + STATION_INFO_SIGNAL = 1<<6, + STATION_INFO_TX_BITRATE = 1<<7, +}; + +/** + * enum station_info_rate_flags - bitrate info flags + * + * Used by the driver to indicate the specific rate transmission + * type for 802.11n transmissions. + * + * @RATE_INFO_FLAGS_MCS: @tx_bitrate_mcs filled + * @RATE_INFO_FLAGS_40_MHZ_WIDTH: 40 Mhz width transmission + * @RATE_INFO_FLAGS_SHORT_GI: 400ns guard interval + */ +enum rate_info_flags { + RATE_INFO_FLAGS_MCS = 1<<0, + RATE_INFO_FLAGS_40_MHZ_WIDTH = 1<<1, + RATE_INFO_FLAGS_SHORT_GI = 1<<2, +}; + +/** + * struct rate_info - bitrate information + * + * Information about a receiving or transmitting bitrate + * + * @flags: bitflag of flags from &enum rate_info_flags + * @mcs: mcs index if struct describes a 802.11n bitrate + * @legacy: bitrate in 100kbit/s for 802.11abg + */ +struct rate_info { + u8 flags; + u8 mcs; + u16 legacy; }; /** @@ -191,6 +227,8 @@ enum station_info_flags { * @llid: mesh local link id * @plid: mesh peer link id * @plink_state: mesh peer link state + * @signal: signal strength of last received packet in dBm + * @txrate: current unicast bitrate to this station */ struct station_info { u32 filled; @@ -200,6 +238,8 @@ struct station_info { u16 llid; u16 plid; u8 plink_state; + s8 signal; + struct rate_info txrate; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 7912eb14eca0..23b5eeaf7bc5 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -310,12 +310,35 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->filled = STATION_INFO_INACTIVE_TIME | STATION_INFO_RX_BYTES | - STATION_INFO_TX_BYTES; + STATION_INFO_TX_BYTES | + STATION_INFO_TX_BITRATE; sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); sinfo->rx_bytes = sta->rx_bytes; sinfo->tx_bytes = sta->tx_bytes; + if (sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) { + sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->signal = (s8)sta->last_signal; + } + + sinfo->txrate.flags = 0; + if (sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS) + sinfo->txrate.flags |= RATE_INFO_FLAGS_MCS; + if (sta->last_tx_rate.flags & IEEE80211_TX_RC_40_MHZ_WIDTH) + sinfo->txrate.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; + if (sta->last_tx_rate.flags & IEEE80211_TX_RC_SHORT_GI) + sinfo->txrate.flags |= RATE_INFO_FLAGS_SHORT_GI; + + if (!(sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS)) { + struct ieee80211_supported_band *sband; + sband = sta->local->hw.wiphy->bands[ + sta->local->hw.conf.channel->band]; + sinfo->txrate.legacy = + sband->bitrates[sta->last_tx_rate.idx].bitrate; + } else + sinfo->txrate.mcs = sta->last_tx_rate.idx; + if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH sinfo->filled |= STATION_INFO_LLID | diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4335f76be71f..93c9b983ce08 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1091,12 +1091,46 @@ static int parse_station_flags(struct nlattr *nla, u32 *staflags) return 0; } +static u16 nl80211_calculate_bitrate(struct rate_info *rate) +{ + int modulation, streams, bitrate; + + if (!(rate->flags & RATE_INFO_FLAGS_MCS)) + return rate->legacy; + + /* the formula below does only work for MCS values smaller than 32 */ + if (rate->mcs >= 32) + return 0; + + modulation = rate->mcs & 7; + streams = (rate->mcs >> 3) + 1; + + bitrate = (rate->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH) ? + 13500000 : 6500000; + + if (modulation < 4) + bitrate *= (modulation + 1); + else if (modulation == 4) + bitrate *= (modulation + 2); + else + bitrate *= (modulation + 3); + + bitrate *= streams; + + if (rate->flags & RATE_INFO_FLAGS_SHORT_GI) + bitrate = (bitrate / 9) * 10; + + /* do NOT round down here */ + return (bitrate + 50000) / 100000; +} + static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct net_device *dev, u8 *mac_addr, struct station_info *sinfo) { void *hdr; - struct nlattr *sinfoattr; + struct nlattr *sinfoattr, *txrate; + u16 bitrate; hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION); if (!hdr) @@ -1126,7 +1160,29 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, if (sinfo->filled & STATION_INFO_PLINK_STATE) NLA_PUT_U8(msg, NL80211_STA_INFO_PLINK_STATE, sinfo->plink_state); + if (sinfo->filled & STATION_INFO_SIGNAL) + NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL, + sinfo->signal); + if (sinfo->filled & STATION_INFO_TX_BITRATE) { + txrate = nla_nest_start(msg, NL80211_STA_INFO_TX_BITRATE); + if (!txrate) + goto nla_put_failure; + + /* nl80211_calculate_bitrate will return 0 for mcs >= 32 */ + bitrate = nl80211_calculate_bitrate(&sinfo->txrate); + if (bitrate > 0) + NLA_PUT_U16(msg, NL80211_RATE_INFO_BITRATE, bitrate); + if (sinfo->txrate.flags & RATE_INFO_FLAGS_MCS) + NLA_PUT_U8(msg, NL80211_RATE_INFO_MCS, + sinfo->txrate.mcs); + if (sinfo->txrate.flags & RATE_INFO_FLAGS_40_MHZ_WIDTH) + NLA_PUT_FLAG(msg, NL80211_RATE_INFO_40_MHZ_WIDTH); + if (sinfo->txrate.flags & RATE_INFO_FLAGS_SHORT_GI) + NLA_PUT_FLAG(msg, NL80211_RATE_INFO_SHORT_GI); + + nla_nest_end(msg, txrate); + } nla_nest_end(msg, sinfoattr); return genlmsg_end(msg, hdr); -- cgit v1.2.3 From 094d05dc32fc2930e381189a942016e5561775d9 Mon Sep 17 00:00:00 2001 From: Sujith Date: Fri, 12 Dec 2008 11:57:43 +0530 Subject: mac80211: Fix HT channel selection HT management is done differently for AP and STA modes, unify to just the ->config() callback since HT is fundamentally a PHY property and cannot be per-BSS. Rename enum nl80211_sec_chan_offset as nl80211_channel_type to denote the channel type ( NO_HT, HT20, HT40+, HT40- ). Signed-off-by: Johannes Berg Signed-off-by: Sujith Signed-off-by: John W. Linville --- drivers/net/wireless/ath9k/main.c | 123 ++++++++------------------------- drivers/net/wireless/iwlwifi/iwl-agn.c | 18 +++-- drivers/net/wireless/mac80211_hwsim.c | 6 +- include/linux/nl80211.h | 22 +++--- include/net/cfg80211.h | 2 +- include/net/mac80211.h | 9 +-- net/mac80211/cfg.c | 4 +- net/mac80211/ht.c | 35 +++++++--- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/main.c | 27 +++----- net/mac80211/mlme.c | 1 + net/mac80211/util.c | 2 +- net/wireless/nl80211.c | 27 ++++---- 13 files changed, 109 insertions(+), 169 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c index 02e1771bb274..e22fea18bad6 100644 --- a/drivers/net/wireless/ath9k/main.c +++ b/drivers/net/wireless/ath9k/main.c @@ -623,37 +623,40 @@ static int ath_get_channel(struct ath_softc *sc, return -1; } -/* ext_chan_offset: (-1, 0, 1) (below, none, above) */ - static u32 ath_get_extchanmode(struct ath_softc *sc, struct ieee80211_channel *chan, - int ext_chan_offset, - enum ath9k_ht_macmode tx_chan_width) + enum nl80211_channel_type channel_type) { u32 chanmode = 0; switch (chan->band) { case IEEE80211_BAND_2GHZ: - if ((ext_chan_offset == 0) && - (tx_chan_width == ATH9K_HT_MACMODE_20)) + switch(channel_type) { + case NL80211_CHAN_NO_HT: + case NL80211_CHAN_HT20: chanmode = CHANNEL_G_HT20; - if ((ext_chan_offset == 1) && - (tx_chan_width == ATH9K_HT_MACMODE_2040)) + break; + case NL80211_CHAN_HT40PLUS: chanmode = CHANNEL_G_HT40PLUS; - if ((ext_chan_offset == -1) && - (tx_chan_width == ATH9K_HT_MACMODE_2040)) + break; + case NL80211_CHAN_HT40MINUS: chanmode = CHANNEL_G_HT40MINUS; + break; + } break; case IEEE80211_BAND_5GHZ: - if ((ext_chan_offset == 0) && - (tx_chan_width == ATH9K_HT_MACMODE_20)) + switch(channel_type) { + case NL80211_CHAN_NO_HT: + case NL80211_CHAN_HT20: chanmode = CHANNEL_A_HT20; - if ((ext_chan_offset == 1) && - (tx_chan_width == ATH9K_HT_MACMODE_2040)) + break; + case NL80211_CHAN_HT40PLUS: chanmode = CHANNEL_A_HT40PLUS; - if ((ext_chan_offset == -1) && - (tx_chan_width == ATH9K_HT_MACMODE_2040)) + break; + case NL80211_CHAN_HT40MINUS: chanmode = CHANNEL_A_HT40MINUS; + break; + } break; default: break; @@ -829,45 +832,15 @@ static void setup_ht_cap(struct ieee80211_sta_ht_cap *ht_info) ht_info->mcs.tx_params = IEEE80211_HT_MCS_TX_DEFINED; } -static void ath9k_ht_conf(struct ath_softc *sc, - struct ieee80211_bss_conf *bss_conf) -{ - if (sc->hw->conf.ht.enabled) { - if (bss_conf->ht.width_40_ok) - sc->tx_chan_width = ATH9K_HT_MACMODE_2040; - else - sc->tx_chan_width = ATH9K_HT_MACMODE_20; - - ath9k_hw_set11nmac2040(sc->sc_ah, sc->tx_chan_width); - - DPRINTF(sc, ATH_DBG_CONFIG, - "BSS Changed HT, chanwidth: %d\n", sc->tx_chan_width); - } -} - -static inline int ath_sec_offset(u8 ext_offset) -{ - if (ext_offset == IEEE80211_HT_PARAM_CHA_SEC_NONE) - return 0; - else if (ext_offset == IEEE80211_HT_PARAM_CHA_SEC_ABOVE) - return 1; - else if (ext_offset == IEEE80211_HT_PARAM_CHA_SEC_BELOW) - return -1; - - return 0; -} - static void ath9k_bss_assoc_info(struct ath_softc *sc, struct ieee80211_vif *vif, struct ieee80211_bss_conf *bss_conf) { - struct ieee80211_hw *hw = sc->hw; - struct ieee80211_channel *curchan = hw->conf.channel; struct ath_vap *avp = (void *)vif->drv_priv; - int pos; if (bss_conf->assoc) { - DPRINTF(sc, ATH_DBG_CONFIG, "Bss Info ASSOC %d\n", bss_conf->aid); + DPRINTF(sc, ATH_DBG_CONFIG, "Bss Info ASSOC %d, bssid: %pM\n", + bss_conf->aid, sc->sc_curbssid); /* New association, store aid */ if (avp->av_opmode == NL80211_IFTYPE_STATION) { @@ -886,40 +859,6 @@ static void ath9k_bss_assoc_info(struct ath_softc *sc, sc->sc_halstats.ns_avgtxrssi = ATH_RSSI_DUMMY_MARKER; sc->sc_halstats.ns_avgtxrate = ATH_RATE_DUMMY_MARKER; - /* Update chainmask */ - ath_update_chainmask(sc, hw->conf.ht.enabled); - - DPRINTF(sc, ATH_DBG_CONFIG, - "bssid %pM aid 0x%x\n", - sc->sc_curbssid, sc->sc_curaid); - - pos = ath_get_channel(sc, curchan); - if (pos == -1) { - DPRINTF(sc, ATH_DBG_FATAL, - "Invalid channel: %d\n", curchan->center_freq); - return; - } - - if (hw->conf.ht.enabled) { - int offset = - ath_sec_offset(bss_conf->ht.secondary_channel_offset); - sc->tx_chan_width = (bss_conf->ht.width_40_ok) ? - ATH9K_HT_MACMODE_2040 : ATH9K_HT_MACMODE_20; - - sc->sc_ah->ah_channels[pos].chanmode = - ath_get_extchanmode(sc, curchan, - offset, sc->tx_chan_width); - } else { - sc->sc_ah->ah_channels[pos].chanmode = - (curchan->band == IEEE80211_BAND_2GHZ) ? - CHANNEL_G : CHANNEL_A; - } - - /* set h/w channel */ - if (ath_set_channel(sc, &sc->sc_ah->ah_channels[pos]) < 0) - DPRINTF(sc, ATH_DBG_FATAL, "Unable to set channel: %d\n", - curchan->center_freq); - /* Start ANI */ mod_timer(&sc->sc_ani.timer, jiffies + msecs_to_jiffies(ATH_ANI_POLLINTERVAL)); @@ -2146,7 +2085,8 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed) struct ath_softc *sc = hw->priv; struct ieee80211_conf *conf = &hw->conf; - if (changed & IEEE80211_CONF_CHANGE_CHANNEL) { + if (changed & (IEEE80211_CONF_CHANGE_CHANNEL | + IEEE80211_CONF_CHANGE_HT)) { struct ieee80211_channel *curchan = hw->conf.channel; int pos; @@ -2165,25 +2105,23 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed) (curchan->band == IEEE80211_BAND_2GHZ) ? CHANNEL_G : CHANNEL_A; - if ((sc->sc_ah->ah_opmode == NL80211_IFTYPE_AP) && - (conf->ht.enabled)) { - sc->tx_chan_width = (!!conf->ht.sec_chan_offset) ? - ATH9K_HT_MACMODE_2040 : ATH9K_HT_MACMODE_20; + if (conf->ht.enabled) { + if (conf->ht.channel_type == NL80211_CHAN_HT40PLUS || + conf->ht.channel_type == NL80211_CHAN_HT40MINUS) + sc->tx_chan_width = ATH9K_HT_MACMODE_2040; sc->sc_ah->ah_channels[pos].chanmode = ath_get_extchanmode(sc, curchan, - conf->ht.sec_chan_offset, - sc->tx_chan_width); + conf->ht.channel_type); } if (ath_set_channel(sc, &sc->sc_ah->ah_channels[pos]) < 0) { DPRINTF(sc, ATH_DBG_FATAL, "Unable to set channel\n"); return -EINVAL; } - } - if (changed & IEEE80211_CONF_CHANGE_HT) ath_update_chainmask(sc, conf->ht.enabled); + } if (changed & IEEE80211_CONF_CHANGE_POWER) sc->sc_config.txpowlimit = 2 * conf->power_level; @@ -2417,9 +2355,6 @@ static void ath9k_bss_info_changed(struct ieee80211_hw *hw, sc->sc_flags &= ~SC_OP_PROTECT_ENABLE; } - if (changed & BSS_CHANGED_HT) - ath9k_ht_conf(sc, bss_conf); - if (changed & BSS_CHANGED_ASSOC) { DPRINTF(sc, ATH_DBG_CONFIG, "BSS Changed ASSOC %d\n", bss_conf->assoc); diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index 2f5e86e12916..bbc1c8052ffa 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -515,19 +515,27 @@ static void iwl_ht_conf(struct iwl_priv *priv, iwl_conf->supported_chan_width = !!(ht_conf->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40); - iwl_conf->extension_chan_offset = bss_conf->ht.secondary_channel_offset; + /* + * XXX: The HT configuration needs to be moved into iwl_mac_config() + * to be done there correctly. + */ + + iwl_conf->extension_chan_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE; + if (priv->hw->conf.ht.channel_type == NL80211_CHAN_HT40MINUS) + iwl_conf->extension_chan_offset = IEEE80211_HT_PARAM_CHA_SEC_BELOW; + else if(priv->hw->conf.ht.channel_type == NL80211_CHAN_HT40PLUS) + iwl_conf->extension_chan_offset = IEEE80211_HT_PARAM_CHA_SEC_ABOVE; + /* If no above or below channel supplied disable FAT channel */ if (iwl_conf->extension_chan_offset != IEEE80211_HT_PARAM_CHA_SEC_ABOVE && - iwl_conf->extension_chan_offset != IEEE80211_HT_PARAM_CHA_SEC_BELOW) { - iwl_conf->extension_chan_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE; + iwl_conf->extension_chan_offset != IEEE80211_HT_PARAM_CHA_SEC_BELOW) iwl_conf->supported_chan_width = 0; - } iwl_conf->sm_ps = (u8)((ht_conf->cap & IEEE80211_HT_CAP_SM_PS) >> 2); memcpy(&iwl_conf->mcs, &ht_conf->mcs, 16); - iwl_conf->tx_chan_width = bss_conf->ht.width_40_ok; + iwl_conf->tx_chan_width = iwl_conf->supported_chan_width != 0; iwl_conf->ht_protection = bss_conf->ht.operation_mode & IEEE80211_HT_OP_MODE_PROTECTION; iwl_conf->non_GF_STA_present = diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index fd5a537ac51d..f83d69e813d3 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -495,11 +495,9 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw, } if (changed & BSS_CHANGED_HT) { - printk(KERN_DEBUG " %s: HT: sec_ch_offs=%d width_40_ok=%d " - "op_mode=%d\n", + printk(KERN_DEBUG " %s: HT: op_mode=0x%x\n", wiphy_name(hw->wiphy), - info->ht.secondary_channel_offset, - info->ht.width_40_ok, info->ht.operation_mode); + info->ht.operation_mode); } if (changed & BSS_CHANGED_BASIC_RATES) { diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 7501acfcfdc4..e86ed59f9ad5 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -201,13 +201,13 @@ enum nl80211_commands { * @NL80211_ATTR_WIPHY_NAME: wiphy name (used for renaming) * @NL80211_ATTR_WIPHY_TXQ_PARAMS: a nested array of TX queue parameters * @NL80211_ATTR_WIPHY_FREQ: frequency of the selected channel in MHz - * @NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET: included with NL80211_ATTR_WIPHY_FREQ + * @NL80211_ATTR_WIPHY_CHANNEL_TYPE: included with NL80211_ATTR_WIPHY_FREQ * if HT20 or HT40 are allowed (i.e., 802.11n disabled if not included): - * NL80211_SEC_CHAN_NO_HT = HT not allowed (i.e., same as not including + * NL80211_CHAN_NO_HT = HT not allowed (i.e., same as not including * this attribute) - * NL80211_SEC_CHAN_DISABLED = HT20 only - * NL80211_SEC_CHAN_BELOW = secondary channel is below the primary channel - * NL80211_SEC_CHAN_ABOVE = secondary channel is above the primary channel + * NL80211_CHAN_HT20 = HT20 only + * NL80211_CHAN_HT40MINUS = secondary channel is below the primary channel + * NL80211_CHAN_HT40PLUS = secondary channel is above the primary channel * * @NL80211_ATTR_IFINDEX: network interface index of the device to operate on * @NL80211_ATTR_IFNAME: network interface name @@ -344,7 +344,7 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_TXQ_PARAMS, NL80211_ATTR_WIPHY_FREQ, - NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET, + NL80211_ATTR_WIPHY_CHANNEL_TYPE, /* add attributes here, update the policy in nl80211.c */ @@ -805,10 +805,10 @@ enum nl80211_txq_q { NL80211_TXQ_Q_BK }; -enum nl80211_sec_chan_offset { - NL80211_SEC_CHAN_NO_HT /* No HT */, - NL80211_SEC_CHAN_DISABLED /* HT20 only */, - NL80211_SEC_CHAN_BELOW /* HT40- */, - NL80211_SEC_CHAN_ABOVE /* HT40+ */ +enum nl80211_channel_type { + NL80211_CHAN_NO_HT, + NL80211_CHAN_HT20, + NL80211_CHAN_HT40MINUS, + NL80211_CHAN_HT40PLUS }; #endif /* __LINUX_NL80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 65e03ac93109..23c0ab74ded6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -563,7 +563,7 @@ struct cfg80211_ops { int (*set_channel)(struct wiphy *wiphy, struct ieee80211_channel *chan, - enum nl80211_sec_chan_offset); + enum nl80211_channel_type channel_type); }; /* temporary wext handlers */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 046ce692a906..22ae72c58d76 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -165,14 +165,9 @@ enum ieee80211_bss_change { /** * struct ieee80211_bss_ht_conf - BSS's changing HT configuration - * @secondary_channel_offset: secondary channel offset, uses - * %IEEE80211_HT_PARAM_CHA_SEC_ values - * @width_40_ok: indicates that 40 MHz bandwidth may be used for TX * @operation_mode: HT operation mode (like in &struct ieee80211_ht_info) */ struct ieee80211_bss_ht_conf { - u8 secondary_channel_offset; - bool width_40_ok; u16 operation_mode; }; @@ -508,9 +503,7 @@ static inline int __deprecated __IEEE80211_CONF_SHORT_SLOT_TIME(void) struct ieee80211_ht_conf { bool enabled; - int sec_chan_offset; /* 0 = HT40 disabled; -1 = HT40 enabled, secondary - * channel below primary; 1 = HT40 enabled, - * secondary channel above primary */ + enum nl80211_channel_type channel_type; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 23b5eeaf7bc5..3ea0884c9432 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1122,12 +1122,12 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy, static int ieee80211_set_channel(struct wiphy *wiphy, struct ieee80211_channel *chan, - enum nl80211_sec_chan_offset sec_chan_offset) + enum nl80211_channel_type channel_type) { struct ieee80211_local *local = wiphy_priv(wiphy); local->oper_channel = chan; - local->oper_sec_chan_offset = sec_chan_offset; + local->oper_channel_type = channel_type; return ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); } diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index a1eed7032c9b..5f510a13b9f0 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -98,6 +98,7 @@ u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss_ht_conf ht; u32 changed = 0; bool enable_ht = true, ht_changed; + enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; @@ -112,24 +113,36 @@ u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, ieee80211_channel_to_frequency(hti->control_chan)) enable_ht = false; - /* - * XXX: This is totally incorrect when there are multiple virtual - * interfaces, needs to be fixed later. - */ - ht_changed = local->hw.conf.ht.enabled != enable_ht; + if (enable_ht) { + channel_type = NL80211_CHAN_HT20; + + if (!(ap_ht_cap_flags & IEEE80211_HT_CAP_40MHZ_INTOLERANT) && + (sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) && + (hti->ht_param & IEEE80211_HT_PARAM_CHAN_WIDTH_ANY)) { + switch(hti->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET) { + case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: + channel_type = NL80211_CHAN_HT40PLUS; + break; + case IEEE80211_HT_PARAM_CHA_SEC_BELOW: + channel_type = NL80211_CHAN_HT40MINUS; + break; + } + } + } + + ht_changed = local->hw.conf.ht.enabled != enable_ht || + channel_type != local->hw.conf.ht.channel_type; + + local->oper_channel_type = channel_type; local->hw.conf.ht.enabled = enable_ht; + if (ht_changed) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_HT); /* disable HT */ if (!enable_ht) return 0; - ht.secondary_channel_offset = - hti->ht_param & IEEE80211_HT_PARAM_CHA_SEC_OFFSET; - ht.width_40_ok = - !(ap_ht_cap_flags & IEEE80211_HT_CAP_40MHZ_INTOLERANT) && - (sband->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) && - (hti->ht_param & IEEE80211_HT_PARAM_CHAN_WIDTH_ANY); + ht.operation_mode = le16_to_cpu(hti->operation_mode); /* if bss configuration changed store the new one */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 6f59e11d7b33..a7dabaecfc72 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -625,7 +625,7 @@ struct ieee80211_local { struct delayed_work scan_work; struct ieee80211_sub_if_data *scan_sdata; struct ieee80211_channel *oper_channel, *scan_channel; - enum nl80211_sec_chan_offset oper_sec_chan_offset; + enum nl80211_channel_type oper_channel_type; u8 scan_ssid[IEEE80211_MAX_SSID_LEN]; size_t scan_ssid_len; struct list_head bss_list; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 6d8710327d14..a0371caf01ce 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -195,37 +195,30 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) struct ieee80211_channel *chan; int ret = 0; int power; - enum nl80211_sec_chan_offset sec_chan_offset; + enum nl80211_channel_type channel_type; might_sleep(); if (local->sw_scanning) { chan = local->scan_channel; - sec_chan_offset = NL80211_SEC_CHAN_NO_HT; + channel_type = NL80211_CHAN_NO_HT; } else { chan = local->oper_channel; - sec_chan_offset = local->oper_sec_chan_offset; + channel_type = local->oper_channel_type; } if (chan != local->hw.conf.channel || - sec_chan_offset != local->hw.conf.ht.sec_chan_offset) { + channel_type != local->hw.conf.ht.channel_type) { local->hw.conf.channel = chan; - switch (sec_chan_offset) { - case NL80211_SEC_CHAN_NO_HT: + local->hw.conf.ht.channel_type = channel_type; + switch (channel_type) { + case NL80211_CHAN_NO_HT: local->hw.conf.ht.enabled = false; - local->hw.conf.ht.sec_chan_offset = 0; break; - case NL80211_SEC_CHAN_DISABLED: + case NL80211_CHAN_HT20: + case NL80211_CHAN_HT40MINUS: + case NL80211_CHAN_HT40PLUS: local->hw.conf.ht.enabled = true; - local->hw.conf.ht.sec_chan_offset = 0; - break; - case NL80211_SEC_CHAN_BELOW: - local->hw.conf.ht.enabled = true; - local->hw.conf.ht.sec_chan_offset = -1; - break; - case NL80211_SEC_CHAN_ABOVE: - local->hw.conf.ht.enabled = true; - local->hw.conf.ht.sec_chan_offset = 1; break; } changed |= IEEE80211_CONF_CHANGE_CHANNEL; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 290b0017ef2e..e4d1fca5c72d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -858,6 +858,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); local->hw.conf.ht.enabled = false; + local->oper_channel_type = NL80211_CHAN_NO_HT; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_HT); ieee80211_bss_info_change_notify(sdata, changed); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 505d68f344ce..71a8391c54f6 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -641,7 +641,7 @@ int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz) chan->flags & IEEE80211_CHAN_NO_IBSS) return ret; local->oper_channel = chan; - local->oper_sec_chan_offset = NL80211_SEC_CHAN_NO_HT; + local->oper_channel_type = NL80211_CHAN_NO_HT; if (local->sw_scanning || local->hw_scanning) ret = 0; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 93c9b983ce08..1e728fff474e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -60,7 +60,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { .len = BUS_ID_SIZE-1 }, [NL80211_ATTR_WIPHY_TXQ_PARAMS] = { .type = NLA_NESTED }, [NL80211_ATTR_WIPHY_FREQ] = { .type = NLA_U32 }, - [NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET] = { .type = NLA_U32 }, + [NL80211_ATTR_WIPHY_CHANNEL_TYPE] = { .type = NLA_U32 }, [NL80211_ATTR_IFTYPE] = { .type = NLA_U32 }, [NL80211_ATTR_IFINDEX] = { .type = NLA_U32 }, @@ -362,8 +362,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { - enum nl80211_sec_chan_offset sec_chan_offset = - NL80211_SEC_CHAN_NO_HT; + enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; struct ieee80211_channel *chan; struct ieee80211_sta_ht_cap *ht_cap; u32 freq, sec_freq; @@ -375,13 +374,13 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) result = -EINVAL; - if (info->attrs[NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET]) { - sec_chan_offset = nla_get_u32(info->attrs[ - NL80211_ATTR_WIPHY_SEC_CHAN_OFFSET]); - if (sec_chan_offset != NL80211_SEC_CHAN_NO_HT && - sec_chan_offset != NL80211_SEC_CHAN_DISABLED && - sec_chan_offset != NL80211_SEC_CHAN_BELOW && - sec_chan_offset != NL80211_SEC_CHAN_ABOVE) + if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { + channel_type = nla_get_u32(info->attrs[ + NL80211_ATTR_WIPHY_CHANNEL_TYPE]); + if (channel_type != NL80211_CHAN_NO_HT && + channel_type != NL80211_CHAN_HT20 && + channel_type != NL80211_CHAN_HT40PLUS && + channel_type != NL80211_CHAN_HT40MINUS) goto bad_res; } @@ -392,9 +391,9 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (!chan || chan->flags & IEEE80211_CHAN_DISABLED) goto bad_res; - if (sec_chan_offset == NL80211_SEC_CHAN_BELOW) + if (channel_type == NL80211_CHAN_HT40MINUS) sec_freq = freq - 20; - else if (sec_chan_offset == NL80211_SEC_CHAN_ABOVE) + else if (channel_type == NL80211_CHAN_HT40PLUS) sec_freq = freq + 20; else sec_freq = 0; @@ -402,7 +401,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) ht_cap = &rdev->wiphy.bands[chan->band]->ht_cap; /* no HT capabilities */ - if (sec_chan_offset != NL80211_SEC_CHAN_NO_HT && + if (channel_type != NL80211_CHAN_NO_HT && !ht_cap->ht_supported) goto bad_res; @@ -422,7 +421,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } result = rdev->ops->set_channel(&rdev->wiphy, chan, - sec_chan_offset); + channel_type); if (result) goto bad_res; } -- cgit v1.2.3 From 0fb8ca45eb164c405eef8978f26829f9348b4d4d Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Fri, 12 Dec 2008 14:38:33 +0200 Subject: mac80211: Add HT rates into RX status reporting This patch adds option for HT-enabled drivers to report HT rates (HT20/HT40, short GI, MCS index) to mac80211. These rates are currently not in the rate table, so the rate_idx is used to indicate MCS index. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- include/net/mac80211.h | 11 ++++++++-- net/mac80211/mlme.c | 7 ++++++- net/mac80211/rx.c | 56 +++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 61 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 22ae72c58d76..9428d3e2f113 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -436,6 +436,9 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * is valid. This is useful in monitor mode and necessary for beacon frames * to enable IBSS merging. * @RX_FLAG_SHORTPRE: Short preamble was used for this frame + * @RX_FLAG_HT: HT MCS was used and rate_idx is MCS index + * @RX_FLAG_40MHZ: HT40 (40 MHz) was used + * @RX_FLAG_SHORT_GI: Short guard interval was used */ enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = 1<<0, @@ -446,7 +449,10 @@ enum mac80211_rx_flags { RX_FLAG_FAILED_FCS_CRC = 1<<5, RX_FLAG_FAILED_PLCP_CRC = 1<<6, RX_FLAG_TSFT = 1<<7, - RX_FLAG_SHORTPRE = 1<<8 + RX_FLAG_SHORTPRE = 1<<8, + RX_FLAG_HT = 1<<9, + RX_FLAG_40MHZ = 1<<10, + RX_FLAG_SHORT_GI = 1<<11, }; /** @@ -466,7 +472,8 @@ enum mac80211_rx_flags { * @noise: noise when receiving this frame, in dBm. * @qual: overall signal quality indication, in percent (0-100). * @antenna: antenna used - * @rate_idx: index of data rate into band's supported rates + * @rate_idx: index of data rate into band's supported rates or MCS index if + * HT rates are use (RX_FLAG_HT) * @flag: %RX_FLAG_* */ struct ieee80211_rx_status { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e4d1fca5c72d..a444b38f4901 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1613,8 +1613,13 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, * e.g: at 1 MBit that means mactime is 192 usec earlier * (=24 bytes * 8 usecs/byte) than the beacon timestamp. */ - int rate = local->hw.wiphy->bands[band]-> + int rate; + if (rx_status->flag & RX_FLAG_HT) { + rate = 65; /* TODO: HT rates */ + } else { + rate = local->hw.wiphy->bands[band]-> bitrates[rx_status->rate_idx].bitrate; + } rx_timestamp = rx_status->mactime + (24 * 8 * 10 / rate); } else if (local && local->ops && local->ops->get_tsf) /* second best option: get current TSF */ diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 73cf126cef49..b729c005a2b3 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -149,7 +149,17 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, pos++; /* IEEE80211_RADIOTAP_RATE */ - *pos = rate->bitrate / 5; + if (status->flag & RX_FLAG_HT) { + /* + * TODO: add following information into radiotap header once + * suitable fields are defined for it: + * - MCS index (status->rate_idx) + * - HT40 (status->flag & RX_FLAG_40MHZ) + * - short-GI (status->flag & RX_FLAG_SHORT_GI) + */ + *pos = 0; + } else + *pos = rate->bitrate / 5; pos++; /* IEEE80211_RADIOTAP_CHANNEL */ @@ -1849,9 +1859,15 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, if (!(sdata->dev->flags & IFF_PROMISC)) return 0; rx->flags &= ~IEEE80211_RX_RA_MATCH; - } else if (!rx->sta) + } else if (!rx->sta) { + int rate_idx; + if (rx->status->flag & RX_FLAG_HT) + rate_idx = 0; /* TODO: HT rates */ + else + rate_idx = rx->status->rate_idx; rx->sta = ieee80211_ibss_add_sta(sdata, bssid, hdr->addr2, - BIT(rx->status->rate_idx)); + BIT(rate_idx)); + } break; case NL80211_IFTYPE_MESH_POINT: if (!multicast && @@ -2057,7 +2073,13 @@ static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, tid_agg_rx->reorder_buf[index]->cb, sizeof(status)); sband = local->hw.wiphy->bands[status.band]; - rate = &sband->bitrates[status.rate_idx]; + if (status.flag & RX_FLAG_HT) { + /* TODO: HT rates */ + rate = sband->bitrates; + } else { + rate = &sband->bitrates + [status.rate_idx]; + } __ieee80211_rx_handle_packet(hw, tid_agg_rx->reorder_buf[index], &status, rate); @@ -2101,7 +2123,10 @@ static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, memcpy(&status, tid_agg_rx->reorder_buf[index]->cb, sizeof(status)); sband = local->hw.wiphy->bands[status.band]; - rate = &sband->bitrates[status.rate_idx]; + if (status.flag & RX_FLAG_HT) + rate = sband->bitrates; /* TODO: HT rates */ + else + rate = &sband->bitrates[status.rate_idx]; __ieee80211_rx_handle_packet(hw, tid_agg_rx->reorder_buf[index], &status, rate); tid_agg_rx->stored_mpdu_num--; @@ -2189,15 +2214,26 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, } sband = local->hw.wiphy->bands[status->band]; - - if (!sband || - status->rate_idx < 0 || - status->rate_idx >= sband->n_bitrates) { + if (!sband) { WARN_ON(1); return; } - rate = &sband->bitrates[status->rate_idx]; + if (status->flag & RX_FLAG_HT) { + /* rate_idx is MCS index */ + if (WARN_ON(status->rate_idx < 0 || + status->rate_idx >= 76)) + return; + /* HT rates are not in the table - use the highest legacy rate + * for now since other parts of mac80211 may not yet be fully + * MCS aware. */ + rate = &sband->bitrates[sband->n_bitrates - 1]; + } else { + if (WARN_ON(status->rate_idx < 0 || + status->rate_idx >= sband->n_bitrates)) + return; + rate = &sband->bitrates[status->rate_idx]; + } /* * key references and virtual interfaces are protected using RCU -- cgit v1.2.3 From 520eb82076993b7f55ef9b80771d264272e5127b Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Thu, 18 Dec 2008 23:35:27 +0200 Subject: mac80211: implement dynamic power save This patch implements dynamic power save for mac80211. Basically it means enabling power save mode after an idle period. Implementing it dynamically gives a good compromise of low power consumption and low latency. Some hardware have support for this in firmware, but some require the host to do it. The dynamic power save is implemented by adding an timeout to ieee80211_subif_start_xmit(). The timeout can be enabled from userspace with Wireless Extensions. For example, the command below enables the dynamic power save and sets the time timeout to 500 ms: iwconfig wlan0 power timeout 500m Power save now only works with devices which handle power save in firmware. It's also disabled by default and the heuristics when and how to enable is considered as a policy decision and will be left for the userspace to handle. In case the firmware has support for this, drivers can disable this feature with IEEE80211_HW_NO_STACK_DYNAMIC_PS. Big thanks to Johannes Berg for the help with the design and code. Signed-off-by: Kalle Valo Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 6 ++++++ net/mac80211/ieee80211_i.h | 10 ++++++++++ net/mac80211/main.c | 7 +++++++ net/mac80211/mlme.c | 49 ++++++++++++++++++++++++++++++++++++++++++++-- net/mac80211/tx.c | 13 ++++++++++++ net/mac80211/wext.c | 30 ++++++++++++++++++---------- 6 files changed, 103 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 9428d3e2f113..b3bd00a9d992 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -854,6 +854,11 @@ enum ieee80211_tkip_key_type { * * @IEEE80211_HW_AMPDU_AGGREGATION: * Hardware supports 11n A-MPDU aggregation. + * + * @IEEE80211_HW_NO_STACK_DYNAMIC_PS: + * Hardware which has dynamic power save support, meaning + * that power save is enabled in idle periods, and don't need support + * from stack. */ enum ieee80211_hw_flags { IEEE80211_HW_RX_INCLUDES_FCS = 1<<1, @@ -866,6 +871,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_NOISE_DBM = 1<<8, IEEE80211_HW_SPECTRUM_MGMT = 1<<9, IEEE80211_HW_AMPDU_AGGREGATION = 1<<10, + IEEE80211_HW_NO_STACK_DYNAMIC_PS = 1<<11, }; /** diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a74d6738b30a..f3eec989662b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -540,6 +540,7 @@ enum { enum queue_stop_reason { IEEE80211_QUEUE_STOP_REASON_DRIVER, + IEEE80211_QUEUE_STOP_REASON_PS, }; /* maximum number of hardware queues we support. */ @@ -693,7 +694,12 @@ struct ieee80211_local { */ int wifi_wme_noack_test; unsigned int wmm_acm; /* bit field of ACM bits (BIT(802.1D tag)) */ + bool powersave; + int dynamic_ps_timeout; + struct work_struct dynamic_ps_enable_work; + struct work_struct dynamic_ps_disable_work; + struct timer_list dynamic_ps_timer; #ifdef CONFIG_MAC80211_DEBUGFS struct local_debugfsdentries { @@ -977,6 +983,10 @@ int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freq); u64 ieee80211_mandatory_rates(struct ieee80211_local *local, enum ieee80211_band band); +void ieee80211_dynamic_ps_enable_work(struct work_struct *work); +void ieee80211_dynamic_ps_disable_work(struct work_struct *work); +void ieee80211_dynamic_ps_timer(unsigned long data); + void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, enum queue_stop_reason reason); void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 21335382f530..24b14363d6e7 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -729,6 +729,13 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, INIT_DELAYED_WORK(&local->scan_work, ieee80211_scan_work); + INIT_WORK(&local->dynamic_ps_enable_work, + ieee80211_dynamic_ps_enable_work); + INIT_WORK(&local->dynamic_ps_disable_work, + ieee80211_dynamic_ps_disable_work); + setup_timer(&local->dynamic_ps_timer, + ieee80211_dynamic_ps_timer, (unsigned long) local); + sta_info_init(local); tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index dac8bd37dcf5..5ba721b6a399 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -745,8 +745,14 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ieee80211_bss_info_change_notify(sdata, bss_info_changed); if (local->powersave) { - local->hw.conf.flags |= IEEE80211_CONF_PS; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + if (local->dynamic_ps_timeout > 0) + mod_timer(&local->dynamic_ps_timer, jiffies + + msecs_to_jiffies(local->dynamic_ps_timeout)); + else { + conf->flags |= IEEE80211_CONF_PS; + ieee80211_hw_config(local, + IEEE80211_CONF_CHANGE_PS); + } } netif_tx_start_all_queues(sdata->dev); @@ -866,6 +872,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, local->oper_channel_type = NL80211_CHAN_NO_HT; config_changed |= IEEE80211_CONF_CHANGE_HT; + del_timer_sync(&local->dynamic_ps_timer); + cancel_work_sync(&local->dynamic_ps_enable_work); + if (local->hw.conf.flags & IEEE80211_CONF_PS) { local->hw.conf.flags &= ~IEEE80211_CONF_PS; config_changed |= IEEE80211_CONF_CHANGE_PS; @@ -2593,3 +2602,39 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) ieee80211_restart_sta_timer(sdata); rcu_read_unlock(); } + +void ieee80211_dynamic_ps_disable_work(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, + dynamic_ps_disable_work); + + if (local->hw.conf.flags & IEEE80211_CONF_PS) { + local->hw.conf.flags &= ~IEEE80211_CONF_PS; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + } + + ieee80211_wake_queues_by_reason(&local->hw, + IEEE80211_QUEUE_STOP_REASON_PS); +} + +void ieee80211_dynamic_ps_enable_work(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, + dynamic_ps_enable_work); + + if (local->hw.conf.flags & IEEE80211_CONF_PS) + return; + + local->hw.conf.flags |= IEEE80211_CONF_PS; + + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); +} + +void ieee80211_dynamic_ps_timer(unsigned long data) +{ + struct ieee80211_local *local = (void *) data; + + queue_work(local->hw.workqueue, &local->dynamic_ps_enable_work); +} diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index b098c58d216f..a4af3a124cce 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1473,6 +1473,19 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, goto fail; } + if (!(local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS) && + local->dynamic_ps_timeout > 0) { + if (local->hw.conf.flags & IEEE80211_CONF_PS) { + ieee80211_stop_queues_by_reason(&local->hw, + IEEE80211_QUEUE_STOP_REASON_PS); + queue_work(local->hw.workqueue, + &local->dynamic_ps_disable_work); + } + + mod_timer(&local->dynamic_ps_timer, jiffies + + msecs_to_jiffies(local->dynamic_ps_timeout)); + } + nh_pos = skb_network_header(skb) - skb->data; h_pos = skb_transport_header(skb) - skb->data; diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index f6640d047157..7162d5816f39 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -833,7 +833,7 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_conf *conf = &local->hw.conf; - int ret = 0; + int ret = 0, timeout = 0; bool ps; if (sdata->vif.type != NL80211_IFTYPE_STATION) @@ -841,6 +841,7 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev, if (wrq->disabled) { ps = false; + timeout = 0; goto set; } @@ -850,22 +851,31 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev, case IW_POWER_ALL_R: /* If explicitely state all */ ps = true; break; - default: /* Otherwise we don't support it */ - return -EINVAL; + default: /* Otherwise we ignore */ + break; } - if (ps == local->powersave) - return ret; + if (wrq->flags & IW_POWER_TIMEOUT) + timeout = wrq->value / 1000; set: + if (ps == local->powersave && timeout == local->dynamic_ps_timeout) + return ret; + local->powersave = ps; + local->dynamic_ps_timeout = timeout; if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { - if (local->powersave) - conf->flags |= IEEE80211_CONF_PS; - else - conf->flags &= ~IEEE80211_CONF_PS; - + if (!(local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS) && + local->dynamic_ps_timeout > 0) + mod_timer(&local->dynamic_ps_timer, jiffies + + msecs_to_jiffies(local->dynamic_ps_timeout)); + else { + if (local->powersave) + conf->flags |= IEEE80211_CONF_PS; + else + conf->flags &= ~IEEE80211_CONF_PS; + } ret = ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } -- cgit v1.2.3 From 12204e24b1330428c3062faee10a0d80b8a5cb61 Mon Sep 17 00:00:00 2001 From: James Morris Date: Fri, 19 Dec 2008 10:44:42 +1100 Subject: security: pass mount flags to security_sb_kern_mount() Pass mount flags to security_sb_kern_mount(), so security modules can determine if a mount operation is being performed by the kernel. Signed-off-by: James Morris Acked-by: Stephen Smalley --- fs/super.c | 2 +- include/linux/security.h | 6 +++--- security/capability.c | 2 +- security/security.c | 4 ++-- security/selinux/hooks.c | 2 +- security/smack/smack_lsm.c | 3 ++- 6 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/super.c b/fs/super.c index 400a7608f15e..ddba069d7a99 100644 --- a/fs/super.c +++ b/fs/super.c @@ -914,7 +914,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void goto out_free_secdata; BUG_ON(!mnt->mnt_sb); - error = security_sb_kern_mount(mnt->mnt_sb, secdata); + error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); if (error) goto out_sb; diff --git a/include/linux/security.h b/include/linux/security.h index 6423abf1ac0f..3416cb85e77b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1308,7 +1308,7 @@ struct security_operations { int (*sb_alloc_security) (struct super_block *sb); void (*sb_free_security) (struct super_block *sb); int (*sb_copy_data) (char *orig, char *copy); - int (*sb_kern_mount) (struct super_block *sb, void *data); + int (*sb_kern_mount) (struct super_block *sb, int flags, void *data); int (*sb_show_options) (struct seq_file *m, struct super_block *sb); int (*sb_statfs) (struct dentry *dentry); int (*sb_mount) (char *dev_name, struct path *path, @@ -1575,7 +1575,7 @@ int security_bprm_secureexec(struct linux_binprm *bprm); int security_sb_alloc(struct super_block *sb); void security_sb_free(struct super_block *sb); int security_sb_copy_data(char *orig, char *copy); -int security_sb_kern_mount(struct super_block *sb, void *data); +int security_sb_kern_mount(struct super_block *sb, int flags, void *data); int security_sb_show_options(struct seq_file *m, struct super_block *sb); int security_sb_statfs(struct dentry *dentry); int security_sb_mount(char *dev_name, struct path *path, @@ -1850,7 +1850,7 @@ static inline int security_sb_copy_data(char *orig, char *copy) return 0; } -static inline int security_sb_kern_mount(struct super_block *sb, void *data) +static inline int security_sb_kern_mount(struct super_block *sb, int flags, void *data) { return 0; } diff --git a/security/capability.c b/security/capability.c index b9e391425e6f..2dce66fcb992 100644 --- a/security/capability.c +++ b/security/capability.c @@ -59,7 +59,7 @@ static int cap_sb_copy_data(char *orig, char *copy) return 0; } -static int cap_sb_kern_mount(struct super_block *sb, void *data) +static int cap_sb_kern_mount(struct super_block *sb, int flags, void *data) { return 0; } diff --git a/security/security.c b/security/security.c index f0d96a6cc4e9..d85dbb37c972 100644 --- a/security/security.c +++ b/security/security.c @@ -254,9 +254,9 @@ int security_sb_copy_data(char *orig, char *copy) } EXPORT_SYMBOL(security_sb_copy_data); -int security_sb_kern_mount(struct super_block *sb, void *data) +int security_sb_kern_mount(struct super_block *sb, int flags, void *data) { - return security_ops->sb_kern_mount(sb, data); + return security_ops->sb_kern_mount(sb, flags, data); } int security_sb_show_options(struct seq_file *m, struct super_block *sb) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 8dbc54cde59e..7465d713b531 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2474,7 +2474,7 @@ out: return rc; } -static int selinux_sb_kern_mount(struct super_block *sb, void *data) +static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data) { const struct cred *cred = current_cred(); struct avc_audit_data ad; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 8ad48161cef5..1b5551dfc1f7 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -250,11 +250,12 @@ static int smack_sb_copy_data(char *orig, char *smackopts) /** * smack_sb_kern_mount - Smack specific mount processing * @sb: the file system superblock + * @flags: the mount flags * @data: the smack mount options * * Returns 0 on success, an error code on failure */ -static int smack_sb_kern_mount(struct super_block *sb, void *data) +static int smack_sb_kern_mount(struct super_block *sb, int flags, void *data) { struct dentry *root = sb->s_root; struct inode *inode = root->d_inode; -- cgit v1.2.3 From 6bd9cd50c830eb88d571c492ec370a30bf999e15 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 19 Dec 2008 13:47:26 -0800 Subject: x86: PAT: clarify is_linear_pfn_mapping() interface Impact: Documentation only Incremental patches to address the review comments from Nick Piggin for v3 version of x86 PAT pfnmap changes patchset here http://lkml.indiana.edu/hypermail/linux/kernel/0812.2/01330.html This patch: Clarify is_linear_pfn_mapping() and its usage. It is used by x86 PAT code for performance reasons. Identifying pfnmap as linear over entire vma helps speedup reserve and free of memtype for the region. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- include/linux/mm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 87ecb40e11a0..35f811b0cd69 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -145,6 +145,14 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ +/* + * This interface is used by x86 PAT code to identify a pfn mapping that is + * linear over entire vma. This is to optimize PAT code that deals with + * marking the physical region with a particular prot. This is not for generic + * mm use. Note also that this check will not work if the pfn mapping is + * linear for a vma starting at physical address 0. In which case PAT code + * falls back to slow path of reserving physical range page by page. + */ static inline int is_linear_pfn_mapping(struct vm_area_struct *vma) { return ((vma->vm_flags & VM_PFNMAP) && vma->vm_pgoff); -- cgit v1.2.3 From d87fe6607c31944f7572f965c1507ae77026c133 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 19 Dec 2008 13:47:27 -0800 Subject: x86: PAT: modify follow_phys to return phys_addr prot and return value Impact: Changes and globalizes an existing static interface. Follow_phys does similar things as follow_pfnmap_pte. Make a minor change to follow_phys so that it can be used in place of follow_pfnmap_pte. Physical address return value with 0 as error return does not work in follow_phys as the actual physical address 0 mapping may exist in pte. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- include/linux/mm.h | 2 ++ mm/memory.c | 31 ++++++++++++++----------------- 2 files changed, 16 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 35f811b0cd69..2f6e2f886d4b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -804,6 +804,8 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma); void unmap_mapping_range(struct address_space *mapping, loff_t const holebegin, loff_t const holelen, int even_cows); +int follow_phys(struct vm_area_struct *vma, unsigned long address, + unsigned int flags, unsigned long *prot, resource_size_t *phys); int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write); diff --git a/mm/memory.c b/mm/memory.c index 1e8f0d347c0e..79f28e35d4fc 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2981,9 +2981,9 @@ int in_gate_area_no_task(unsigned long addr) #endif /* __HAVE_ARCH_GATE_AREA */ #ifdef CONFIG_HAVE_IOREMAP_PROT -static resource_size_t follow_phys(struct vm_area_struct *vma, - unsigned long address, unsigned int flags, - unsigned long *prot) +int follow_phys(struct vm_area_struct *vma, + unsigned long address, unsigned int flags, + unsigned long *prot, resource_size_t *phys) { pgd_t *pgd; pud_t *pud; @@ -2992,24 +2992,26 @@ static resource_size_t follow_phys(struct vm_area_struct *vma, spinlock_t *ptl; resource_size_t phys_addr = 0; struct mm_struct *mm = vma->vm_mm; + int ret = -EINVAL; - VM_BUG_ON(!(vma->vm_flags & (VM_IO | VM_PFNMAP))); + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) + goto out; pgd = pgd_offset(mm, address); if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) - goto no_page_table; + goto out; pud = pud_offset(pgd, address); if (pud_none(*pud) || unlikely(pud_bad(*pud))) - goto no_page_table; + goto out; pmd = pmd_offset(pud, address); if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) - goto no_page_table; + goto out; /* We cannot handle huge page PFN maps. Luckily they don't exist. */ if (pmd_huge(*pmd)) - goto no_page_table; + goto out; ptep = pte_offset_map_lock(mm, pmd, address, &ptl); if (!ptep) @@ -3024,13 +3026,13 @@ static resource_size_t follow_phys(struct vm_area_struct *vma, phys_addr <<= PAGE_SHIFT; /* Shift here to avoid overflow on PAE */ *prot = pgprot_val(pte_pgprot(pte)); + *phys = phys_addr; + ret = 0; unlock: pte_unmap_unlock(ptep, ptl); out: - return phys_addr; -no_page_table: - return 0; + return ret; } int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, @@ -3041,12 +3043,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, void *maddr; int offset = addr & (PAGE_SIZE-1); - if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) - return -EINVAL; - - phys_addr = follow_phys(vma, addr, write, &prot); - - if (!phys_addr) + if (follow_phys(vma, addr, write, &prot, &phys_addr)) return -EINVAL; maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot); -- cgit v1.2.3 From 982d789ab76c8a11426852fec2fdf2f412e21c0c Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 19 Dec 2008 13:47:28 -0800 Subject: x86: PAT: remove follow_pfnmap_pte in favor of follow_phys Impact: Cleanup - removes a new function in favor of a recently modified older one. Replace follow_pfnmap_pte in pat code with follow_phys. follow_phys lso returns protection eliminating the need of pte_pgprot call. Using follow_phys also eliminates the need for pte_pa. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/pgtable.h | 5 ----- arch/x86/mm/pat.c | 30 +++++++++++------------------ include/linux/mm.h | 3 --- mm/memory.c | 43 ------------------------------------------ 4 files changed, 11 insertions(+), 70 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 579f8ceee948..2aa792bbd7e0 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -230,11 +230,6 @@ static inline unsigned long pte_pfn(pte_t pte) return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT; } -static inline u64 pte_pa(pte_t pte) -{ - return pte_val(pte) & PTE_PFN_MASK; -} - #define pte_page(pte) pfn_to_page(pte_pfn(pte)) static inline int pmd_large(pmd_t pte) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index d5254bae84f4..541bcc944a5b 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -685,8 +685,7 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) int retval = 0; unsigned long i, j; u64 paddr; - pgprot_t prot; - pte_t pte; + unsigned long prot; unsigned long vma_start = vma->vm_start; unsigned long vma_end = vma->vm_end; unsigned long vma_size = vma_end - vma_start; @@ -696,26 +695,22 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) if (is_linear_pfn_mapping(vma)) { /* - * reserve the whole chunk starting from vm_pgoff, - * But, we have to get the protection from pte. + * reserve the whole chunk covered by vma. We need the + * starting address and protection from pte. */ - if (follow_pfnmap_pte(vma, vma_start, &pte)) { + if (follow_phys(vma, vma_start, 0, &prot, &paddr)) { WARN_ON_ONCE(1); - return -1; + return -EINVAL; } - prot = pte_pgprot(pte); - paddr = (u64)vma->vm_pgoff << PAGE_SHIFT; - return reserve_pfn_range(paddr, vma_size, prot); + return reserve_pfn_range(paddr, vma_size, __pgprot(prot)); } /* reserve entire vma page by page, using pfn and prot from pte */ for (i = 0; i < vma_size; i += PAGE_SIZE) { - if (follow_pfnmap_pte(vma, vma_start + i, &pte)) + if (follow_phys(vma, vma_start + i, 0, &prot, &paddr)) continue; - paddr = pte_pa(pte); - prot = pte_pgprot(pte); - retval = reserve_pfn_range(paddr, PAGE_SIZE, prot); + retval = reserve_pfn_range(paddr, PAGE_SIZE, __pgprot(prot)); if (retval) goto cleanup_ret; } @@ -724,10 +719,9 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) cleanup_ret: /* Reserve error: Cleanup partial reservation and return error */ for (j = 0; j < i; j += PAGE_SIZE) { - if (follow_pfnmap_pte(vma, vma_start + j, &pte)) + if (follow_phys(vma, vma_start + j, 0, &prot, &paddr)) continue; - paddr = pte_pa(pte); free_pfn_range(paddr, PAGE_SIZE); } @@ -797,6 +791,7 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, { unsigned long i; u64 paddr; + unsigned long prot; unsigned long vma_start = vma->vm_start; unsigned long vma_end = vma->vm_end; unsigned long vma_size = vma_end - vma_start; @@ -821,12 +816,9 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, } else { /* free entire vma, page by page, using the pfn from pte */ for (i = 0; i < vma_size; i += PAGE_SIZE) { - pte_t pte; - - if (follow_pfnmap_pte(vma, vma_start + i, &pte)) + if (follow_phys(vma, vma_start + i, 0, &prot, &paddr)) continue; - paddr = pte_pa(pte); free_pfn_range(paddr, PAGE_SIZE); } } diff --git a/include/linux/mm.h b/include/linux/mm.h index 2f6e2f886d4b..36f9b3fa5e15 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1239,9 +1239,6 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address, #define FOLL_GET 0x04 /* do get_page on page */ #define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */ -int follow_pfnmap_pte(struct vm_area_struct *vma, - unsigned long address, pte_t *ret_ptep); - typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, diff --git a/mm/memory.c b/mm/memory.c index 79f28e35d4fc..6b29f39a5a3e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1168,49 +1168,6 @@ no_page_table: return page; } -int follow_pfnmap_pte(struct vm_area_struct *vma, unsigned long address, - pte_t *ret_ptep) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *ptep, pte; - spinlock_t *ptl; - struct page *page; - struct mm_struct *mm = vma->vm_mm; - - if (!is_pfn_mapping(vma)) - goto err; - - page = NULL; - pgd = pgd_offset(mm, address); - if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) - goto err; - - pud = pud_offset(pgd, address); - if (pud_none(*pud) || unlikely(pud_bad(*pud))) - goto err; - - pmd = pmd_offset(pud, address); - if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) - goto err; - - ptep = pte_offset_map_lock(mm, pmd, address, &ptl); - - pte = *ptep; - if (!pte_present(pte)) - goto err_unlock; - - *ret_ptep = pte; - pte_unmap_unlock(ptep, ptl); - return 0; - -err_unlock: - pte_unmap_unlock(ptep, ptl); -err: - return -EINVAL; -} - /* Can we do the FOLL_ANON optimization? */ static inline int use_zero_page(struct vm_area_struct *vma) { -- cgit v1.2.3 From 34801ba9bf0381fcf0e2b08179d2c07f2c6ede74 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 19 Dec 2008 13:47:29 -0800 Subject: x86: PAT: move track untrack pfnmap stubs to asm-generic Impact: Cleanup and branch hints only. Move the track and untrack pfn stub routines from memory.c to asm-generic. Also add unlikely to pfnmap related calls in fork and exit path. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/pgtable.h | 6 ++---- include/asm-generic/pgtable.h | 46 ++++++++++++++++++++++++++++++++++++++++ include/linux/mm.h | 6 ------ mm/memory.c | 48 ++---------------------------------------- 4 files changed, 50 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 2aa792bbd7e0..875192bf72cb 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -339,12 +339,10 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) +#ifndef __ASSEMBLY__ /* Indicate that x86 has its own track and untrack pfn vma functions */ -#define track_pfn_vma_new track_pfn_vma_new -#define track_pfn_vma_copy track_pfn_vma_copy -#define untrack_pfn_vma untrack_pfn_vma +#define __HAVE_PFNMAP_TRACKING -#ifndef __ASSEMBLY__ #define __HAVE_PHYS_MEM_ACCESS_PROT struct file; pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index b84633801fb6..72ebe91005a8 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -293,6 +293,52 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, #define arch_flush_lazy_cpu_mode() do {} while (0) #endif +#ifndef __HAVE_PFNMAP_TRACKING +/* + * Interface that can be used by architecture code to keep track of + * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn) + * + * track_pfn_vma_new is called when a _new_ pfn mapping is being established + * for physical range indicated by pfn and size. + */ +static inline int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, + unsigned long pfn, unsigned long size) +{ + return 0; +} + +/* + * Interface that can be used by architecture code to keep track of + * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn) + * + * track_pfn_vma_copy is called when vma that is covering the pfnmap gets + * copied through copy_page_range(). + */ +static inline int track_pfn_vma_copy(struct vm_area_struct *vma) +{ + return 0; +} + +/* + * Interface that can be used by architecture code to keep track of + * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn) + * + * untrack_pfn_vma is called while unmapping a pfnmap for a region. + * untrack can be called for a specific region indicated by pfn and size or + * can be for the entire vma (in which case size can be zero). + */ +static inline void untrack_pfn_vma(struct vm_area_struct *vma, + unsigned long pfn, unsigned long size) +{ +} +#else +extern int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, + unsigned long pfn, unsigned long size); +extern int track_pfn_vma_copy(struct vm_area_struct *vma); +extern void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, + unsigned long size); +#endif + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_GENERIC_PGTABLE_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 36f9b3fa5e15..d3ddd735e375 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -163,12 +163,6 @@ static inline int is_pfn_mapping(struct vm_area_struct *vma) return (vma->vm_flags & VM_PFNMAP); } -extern int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, - unsigned long pfn, unsigned long size); -extern int track_pfn_vma_copy(struct vm_area_struct *vma); -extern void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, - unsigned long size); - /* * vm_fault is filled by the the pagefault handler and passed to the vma's * ->fault function. The vma's ->fault is responsible for returning a bitmask diff --git a/mm/memory.c b/mm/memory.c index 6b29f39a5a3e..f01b7eed6e16 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -99,50 +99,6 @@ int randomize_va_space __read_mostly = 2; #endif -#ifndef track_pfn_vma_new -/* - * Interface that can be used by architecture code to keep track of - * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn) - * - * track_pfn_vma_new is called when a _new_ pfn mapping is being established - * for physical range indicated by pfn and size. - */ -int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, - unsigned long pfn, unsigned long size) -{ - return 0; -} -#endif - -#ifndef track_pfn_vma_copy -/* - * Interface that can be used by architecture code to keep track of - * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn) - * - * track_pfn_vma_copy is called when vma that is covering the pfnmap gets - * copied through copy_page_range(). - */ -int track_pfn_vma_copy(struct vm_area_struct *vma) -{ - return 0; -} -#endif - -#ifndef untrack_pfn_vma -/* - * Interface that can be used by architecture code to keep track of - * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn) - * - * untrack_pfn_vma is called while unmapping a pfnmap for a region. - * untrack can be called for a specific region indicated by pfn and size or - * can be for the entire vma (in which case size can be zero). - */ -void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, - unsigned long size) -{ -} -#endif - static int __init disable_randmaps(char *s) { randomize_va_space = 0; @@ -713,7 +669,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, if (is_vm_hugetlb_page(vma)) return copy_hugetlb_page_range(dst_mm, src_mm, vma); - if (is_pfn_mapping(vma)) { + if (unlikely(is_pfn_mapping(vma))) { /* * We do not free on error cases below as remove_vma * gets called on error from higher level routine @@ -969,7 +925,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, if (vma->vm_flags & VM_ACCOUNT) *nr_accounted += (end - start) >> PAGE_SHIFT; - if (is_pfn_mapping(vma)) + if (unlikely(is_pfn_mapping(vma))) untrack_pfn_vma(vma, 0, 0); while (start != end) { -- cgit v1.2.3 From bf53de907dfdaac178c92d774aae7370d7b97d20 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 19 Dec 2008 15:10:24 +0100 Subject: x86, bts: add fork and exit handling Impact: introduce new ptrace facility Add arch_ptrace_untrace() function that is called when the tracer detaches (either voluntarily or when the tracing task dies); ptrace_disable() is only called on a voluntary detach. Add ptrace_fork() and arch_ptrace_fork(). They are called when a traced task is forked. Clear DS and BTS related fields on fork. Release DS resources and reclaim memory in ptrace_untrace(). This releases resources already when the tracing task dies. We used to do that when the traced task dies. Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ds.h | 9 ++++++++ arch/x86/include/asm/ptrace.h | 7 ++++++ arch/x86/kernel/ds.c | 11 ++++++++++ arch/x86/kernel/process_32.c | 20 ++++++++--------- arch/x86/kernel/process_64.c | 20 ++++++++--------- arch/x86/kernel/ptrace.c | 50 ++++++++++++++++++++++++++++++++++--------- include/linux/ptrace.h | 22 +++++++++++++++++++ kernel/fork.c | 2 ++ kernel/ptrace.c | 12 +++++++++++ 9 files changed, 121 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index ee0ea3a96c11..a8f672ba100c 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h @@ -252,12 +252,21 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); */ extern void ds_switch_to(struct task_struct *prev, struct task_struct *next); +/* + * Task clone/init and cleanup work + */ +extern void ds_copy_thread(struct task_struct *tsk, struct task_struct *father); +extern void ds_exit_thread(struct task_struct *tsk); + #else /* CONFIG_X86_DS */ struct cpuinfo_x86; static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} static inline void ds_switch_to(struct task_struct *prev, struct task_struct *next) {} +static inline void ds_copy_thread(struct task_struct *tsk, + struct task_struct *father) {} +static inline void ds_exit_thread(struct task_struct *tsk) {} #endif /* CONFIG_X86_DS */ #endif /* _ASM_X86_DS_H */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index fbf744215911..6d34d954c228 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -235,6 +235,13 @@ extern int do_get_thread_area(struct task_struct *p, int idx, extern int do_set_thread_area(struct task_struct *p, int idx, struct user_desc __user *info, int can_allocate); +extern void x86_ptrace_untrace(struct task_struct *); +extern void x86_ptrace_fork(struct task_struct *child, + unsigned long clone_flags); + +#define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk) +#define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags) + #endif /* __KERNEL__ */ #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 98d271e60e08..da91701a2348 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -1017,3 +1017,14 @@ void ds_switch_to(struct task_struct *prev, struct task_struct *next) update_debugctlmsr(next->thread.debugctlmsr); } + +void ds_copy_thread(struct task_struct *tsk, struct task_struct *father) +{ + clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR); + tsk->thread.ds_ctx = NULL; +} + +void ds_exit_thread(struct task_struct *tsk) +{ + WARN_ON(tsk->thread.ds_ctx); +} diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 605eff9a8ac0..3ba155d24884 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -60,6 +60,7 @@ #include #include #include +#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -251,17 +252,8 @@ void exit_thread(void) tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } -#ifdef CONFIG_X86_DS - /* Free any BTS tracers that have not been properly released. */ - if (unlikely(current->bts)) { - ds_release_bts(current->bts); - current->bts = NULL; - - kfree(current->bts_buffer); - current->bts_buffer = NULL; - current->bts_size = 0; - } -#endif /* CONFIG_X86_DS */ + + ds_exit_thread(current); } void flush_thread(void) @@ -343,6 +335,12 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } + + ds_copy_thread(p, current); + + clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); + p->thread.debugctlmsr = 0; + return err; } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 1cfd2a4bf853..416fb9282f4f 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -53,6 +53,7 @@ #include #include #include +#include asmlinkage extern void ret_from_fork(void); @@ -236,17 +237,8 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } -#ifdef CONFIG_X86_DS - /* Free any BTS tracers that have not been properly released. */ - if (unlikely(current->bts)) { - ds_release_bts(current->bts); - current->bts = NULL; - - kfree(current->bts_buffer); - current->bts_buffer = NULL; - current->bts_size = 0; - } -#endif /* CONFIG_X86_DS */ + + ds_exit_thread(current); } void flush_thread(void) @@ -376,6 +368,12 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, if (err) goto out; } + + ds_copy_thread(p, me); + + clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); + p->thread.debugctlmsr = 0; + err = 0; out: if (err && p->thread.io_bitmap_ptr) { diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 45e9855da2d2..6ad2bb607650 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -769,8 +769,47 @@ static int ptrace_bts_size(struct task_struct *child) return (trace->ds.top - trace->ds.begin) / trace->ds.size; } + +static void ptrace_bts_fork(struct task_struct *tsk) +{ + tsk->bts = NULL; + tsk->bts_buffer = NULL; + tsk->bts_size = 0; + tsk->thread.bts_ovfl_signal = 0; +} + +static void ptrace_bts_untrace(struct task_struct *child) +{ + if (unlikely(child->bts)) { + ds_release_bts(child->bts); + child->bts = NULL; + + kfree(child->bts_buffer); + child->bts_buffer = NULL; + child->bts_size = 0; + } +} + +static void ptrace_bts_detach(struct task_struct *child) +{ + ptrace_bts_untrace(child); +} +#else +static inline void ptrace_bts_fork(struct task_struct *tsk) {} +static inline void ptrace_bts_detach(struct task_struct *child) {} +static inline void ptrace_bts_untrace(struct task_struct *child) {} #endif /* CONFIG_X86_PTRACE_BTS */ +void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags) +{ + ptrace_bts_fork(child); +} + +void x86_ptrace_untrace(struct task_struct *child) +{ + ptrace_bts_untrace(child); +} + /* * Called by kernel/ptrace.c when detaching.. * @@ -782,16 +821,7 @@ void ptrace_disable(struct task_struct *child) #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); #endif -#ifdef CONFIG_X86_PTRACE_BTS - if (child->bts) { - ds_release_bts(child->bts); - child->bts = NULL; - - kfree(child->bts_buffer); - child->bts_buffer = NULL; - child->bts_size = 0; - } -#endif /* CONFIG_X86_PTRACE_BTS */ + ptrace_bts_detach(child); } #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 22641d5d45df..98b93ca4db06 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -94,6 +94,7 @@ extern void ptrace_notify(int exit_code); extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent); extern void __ptrace_unlink(struct task_struct *child); +extern void ptrace_fork(struct task_struct *task, unsigned long clone_flags); #define PTRACE_MODE_READ 1 #define PTRACE_MODE_ATTACH 2 /* Returns 0 on success, -errno on denial. */ @@ -313,6 +314,27 @@ static inline void user_enable_block_step(struct task_struct *task) #define arch_ptrace_stop(code, info) do { } while (0) #endif +#ifndef arch_ptrace_untrace +/* + * Do machine-specific work before untracing child. + * + * This is called for a normal detach as well as from ptrace_exit() + * when the tracing task dies. + * + * Called with write_lock(&tasklist_lock) held. + */ +#define arch_ptrace_untrace(task) do { } while (0) +#endif + +#ifndef arch_ptrace_fork +/* + * Do machine-specific work to initialize a new task. + * + * This is called from copy_process(). + */ +#define arch_ptrace_fork(child, clone_flags) do { } while (0) +#endif + extern int task_current_syscall(struct task_struct *target, long *callno, unsigned long args[6], unsigned int maxargs, unsigned long *sp, unsigned long *pc); diff --git a/kernel/fork.c b/kernel/fork.c index 7b93da72d4a2..65ce60adc8e8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1096,6 +1096,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif + if (unlikely(ptrace_reparented(current))) + ptrace_fork(p, clone_flags); /* Perform scheduler related setup. Assign this task to a CPU. */ sched_fork(p, clone_flags); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 4c8bcd7dd8e0..100a71cfdaba 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -25,6 +25,17 @@ #include #include + +/* + * Initialize a new task whose father had been ptraced. + * + * Called from copy_process(). + */ +void ptrace_fork(struct task_struct *child, unsigned long clone_flags) +{ + arch_ptrace_fork(child, clone_flags); +} + /* * ptrace a task: make the debugger its new parent and * move it to the ptrace list. @@ -72,6 +83,7 @@ void __ptrace_unlink(struct task_struct *child) child->parent = child->real_parent; list_del_init(&child->ptrace_entry); + arch_ptrace_untrace(child); if (task_is_traced(child)) ptrace_untrace(child); } -- cgit v1.2.3 From c5dee6177f4bd2095aab7d9be9f6ebdddd6deee9 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 19 Dec 2008 15:17:02 +0100 Subject: x86, bts: memory accounting Impact: move the BTS buffer accounting to the mlock bucket Add alloc_locked_buffer() and free_locked_buffer() functions to mm/mlock.c to kalloc a buffer and account the locked memory to current. Account the memory for the BTS buffer to the tracer. Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/kernel/ptrace.c | 45 ++++++++++++++++++++++++++++++++++----------- include/linux/mm.h | 2 ++ mm/mlock.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 6ad2bb607650..0a5df5f82fb9 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -650,6 +650,24 @@ static int ptrace_bts_drain(struct task_struct *child, return drained; } +static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size) +{ + child->bts_buffer = alloc_locked_buffer(size); + if (!child->bts_buffer) + return -ENOMEM; + + child->bts_size = size; + + return 0; +} + +static void ptrace_bts_free_buffer(struct task_struct *child) +{ + free_locked_buffer(child->bts_buffer, child->bts_size); + child->bts_buffer = NULL; + child->bts_size = 0; +} + static int ptrace_bts_config(struct task_struct *child, long cfg_size, const struct ptrace_bts_config __user *ucfg) @@ -679,14 +697,13 @@ static int ptrace_bts_config(struct task_struct *child, if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != child->bts_size)) { - kfree(child->bts_buffer); + int error; - child->bts_size = cfg.size; - child->bts_buffer = kzalloc(cfg.size, GFP_KERNEL); - if (!child->bts_buffer) { - child->bts_size = 0; - return -ENOMEM; - } + ptrace_bts_free_buffer(child); + + error = ptrace_bts_allocate_buffer(child, cfg.size); + if (error < 0) + return error; } if (cfg.flags & PTRACE_BTS_O_TRACE) @@ -701,10 +718,8 @@ static int ptrace_bts_config(struct task_struct *child, if (IS_ERR(child->bts)) { int error = PTR_ERR(child->bts); - kfree(child->bts_buffer); + ptrace_bts_free_buffer(child); child->bts = NULL; - child->bts_buffer = NULL; - child->bts_size = 0; return error; } @@ -784,6 +799,9 @@ static void ptrace_bts_untrace(struct task_struct *child) ds_release_bts(child->bts); child->bts = NULL; + /* We cannot update total_vm and locked_vm since + child's mm is already gone. But we can reclaim the + memory. */ kfree(child->bts_buffer); child->bts_buffer = NULL; child->bts_size = 0; @@ -792,7 +810,12 @@ static void ptrace_bts_untrace(struct task_struct *child) static void ptrace_bts_detach(struct task_struct *child) { - ptrace_bts_untrace(child); + if (unlikely(child->bts)) { + ds_release_bts(child->bts); + child->bts = NULL; + + ptrace_bts_free_buffer(child); + } } #else static inline void ptrace_bts_fork(struct task_struct *tsk) {} diff --git a/include/linux/mm.h b/include/linux/mm.h index ffee2f743418..9979d3fab6e7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1286,5 +1286,7 @@ int vmemmap_populate_basepages(struct page *start_page, int vmemmap_populate(struct page *start_page, unsigned long pages, int node); void vmemmap_populate_print_last(void); +extern void *alloc_locked_buffer(size_t size); +extern void free_locked_buffer(void *buffer, size_t size); #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/mm/mlock.c b/mm/mlock.c index 1ada366570cb..3035a56e7616 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -667,3 +667,48 @@ void user_shm_unlock(size_t size, struct user_struct *user) spin_unlock(&shmlock_user_lock); free_uid(user); } + +void *alloc_locked_buffer(size_t size) +{ + unsigned long rlim, vm, pgsz; + void *buffer = NULL; + + pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; + + down_write(¤t->mm->mmap_sem); + + rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; + vm = current->mm->total_vm + pgsz; + if (rlim < vm) + goto out; + + rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; + vm = current->mm->locked_vm + pgsz; + if (rlim < vm) + goto out; + + buffer = kzalloc(size, GFP_KERNEL); + if (!buffer) + goto out; + + current->mm->total_vm += pgsz; + current->mm->locked_vm += pgsz; + + out: + up_write(¤t->mm->mmap_sem); + return buffer; +} + +void free_locked_buffer(void *buffer, size_t size) +{ + unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; + + down_write(¤t->mm->mmap_sem); + + current->mm->total_vm -= pgsz; + current->mm->locked_vm -= pgsz; + + up_write(¤t->mm->mmap_sem); + + kfree(buffer); +} -- cgit v1.2.3 From 749820928a2fd47ff536773d869d2c3f8038b7d1 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Fri, 5 Dec 2008 08:15:54 +0000 Subject: of/gpio: Implement of_gpio_count() This function is used to count how many GPIOs are specified for a device node. Signed-off-by: Anton Vorontsov Signed-off-by: Paul Mackerras --- drivers/of/gpio.c | 34 ++++++++++++++++++++++++++++++++++ include/linux/of_gpio.h | 6 ++++++ 2 files changed, 40 insertions(+) (limited to 'include') diff --git a/drivers/of/gpio.c b/drivers/of/gpio.c index a4ba217116eb..6eea601a9204 100644 --- a/drivers/of/gpio.c +++ b/drivers/of/gpio.c @@ -79,6 +79,40 @@ err0: } EXPORT_SYMBOL(of_get_gpio_flags); +/** + * of_gpio_count - Count GPIOs for a device + * @np: device node to count GPIOs for + * + * The function returns the count of GPIOs specified for a node. + * + * Note that the empty GPIO specifiers counts too. For example, + * + * gpios = <0 + * &pio1 1 2 + * 0 + * &pio2 3 4>; + * + * defines four GPIOs (so this function will return 4), two of which + * are not specified. + */ +unsigned int of_gpio_count(struct device_node *np) +{ + unsigned int cnt = 0; + + do { + int ret; + + ret = of_parse_phandles_with_args(np, "gpios", "#gpio-cells", + cnt, NULL, NULL); + /* A hole in the gpios = <> counts anyway. */ + if (ret < 0 && ret != -EEXIST) + break; + } while (++cnt); + + return cnt; +} +EXPORT_SYMBOL(of_gpio_count); + /** * of_gpio_simple_xlate - translate gpio_spec to the GPIO number and flags * @of_gc: pointer to the of_gpio_chip structure diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h index e25abf610cb6..fc2472c3c254 100644 --- a/include/linux/of_gpio.h +++ b/include/linux/of_gpio.h @@ -65,6 +65,7 @@ static inline struct of_mm_gpio_chip *to_of_mm_gpio_chip(struct gpio_chip *gc) extern int of_get_gpio_flags(struct device_node *np, int index, enum of_gpio_flags *flags); +extern unsigned int of_gpio_count(struct device_node *np); extern int of_mm_gpiochip_add(struct device_node *np, struct of_mm_gpio_chip *mm_gc); @@ -81,6 +82,11 @@ static inline int of_get_gpio_flags(struct device_node *np, int index, return -ENOSYS; } +static inline unsigned int of_gpio_count(struct device_node *np) +{ + return 0; +} + #endif /* CONFIG_OF_GPIO */ /** -- cgit v1.2.3 From 3ddeb912f41801fd1968c7880d031702a396e4d0 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Sat, 20 Dec 2008 17:15:14 +0800 Subject: ftrace: enable format arguments checking Impact: broaden gcc printf format checks for ftrace_printk() format arguments checking for ftrace_printk() is __printf(1, 2), not __printf(1, 0). Signed-off-by: Lai Jiangshan Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 04b52e6ebc66..677432b9cb7e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -303,7 +303,7 @@ extern void ftrace_dump(void); static inline void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } static inline int -ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 0))); +ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); static inline void tracing_start(void) { } static inline void tracing_stop(void) { } -- cgit v1.2.3 From 1486a61ebcd2711532f8163d30babc40e11e7b40 Mon Sep 17 00:00:00 2001 From: Don Skidmore Date: Sun, 21 Dec 2008 20:09:50 -0800 Subject: net: fix DCB setstate to return success/failure Data Center Bridging (DCB) had no way to know if setstate had failed in the driver. This patch enables dcb netlink code to handle the status for the DCB setstate interface. Likewise it allows the driver to return a failed status if MSI-X isn't enabled. Signed-off-by: Don Skidmore Signed-off-by: Eric W Multanen Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_dcb_nl.c | 52 ++++++++++++++++++++++------------------ include/net/dcbnl.h | 2 +- net/dcb/dcbnl.c | 5 ++-- 3 files changed, 32 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c index 615c2803202a..7d158a5c545b 100644 --- a/drivers/net/ixgbe/ixgbe_dcb_nl.c +++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c @@ -124,39 +124,45 @@ static u16 ixgbe_dcb_select_queue(struct net_device *dev, struct sk_buff *skb) return 0; } -static void ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state) +static u8 ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state) { + u8 err = 0; struct ixgbe_adapter *adapter = netdev_priv(netdev); DPRINTK(DRV, INFO, "Set DCB Admin Mode.\n"); if (state > 0) { /* Turn on DCB */ - if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { - return; - } else { - if (netif_running(netdev)) - netdev->stop(netdev); - ixgbe_reset_interrupt_capability(adapter); - ixgbe_napi_del_all(adapter); - kfree(adapter->tx_ring); - kfree(adapter->rx_ring); - adapter->tx_ring = NULL; - adapter->rx_ring = NULL; - netdev->select_queue = &ixgbe_dcb_select_queue; + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) + goto out; - adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; - adapter->flags |= IXGBE_FLAG_DCB_ENABLED; - ixgbe_init_interrupt_scheme(adapter); - ixgbe_napi_add_all(adapter); - if (netif_running(netdev)) - netdev->open(netdev); + if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) { + DPRINTK(DRV, ERR, "Enable failed, needs MSI-X\n"); + err = 1; + goto out; } + + if (netif_running(netdev)) + netdev->netdev_ops->ndo_stop(netdev); + ixgbe_reset_interrupt_capability(adapter); + ixgbe_napi_del_all(adapter); + kfree(adapter->tx_ring); + kfree(adapter->rx_ring); + adapter->tx_ring = NULL; + adapter->rx_ring = NULL; + netdev->select_queue = &ixgbe_dcb_select_queue; + + adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; + adapter->flags |= IXGBE_FLAG_DCB_ENABLED; + ixgbe_init_interrupt_scheme(adapter); + ixgbe_napi_add_all(adapter); + if (netif_running(netdev)) + netdev->netdev_ops->ndo_open(netdev); } else { /* Turn off DCB */ if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { if (netif_running(netdev)) - netdev->stop(netdev); + netdev->netdev_ops->ndo_stop(netdev); ixgbe_reset_interrupt_capability(adapter); ixgbe_napi_del_all(adapter); kfree(adapter->tx_ring); @@ -170,11 +176,11 @@ static void ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state) ixgbe_init_interrupt_scheme(adapter); ixgbe_napi_add_all(adapter); if (netif_running(netdev)) - netdev->open(netdev); - } else { - return; + netdev->netdev_ops->ndo_open(netdev); } } +out: + return err; } static void ixgbe_dcbnl_get_perm_hw_addr(struct net_device *netdev, diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index 91e0a3d7faf2..775cfc8055be 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -26,7 +26,7 @@ */ struct dcbnl_rtnl_ops { u8 (*getstate)(struct net_device *); - void (*setstate)(struct net_device *, u8); + u8 (*setstate)(struct net_device *, u8); void (*getpermhwaddr)(struct net_device *, u8 *); void (*setpgtccfgtx)(struct net_device *, int, u8, u8, u8, u8); void (*setpgbwgcfgtx)(struct net_device *, int, u8); diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index a1254061629f..fc88fc4d4f63 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -714,9 +714,8 @@ static int dcbnl_setstate(struct net_device *netdev, struct nlattr **tb, value = nla_get_u8(tb[DCB_ATTR_STATE]); - netdev->dcbnl_ops->setstate(netdev, value); - - ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_SSTATE, DCB_ATTR_STATE, + ret = dcbnl_reply(netdev->dcbnl_ops->setstate(netdev, value), + RTM_SETDCB, DCB_CMD_SSTATE, DCB_ATTR_STATE, pid, seq, flags); return ret; -- cgit v1.2.3 From f4314e815e87b4ab1c9b1115dd5853cd20ca999c Mon Sep 17 00:00:00 2001 From: Don Skidmore Date: Sun, 21 Dec 2008 20:10:29 -0800 Subject: net: add DCNA attribute to the BCN interface for DCB Adds the Backward Congestion Notification Address (BCNA) attribute to the Backward Congestion Notification (BCN) interface for Data Center Bridging (DCB), which was missing. Receive the BCNA attribute in the ixgbe driver. The BCNA attribute is for a switch to inform the endstation about the physical port identification in order to support BCN on aggregated links. Signed-off-by: Don Skidmore Signed-off-by: Eric W Multanen Signed-off-by: Jeff Kirsher --- drivers/net/ixgbe/ixgbe_dcb_nl.c | 20 ++++++++++++++++++++ include/linux/dcbnl.h | 2 ++ net/dcb/dcbnl.c | 6 ++++-- 3 files changed, 26 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ixgbe/ixgbe_dcb_nl.c index 7d158a5c545b..8ac639d0da03 100644 --- a/drivers/net/ixgbe/ixgbe_dcb_nl.c +++ b/drivers/net/ixgbe/ixgbe_dcb_nl.c @@ -93,6 +93,8 @@ int ixgbe_copy_dcb_cfg(struct ixgbe_dcb_config *src_dcb_cfg, dst_dcb_cfg->bcn.rp_admin_mode[i - DCB_BCN_ATTR_RP_0] = src_dcb_cfg->bcn.rp_admin_mode[i - DCB_BCN_ATTR_RP_0]; } + dst_dcb_cfg->bcn.bcna_option[0] = src_dcb_cfg->bcn.bcna_option[0]; + dst_dcb_cfg->bcn.bcna_option[1] = src_dcb_cfg->bcn.bcna_option[1]; dst_dcb_cfg->bcn.rp_alpha = src_dcb_cfg->bcn.rp_alpha; dst_dcb_cfg->bcn.rp_beta = src_dcb_cfg->bcn.rp_beta; dst_dcb_cfg->bcn.rp_gd = src_dcb_cfg->bcn.rp_gd; @@ -457,6 +459,12 @@ static void ixgbe_dcbnl_getbcncfg(struct net_device *netdev, int enum_index, struct ixgbe_adapter *adapter = netdev_priv(netdev); switch (enum_index) { + case DCB_BCN_ATTR_BCNA_0: + *setting = adapter->dcb_cfg.bcn.bcna_option[0]; + break; + case DCB_BCN_ATTR_BCNA_1: + *setting = adapter->dcb_cfg.bcn.bcna_option[1]; + break; case DCB_BCN_ATTR_ALPHA: *setting = adapter->dcb_cfg.bcn.rp_alpha; break; @@ -516,6 +524,18 @@ static void ixgbe_dcbnl_setbcncfg(struct net_device *netdev, int enum_index, struct ixgbe_adapter *adapter = netdev_priv(netdev); switch (enum_index) { + case DCB_BCN_ATTR_BCNA_0: + adapter->temp_dcb_cfg.bcn.bcna_option[0] = setting; + if (adapter->temp_dcb_cfg.bcn.bcna_option[0] != + adapter->dcb_cfg.bcn.bcna_option[0]) + adapter->dcb_set_bitmap |= BIT_BCN; + break; + case DCB_BCN_ATTR_BCNA_1: + adapter->temp_dcb_cfg.bcn.bcna_option[1] = setting; + if (adapter->temp_dcb_cfg.bcn.bcna_option[1] != + adapter->dcb_cfg.bcn.bcna_option[1]) + adapter->dcb_set_bitmap |= BIT_BCN; + break; case DCB_BCN_ATTR_ALPHA: adapter->temp_dcb_cfg.bcn.rp_alpha = setting; if (adapter->temp_dcb_cfg.bcn.rp_alpha != diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index e73a61449ad6..b0ef274e0031 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -305,6 +305,8 @@ enum dcbnl_bcn_attrs{ DCB_BCN_ATTR_RP_7, DCB_BCN_ATTR_RP_ALL, + DCB_BCN_ATTR_BCNA_0, + DCB_BCN_ATTR_BCNA_1, DCB_BCN_ATTR_ALPHA, DCB_BCN_ATTR_BETA, DCB_BCN_ATTR_GD, diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index fc88fc4d4f63..5dbfe5fdc0d6 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -140,6 +140,8 @@ static struct nla_policy dcbnl_bcn_nest[DCB_BCN_ATTR_MAX + 1] = { [DCB_BCN_ATTR_RP_6] = {.type = NLA_U8}, [DCB_BCN_ATTR_RP_7] = {.type = NLA_U8}, [DCB_BCN_ATTR_RP_ALL] = {.type = NLA_FLAG}, + [DCB_BCN_ATTR_BCNA_0] = {.type = NLA_U32}, + [DCB_BCN_ATTR_BCNA_1] = {.type = NLA_U32}, [DCB_BCN_ATTR_ALPHA] = {.type = NLA_U32}, [DCB_BCN_ATTR_BETA] = {.type = NLA_U32}, [DCB_BCN_ATTR_GD] = {.type = NLA_U32}, @@ -922,7 +924,7 @@ static int dcbnl_bcn_getcfg(struct net_device *netdev, struct nlattr **tb, goto err_bcn; } - for (i = DCB_BCN_ATTR_ALPHA; i <= DCB_BCN_ATTR_RI; i++) { + for (i = DCB_BCN_ATTR_BCNA_0; i <= DCB_BCN_ATTR_RI; i++) { if (!getall && !bcn_tb[i]) continue; @@ -980,7 +982,7 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlattr **tb, data[i]->nla_type - DCB_BCN_ATTR_RP_0, value_byte); } - for (i = DCB_BCN_ATTR_ALPHA; i <= DCB_BCN_ATTR_RI; i++) { + for (i = DCB_BCN_ATTR_BCNA_0; i <= DCB_BCN_ATTR_RI; i++) { if (data[i] == NULL) continue; value_int = nla_get_u32(data[i]); -- cgit v1.2.3 From 209aa4fdc39eacc145a7f9c32a4b9ffcc68912c6 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Fri, 12 Dec 2008 16:35:40 +0900 Subject: fb: SH-5 uses __raw I/O accessors now also, drop the special casing. Signed-off-by: Paul Mundt --- include/linux/fb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fb.h b/include/linux/fb.h index 75a81eaf3430..1ee63df5be92 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -888,7 +888,7 @@ struct fb_info { #define fb_writeq sbus_writeq #define fb_memset sbus_memset_io -#elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || defined(__hppa__) || (defined(__sh__) && !defined(__SH5__)) || defined(__powerpc__) || defined(__avr32__) +#elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || defined(__hppa__) || defined(__sh__) || defined(__powerpc__) || defined(__avr32__) #define fb_readb __raw_readb #define fb_readw __raw_readw -- cgit v1.2.3 From 8564557a03c12adb9c4b76ae1e86db4113a04d13 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 19 Dec 2008 15:34:41 +0900 Subject: video: sh_mobile_lcdcfb deferred io support This patch adds sh_mobile_lcdcfb deferred io support for SYS panels. The LCDC hardware block managed by the sh_mobile_lcdcfb driver supports RGB or SYS panel configurations. SYS panels come with an external display controller that is resposible for refreshing the actual LCD panel. RGB panels are controlled directly by the LCDC and they need to be refreshed by the LCDC hardware. In the case of SYS panels we can save some power by configuring the LCDC hardware block in one-shot mode. In this one-shot mode panel refresh is managed by software. This works well together with deferred io since it allows us to stop clocks for most of the time and only enable clocks when we actually want to trigger an update. When there is no fbdev activity the clocks are kept stopped which allows us to deep sleep. The refresh rate in deferred io mode is set using platform data. The same platform data can also be used to disable deferred io mode. As with other deferred io frame buffers user space code should use fsync() on the frame buffer device to trigger an update. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- drivers/video/Kconfig | 1 + drivers/video/sh_mobile_lcdcfb.c | 170 ++++++++++++++++++++++++++++++++++----- include/video/sh_mobile_lcdc.h | 1 + 3 files changed, 150 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index dd483bfe3951..d0c821992a99 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -1893,6 +1893,7 @@ config FB_SH_MOBILE_LCDC select FB_SYS_COPYAREA select FB_SYS_IMAGEBLIT select FB_SYS_FOPS + select FB_DEFERRED_IO ---help--- Frame buffer driver for the on-chip SH-Mobile LCD controller. diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c index e339d829183c..0e2b8fd24df1 100644 --- a/drivers/video/sh_mobile_lcdcfb.c +++ b/drivers/video/sh_mobile_lcdcfb.c @@ -16,7 +16,9 @@ #include #include #include +#include #include